diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 01e1d01..5b690f5 100644 --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -1044,7 +1044,7 @@ "That means if reducer-num of the child RS is fixed (order by or forced bucketing) and small, it can make very slow, single MR.\n" + "The optimization will be automatically disabled if number of reducers would be less than specified value."), - HIVEOPTSORTDYNAMICPARTITION("hive.optimize.sort.dynamic.partition", true, + HIVEOPTSORTDYNAMICPARTITION("hive.optimize.sort.dynamic.partition", false, "When enabled dynamic partitioning column will be globally sorted.\n" + "This way we can keep only one record writer open for each partition value\n" + "in the reducer thereby reducing the memory pressure on reducers."), diff --git data/files/dynpart_test.txt data/files/dynpart_test.txt new file mode 100644 index 0000000..ab6cd4a --- /dev/null +++ data/files/dynpart_test.txt @@ -0,0 +1,24 @@ +24526172.99-11.32 +245261710022.633952.8 +24526172.1-2026.3 +2452617552.96-1363.84 +24526171765.07-4648.8 +2452617879.07-2185.76 +24526177412.832071.68 +245261785.825.61 +2452617565.92196.48 +24526175362.01-600.28 +24526173423.95-3164.07 +24526384133.98-775.72 +245263810171.1660.48 +2452638317.87-3775.38 +2452638156.67-4626.56 +24526381327.0857.97 +24526381971.35-488.25 +2452638181.03-207.24 +2452638267.01-3266.36 +24526380.15-241.22 +24526381524.33494.37 +2452638150.39-162.12 +24526381413.19178.08 +24526384329.49-4000.51 diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index 3ea6bb5..0711ab1 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -75,6 +75,7 @@ minitez.query.files.shared=alter_merge_2_orc.q,\ disable_merge_for_bucketing.q,\ dynpart_sort_opt_vectorization.q,\ dynpart_sort_optimization.q,\ + dynpart_sort_optimization2.q,\ enforce_order.q,\ filter_join_breaktask.q,\ filter_join_breaktask2.q,\ diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java index 3ff0782..e02071c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java @@ -29,6 +29,8 @@ import java.util.Map; import java.util.Set; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; @@ -38,13 +40,13 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.io.AcidUtils; -import org.apache.hadoop.hive.ql.io.RecordUpdater; -import org.apache.hadoop.hive.ql.io.StatsProvidingRecordWriter; import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils; import org.apache.hadoop.hive.ql.io.HiveKey; import org.apache.hadoop.hive.ql.io.HiveOutputFormat; import org.apache.hadoop.hive.ql.io.HivePartitioner; import org.apache.hadoop.hive.ql.io.HivePassThroughOutputFormat; +import org.apache.hadoop.hive.ql.io.RecordUpdater; +import org.apache.hadoop.hive.ql.io.StatsProvidingRecordWriter; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.HiveFatalException; import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx; @@ -72,14 +74,16 @@ import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.util.ReflectionUtils; -import com.google.common.collect.Lists; - /** * File Sink operator implementation. **/ public class FileSinkOperator extends TerminalOperator implements Serializable { + public static final Log LOG = LogFactory.getLog(FileSinkOperator.class); + private static final boolean isInfoEnabled = LOG.isInfoEnabled(); + private static final boolean isDebugEnabled = LOG.isDebugEnabled(); + protected transient HashMap valToPaths; protected transient int numDynParts; protected transient List dpColNames; @@ -101,10 +105,6 @@ protected transient boolean isCollectRWStats; private transient FSPaths prevFsp; private transient FSPaths fpaths; - private transient ObjectInspector keyOI; - private transient List keyWritables; - private transient List keys; - private transient int numKeyColToRead; private StructField recIdField; // field to find record identifier in private StructField bucketField; // field bucket is in in record id private StructObjectInspector recIdInspector; // OI for inspecting record id @@ -131,9 +131,6 @@ int acidLastBucket = -1; int acidFileOffset = -1; - public FSPaths() { - } - public FSPaths(Path specPath) { tmpPath = Utilities.toTempPath(specPath); taskOutputTempPath = Utilities.toTaskTempPath(specPath); @@ -141,7 +138,9 @@ public FSPaths(Path specPath) { finalPaths = new Path[numFiles]; outWriters = new RecordWriter[numFiles]; updaters = new RecordUpdater[numFiles]; - LOG.debug("Created slots for " + numFiles); + if (isDebugEnabled) { + LOG.debug("Created slots for " + numFiles); + } stat = new Stat(); } @@ -326,7 +325,6 @@ protected void initializeOp(Configuration hconf) throws HiveException { parent = Utilities.toTempPath(conf.getDirName()); statsCollectRawDataSize = conf.isStatsCollectRawDataSize(); statsFromRecordWriter = new boolean[numFiles]; - serializer = (Serializer) conf.getTableInfo().getDeserializerClass().newInstance(); serializer.initialize(null, conf.getTableInfo().getProperties()); outputClass = serializer.getSerializedClass(); @@ -363,20 +361,6 @@ protected void initializeOp(Configuration hconf) throws HiveException { lbSetup(); } - int numPart = 0; - int numBuck = 0; - if (conf.getPartitionCols() != null && !conf.getPartitionCols().isEmpty()) { - numPart = conf.getPartitionCols().size(); - } - - // bucket number will exists only in PARTITION_BUCKET_SORTED mode - if (conf.getDpSortState().equals(DPSortState.PARTITION_BUCKET_SORTED)) { - numBuck = 1; - } - numKeyColToRead = numPart + numBuck; - keys = Lists.newArrayListWithCapacity(numKeyColToRead); - keyWritables = Lists.newArrayListWithCapacity(numKeyColToRead); - if (!bDynParts) { fsp = new FSPaths(specPath); @@ -423,7 +407,8 @@ private void dpSetup() { this.dpColNames = dpCtx.getDPColNames(); this.maxPartitions = dpCtx.getMaxPartitionsPerNode(); - assert numDynParts == dpColNames.size() : "number of dynamic paritions should be the same as the size of DP mapping"; + assert numDynParts == dpColNames.size() + : "number of dynamic paritions should be the same as the size of DP mapping"; if (dpColNames != null && dpColNames.size() > 0) { this.bDynParts = true; @@ -441,6 +426,9 @@ private void dpSetup() { newFieldsOI.add(sf.getFieldObjectInspector()); newFieldsName.add(sf.getFieldName()); this.dpStartCol++; + } else { + // once we found the start column for partition column we are done + break; } } assert newFieldsOI.size() > 0 : "new Fields ObjectInspector is empty"; @@ -457,11 +445,15 @@ protected void createBucketFiles(FSPaths fsp) throws HiveException { Set seenBuckets = new HashSet(); for (int idx = 0; idx < totalFiles; idx++) { if (this.getExecContext() != null && this.getExecContext().getFileId() != null) { - LOG.info("replace taskId from execContext "); + if (isInfoEnabled) { + LOG.info("replace taskId from execContext "); + } taskId = Utilities.replaceTaskIdFromFilename(taskId, this.getExecContext().getFileId()); - LOG.info("new taskId: FS " + taskId); + if (isInfoEnabled) { + LOG.info("new taskId: FS " + taskId); + } assert !multiFileSpray; assert totalFiles == 1; @@ -515,9 +507,13 @@ protected void createBucketForFileIdx(FSPaths fsp, int filesIdx) try { if (isNativeTable) { fsp.finalPaths[filesIdx] = fsp.getFinalPath(taskId, fsp.tmpPath, null); - LOG.info("Final Path: FS " + fsp.finalPaths[filesIdx]); + if (isInfoEnabled) { + LOG.info("Final Path: FS " + fsp.finalPaths[filesIdx]); + } fsp.outPaths[filesIdx] = fsp.getTaskOutPath(taskId); - LOG.info("Writing to temp file: FS " + fsp.outPaths[filesIdx]); + if (isInfoEnabled) { + LOG.info("Writing to temp file: FS " + fsp.outPaths[filesIdx]); + } } else { fsp.finalPaths[filesIdx] = fsp.outPaths[filesIdx] = specPath; } @@ -532,7 +528,9 @@ protected void createBucketForFileIdx(FSPaths fsp, int filesIdx) fsp.finalPaths[filesIdx] = fsp.getFinalPath(taskId, fsp.tmpPath, extension); } - LOG.info("New Final Path: FS " + fsp.finalPaths[filesIdx]); + if (isInfoEnabled) { + LOG.info("New Final Path: FS " + fsp.finalPaths[filesIdx]); + } if (isNativeTable) { // in recent hadoop versions, use deleteOnExit to clean tmp files. @@ -604,14 +602,22 @@ public void processOp(Object row, int tag) throws HiveException { updateProgress(); // if DP is enabled, get the final output writers and prepare the real output row - assert inputObjInspectors[0].getCategory() == ObjectInspector.Category.STRUCT : "input object inspector is not struct"; + assert inputObjInspectors[0].getCategory() == ObjectInspector.Category.STRUCT + : "input object inspector is not struct"; if (bDynParts) { + + // we need to read bucket number which is the last column in value (after partition columns) + if (conf.getDpSortState().equals(DPSortState.PARTITION_BUCKET_SORTED)) { + numDynParts += 1; + } + // copy the DP column values from the input row to dpVals dpVals.clear(); dpWritables.clear(); - ObjectInspectorUtils.partialCopyToStandardObject(dpWritables, row, dpStartCol, numDynParts, - (StructObjectInspector) inputObjInspectors[0], ObjectInspectorCopyOption.WRITABLE); + ObjectInspectorUtils.partialCopyToStandardObject(dpWritables, row, dpStartCol,numDynParts, + (StructObjectInspector) inputObjInspectors[0],ObjectInspectorCopyOption.WRITABLE); + // get a set of RecordWriter based on the DP column values // pass the null value along to the escaping process to determine what the dir should be for (Object o : dpWritables) { @@ -621,16 +627,11 @@ public void processOp(Object row, int tag) throws HiveException { dpVals.add(o.toString()); } } - // use SubStructObjectInspector to serialize the non-partitioning columns in the input row - recordValue = serializer.serialize(row, subSetOI); - // when dynamic partition sorting is not used, the DPSortState will be NONE - // in which we will fall back to old method of file system path creation - // i.e, having as many record writers as distinct values in partition column - if (conf.getDpSortState().equals(DPSortState.NONE)) { - fpaths = getDynOutPaths(dpVals, lbDirName); - } + fpaths = getDynOutPaths(dpVals, lbDirName); + // use SubStructObjectInspector to serialize the non-partitioning columns in the input row + recordValue = serializer.serialize(row, subSetOI); } else { if (lbDirName != null) { fpaths = lookupListBucketingPaths(lbDirName); @@ -686,8 +687,10 @@ public void processOp(Object row, int tag) throws HiveException { fpaths.updaters[++fpaths.acidFileOffset] = HiveFileFormatUtils.getAcidRecordUpdater( jc, conf.getTableInfo(), bucketNum, conf, fpaths.outPaths[fpaths.acidFileOffset], rowInspector, reporter, 0); - LOG.debug("Created updater for bucket number " + bucketNum + " using file " + - fpaths.outPaths[fpaths.acidFileOffset]); + if (isDebugEnabled) { + LOG.debug("Created updater for bucket number " + bucketNum + " using file " + + fpaths.outPaths[fpaths.acidFileOffset]); + } } if (conf.getWriteType() == AcidUtils.Operation.UPDATE) { @@ -834,10 +837,8 @@ protected FSPaths getDynOutPaths(List row, String lbDirName) throws Hive if (dpDir != null) { dpDir = appendToSource(lbDirName, dpDir); pathKey = dpDir; - int numericBucketNum = 0; if(conf.getDpSortState().equals(DPSortState.PARTITION_BUCKET_SORTED)) { String buckNum = row.get(row.size() - 1); - numericBucketNum = Integer.valueOf(buckNum); taskId = Utilities.replaceTaskIdFromFilename(Utilities.getTaskId(hconf), buckNum); pathKey = appendToSource(taskId, dpDir); } @@ -918,26 +919,6 @@ private String getDynPartDirectory(List row, List dpColNames) { } @Override - public void startGroup() throws HiveException { - if (!conf.getDpSortState().equals(DPSortState.NONE)) { - keyOI = getGroupKeyObjectInspector(); - keys.clear(); - keyWritables.clear(); - ObjectInspectorUtils.partialCopyToStandardObject(keyWritables, getGroupKeyObject(), 0, - numKeyColToRead, (StructObjectInspector) keyOI, ObjectInspectorCopyOption.WRITABLE); - - for (Object o : keyWritables) { - if (o == null || o.toString().length() == 0) { - keys.add(dpCtx.getDefaultPartitionName()); - } else { - keys.add(o.toString()); - } - } - fpaths = getDynOutPaths(keys, null); - } - } - - @Override public void closeOp(boolean abort) throws HiveException { if (!bDynParts && !filesCreated) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java index a066f98..3dc7c76 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java @@ -498,8 +498,6 @@ protected final void defaultStartGroup() throws HiveException { LOG.debug("Starting group for children:"); for (Operator op : childOperators) { - op.setGroupKeyObjectInspector(groupKeyOI); - op.setGroupKeyObject(groupKeyObject); op.startGroup(); } @@ -970,7 +968,6 @@ protected static StructObjectInspector initEvaluatorsAndReturnStruct( } protected transient Object groupKeyObject; - protected transient ObjectInspector groupKeyOI; public String getOperatorId() { return operatorId; @@ -1287,14 +1284,6 @@ public void setStatistics(Statistics stats) { } } - public void setGroupKeyObjectInspector(ObjectInspector keyObjectInspector) { - this.groupKeyOI = keyObjectInspector; - } - - public ObjectInspector getGroupKeyObjectInspector() { - return groupKeyOI; - } - public static Operator createDummy() { return new DummyOperator(); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java index 9bbc4ec..29307e7 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java @@ -67,6 +67,9 @@ } private static final Log LOG = LogFactory.getLog(ReduceSinkOperator.class.getName()); + private static final boolean isInfoEnabled = LOG.isInfoEnabled(); + private static final boolean isDebugEnabled = LOG.isDebugEnabled(); + private static final boolean isTraceEnabled = LOG.isTraceEnabled(); private static final long serialVersionUID = 1L; private static final MurmurHash hash = (MurmurHash) MurmurHash.getInstance(); @@ -117,6 +120,8 @@ protected transient Object[] cachedValues; protected transient List> distinctColIndices; protected transient Random random; + protected transient int bucketNumber; + /** * This two dimensional array holds key data and a corresponding Union object * which contains the tag identifying the aggregate expression for distinct columns. @@ -144,8 +149,14 @@ protected void initializeOp(Configuration hconf) throws HiveException { try { List keys = conf.getKeyCols(); - LOG.debug("keys size is " + keys.size()); - for (ExprNodeDesc k : keys) LOG.debug("Key exprNodeDesc " + k.getExprString()); + + if (isDebugEnabled) { + LOG.debug("keys size is " + keys.size()); + for (ExprNodeDesc k : keys) { + LOG.debug("Key exprNodeDesc " + k.getExprString()); + } + } + keyEval = new ExprNodeEvaluator[keys.size()]; int i = 0; for (ExprNodeDesc e : keys) { @@ -184,7 +195,9 @@ protected void initializeOp(Configuration hconf) throws HiveException { tag = conf.getTag(); tagByte[0] = (byte) tag; skipTag = conf.getSkipTag(); - LOG.info("Using tag = " + tag); + if (isInfoEnabled) { + LOG.info("Using tag = " + tag); + } TableDesc keyTableDesc = conf.getKeySerializeInfo(); keySerializer = (Serializer) keyTableDesc.getDeserializerClass() @@ -284,7 +297,10 @@ public void processOp(Object row, int tag) throws HiveException { bucketInspector = (IntObjectInspector)bucketField.getFieldObjectInspector(); } - LOG.info("keys are " + conf.getOutputKeyColumnNames() + " num distributions: " + conf.getNumDistributionKeys()); + if (isInfoEnabled) { + LOG.info("keys are " + conf.getOutputKeyColumnNames() + " num distributions: " + + conf.getNumDistributionKeys()); + } keyObjectInspector = initEvaluatorsAndReturnStruct(keyEval, distinctColIndices, conf.getOutputKeyColumnNames(), numDistributionKeys, rowInspector); @@ -304,15 +320,14 @@ public void processOp(Object row, int tag) throws HiveException { populateCachedDistributionKeys(row, 0); // replace bucketing columns with hashcode % numBuckets - int buckNum = -1; if (bucketEval != null) { - buckNum = computeBucketNumber(row, conf.getNumBuckets()); - cachedKeys[0][buckColIdxInKey] = new IntWritable(buckNum); + bucketNumber = computeBucketNumber(row, conf.getNumBuckets()); + cachedKeys[0][buckColIdxInKey] = new IntWritable(bucketNumber); } else if (conf.getWriteType() == AcidUtils.Operation.UPDATE || conf.getWriteType() == AcidUtils.Operation.DELETE) { // In the non-partitioned case we still want to compute the bucket number for updates and // deletes. - buckNum = computeBucketNumber(row, conf.getNumBuckets()); + bucketNumber = computeBucketNumber(row, conf.getNumBuckets()); } HiveKey firstKey = toHiveKey(cachedKeys[0], tag, null); @@ -328,7 +343,7 @@ public void processOp(Object row, int tag) throws HiveException { if (autoParallel && partitionEval.length > 0) { hashCode = computeMurmurHash(firstKey); } else { - hashCode = computeHashCode(row, buckNum); + hashCode = computeHashCode(row); } firstKey.setHashCode(hashCode); @@ -377,7 +392,9 @@ private int computeBucketNumber(Object row, int numBuckets) throws HiveException // column directly. Object recIdValue = acidRowInspector.getStructFieldData(row, recIdField); buckNum = bucketInspector.get(recIdInspector.getStructFieldData(recIdValue, bucketField)); - LOG.debug("Acid choosing bucket number " + buckNum); + if (isTraceEnabled) { + LOG.trace("Acid choosing bucket number " + buckNum); + } } else { for (int i = 0; i < bucketEval.length; i++) { Object o = bucketEval[i].evaluate(row); @@ -422,7 +439,7 @@ protected final int computeMurmurHash(HiveKey firstKey) { return hash.hash(firstKey.getBytes(), firstKey.getDistKeyLength(), 0); } - private int computeHashCode(Object row, int buckNum) throws HiveException { + private int computeHashCode(Object row) throws HiveException { // Evaluate the HashCode int keyHashCode = 0; if (partitionEval.length == 0) { @@ -446,8 +463,10 @@ private int computeHashCode(Object row, int buckNum) throws HiveException { + ObjectInspectorUtils.hashCode(o, partitionObjectInspectors[i]); } } - LOG.debug("Going to return hash code " + (keyHashCode * 31 + buckNum)); - return buckNum < 0 ? keyHashCode : keyHashCode * 31 + buckNum; + if (isTraceEnabled) { + LOG.trace("Going to return hash code " + (keyHashCode * 31 + bucketNumber)); + } + return bucketNumber < 0 ? keyHashCode : keyHashCode * 31 + bucketNumber; } private boolean partitionKeysAreNull(Object row) throws HiveException { @@ -493,10 +512,19 @@ protected void collect(BytesWritable keyWritable, Writable valueWritable) throws } private BytesWritable makeValueWritable(Object row) throws Exception { + int length = valueEval.length; + + // in case of bucketed table, insert the bucket number as the last column in value + if (bucketEval != null) { + length -= 1; + cachedValues[length] = new IntWritable(bucketNumber); + } + // Evaluate the value - for (int i = 0; i < valueEval.length; i++) { + for (int i = 0; i < length; i++) { cachedValues[i] = valueEval[i].evaluate(row); } + // Serialize the value return (BytesWritable) valueSerializer.serialize(cachedValues, valueObjectInspector); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecReducer.java ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecReducer.java index c9e469c..9cd8b56 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecReducer.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecReducer.java @@ -66,6 +66,8 @@ public class ExecReducer extends MapReduceBase implements Reducer { private static final Log LOG = LogFactory.getLog("ExecReducer"); + private static final boolean isInfoEnabled = LOG.isInfoEnabled(); + private static final boolean isTraceEnabled = LOG.isTraceEnabled(); private static final String PLAN_KEY = "__REDUCE_PLAN__"; // used to log memory usage periodically @@ -75,7 +77,6 @@ private final Deserializer[] inputValueDeserializer = new Deserializer[Byte.MAX_VALUE]; private final Object[] valueObject = new Object[Byte.MAX_VALUE]; private final List row = new ArrayList(Utilities.reduceFieldNameList.size()); - private final boolean isLogInfoEnabled = LOG.isInfoEnabled(); // TODO: move to DynamicSerDe when it's ready private Deserializer inputKeyDeserializer; @@ -101,16 +102,18 @@ public void configure(JobConf job) { ObjectInspector[] valueObjectInspector = new ObjectInspector[Byte.MAX_VALUE]; ObjectInspector keyObjectInspector; - LOG.info("maximum memory = " + memoryMXBean.getHeapMemoryUsage().getMax()); + if (isInfoEnabled) { + LOG.info("maximum memory = " + memoryMXBean.getHeapMemoryUsage().getMax()); - try { - LOG.info("conf classpath = " - + Arrays.asList(((URLClassLoader) job.getClassLoader()).getURLs())); - LOG.info("thread classpath = " - + Arrays.asList(((URLClassLoader) Thread.currentThread() - .getContextClassLoader()).getURLs())); - } catch (Exception e) { - LOG.info("cannot get classpath: " + e.getMessage()); + try { + LOG.info("conf classpath = " + + Arrays.asList(((URLClassLoader) job.getClassLoader()).getURLs())); + LOG.info("thread classpath = " + + Arrays.asList(((URLClassLoader) Thread.currentThread() + .getContextClassLoader()).getURLs())); + } catch (Exception e) { + LOG.info("cannot get classpath: " + e.getMessage()); + } } jc = job; @@ -147,7 +150,6 @@ public void configure(JobConf job) { ArrayList ois = new ArrayList(); ois.add(keyObjectInspector); ois.add(valueObjectInspector[tag]); - reducer.setGroupKeyObjectInspector(keyObjectInspector); rowObjectInspector[tag] = ObjectInspectorFactory .getStandardStructObjectInspector(Utilities.reduceFieldNameList, ois); } @@ -202,7 +204,9 @@ public void reduce(Object key, Iterator values, OutputCollector output, groupKey = new BytesWritable(); } else { // If a operator wants to do some work at the end of a group - LOG.trace("End Group"); + if (isTraceEnabled) { + LOG.trace("End Group"); + } reducer.endGroup(); } @@ -217,9 +221,11 @@ public void reduce(Object key, Iterator values, OutputCollector output, } groupKey.set(keyWritable.get(), 0, keyWritable.getSize()); - LOG.trace("Start Group"); - reducer.setGroupKeyObject(keyObject); + if (isTraceEnabled) { + LOG.trace("Start Group"); + } reducer.startGroup(); + reducer.setGroupKeyObject(keyObject); } // System.err.print(keyObject.toString()); while (values.hasNext()) { @@ -239,12 +245,14 @@ public void reduce(Object key, Iterator values, OutputCollector output, row.clear(); row.add(keyObject); row.add(valueObject[tag]); - if (isLogInfoEnabled) { + if (isInfoEnabled) { cntr++; if (cntr == nextCntr) { long used_memory = memoryMXBean.getHeapMemoryUsage().getUsed(); - LOG.info("ExecReducer: processing " + cntr - + " rows: used memory = " + used_memory); + if (isInfoEnabled) { + LOG.info("ExecReducer: processing " + cntr + + " rows: used memory = " + used_memory); + } nextCntr = getNextCntr(cntr); } } @@ -290,17 +298,19 @@ private long getNextCntr(long cntr) { public void close() { // No row was processed - if (oc == null) { + if (oc == null && isTraceEnabled) { LOG.trace("Close called without any rows processed"); } try { if (groupKey != null) { // If a operator wants to do some work at the end of a group - LOG.trace("End Group"); + if (isTraceEnabled) { + LOG.trace("End Group"); + } reducer.endGroup(); } - if (isLogInfoEnabled) { + if (isInfoEnabled) { LOG.info("ExecReducer: processed " + cntr + " rows: used memory = " + memoryMXBean.getHeapMemoryUsage().getUsed()); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java index 1ca62da..1ac6353 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java @@ -127,7 +127,6 @@ void init(JobConf jconf, Operator reducer, boolean vectorized, TableDesc keyT .getDeserializerClass(), null); SerDeUtils.initializeSerDe(inputKeyDeserializer, null, keyTableDesc.getProperties(), null); keyObjectInspector = inputKeyDeserializer.getObjectInspector(); - reducer.setGroupKeyObjectInspector(keyObjectInspector); if(vectorized) { keyStructInspector = (StructObjectInspector) keyObjectInspector; @@ -233,8 +232,8 @@ public boolean pushRecord() throws HiveException { } groupKey.set(keyWritable.getBytes(), 0, keyWritable.getLength()); - reducer.setGroupKeyObject(keyObject); reducer.startGroup(); + reducer.setGroupKeyObject(keyObject); } /* this.keyObject passed via reference */ diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorReduceSinkOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorReduceSinkOperator.java index c096414..2146b39 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorReduceSinkOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorReduceSinkOperator.java @@ -270,10 +270,9 @@ public void processOp(Object row, int tag) throws HiveException { populatedCachedDistributionKeys(vrg, rowIndex, 0); // replace bucketing columns with hashcode % numBuckets - int buckNum = -1; if (bucketEval != null) { - buckNum = computeBucketNumber(vrg, rowIndex, conf.getNumBuckets()); - cachedKeys[0][buckColIdxInKey] = new IntWritable(buckNum); + bucketNumber = computeBucketNumber(vrg, rowIndex, conf.getNumBuckets()); + cachedKeys[0][buckColIdxInKey] = new IntWritable(bucketNumber); } HiveKey firstKey = toHiveKey(cachedKeys[0], tag, null); int distKeyLength = firstKey.getDistKeyLength(); @@ -289,7 +288,7 @@ public void processOp(Object row, int tag) throws HiveException { if (autoParallel && partitionEval.length > 0) { hashCode = computeMurmurHash(firstKey); } else { - hashCode = computeHashCode(vrg, rowIndex, buckNum); + hashCode = computeHashCode(vrg, rowIndex); } firstKey.setHashCode(hashCode); @@ -417,7 +416,15 @@ private void populateCachedDistinctKeys( private BytesWritable makeValueWritable(VectorizedRowBatch vrg, int rowIndex) throws HiveException, SerDeException { - for (int i = 0; i < valueEval.length; i++) { + int length = valueEval.length; + + // in case of bucketed table, insert the bucket number as the last column in value + if (bucketEval != null) { + length -= 1; + cachedValues[length] = new IntWritable(bucketNumber); + } + + for (int i = 0; i < length; i++) { int batchColumn = valueEval[i].getOutputColumn(); ColumnVector vectorColumn = vrg.cols[batchColumn]; cachedValues[i] = valueWriters[i].writeValue(vectorColumn, rowIndex); @@ -426,7 +433,7 @@ private BytesWritable makeValueWritable(VectorizedRowBatch vrg, int rowIndex) return (BytesWritable)valueSerializer.serialize(cachedValues, valueObjectInspector); } - private int computeHashCode(VectorizedRowBatch vrg, int rowIndex, int buckNum) throws HiveException { + private int computeHashCode(VectorizedRowBatch vrg, int rowIndex) throws HiveException { // Evaluate the HashCode int keyHashCode = 0; if (partitionEval.length == 0) { @@ -449,7 +456,7 @@ private int computeHashCode(VectorizedRowBatch vrg, int rowIndex, int buckNum) t partitionWriters[p].getObjectInspector()); } } - return buckNum < 0 ? keyHashCode : keyHashCode * 31 + buckNum; + return bucketNumber < 0 ? keyHashCode : keyHashCode * 31 + bucketNumber; } private boolean partitionKeysAreNull(VectorizedRowBatch vrg, int rowIndex) diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java index cbb59ae..e76eac0 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java @@ -85,6 +85,7 @@ */ public class SortedDynPartitionOptimizer implements Transform { + private static final String BUCKET_NUMBER_COL_NAME = "_bucket_number"; @Override public ParseContext transform(ParseContext pCtx) throws SemanticException { @@ -216,6 +217,13 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, ReduceSinkDesc rsConf = getReduceSinkDesc(partitionPositions, sortPositions, sortOrder, newValueCols, bucketColumns, numBuckets, fsParent, fsOp.getConf().getWriteType()); + if (!bucketColumns.isEmpty()) { + String tableAlias = outRR.getColumnInfos().get(0).getTabAlias(); + ColumnInfo ci = new ColumnInfo(BUCKET_NUMBER_COL_NAME, TypeInfoFactory.intTypeInfo, + tableAlias, true, true); + outRR.put(tableAlias, BUCKET_NUMBER_COL_NAME, ci); + } + // Create ReduceSink operator ReduceSinkOperator rsOp = (ReduceSinkOperator) putOpInsertMap( OperatorFactory.getAndMakeChild(rsConf, new RowSchema(outRR.getColumnInfos()), fsParent), @@ -380,8 +388,11 @@ public ReduceSinkDesc getReduceSinkDesc(List partitionPositions, // corresponding with bucket number and hence their OIs for (Integer idx : keyColsPosInVal) { if (idx < 0) { - newKeyCols.add(new ExprNodeConstantDesc(TypeInfoFactory - .getPrimitiveTypeInfoFromPrimitiveWritable(IntWritable.class), -1)); + // add bucket number column to both key and value + ExprNodeConstantDesc encd = new ExprNodeConstantDesc(TypeInfoFactory + .getPrimitiveTypeInfoFromPrimitiveWritable(IntWritable.class), -1); + newKeyCols.add(encd); + newValueCols.add(encd); } else { newKeyCols.add(newValueCols.get(idx).clone()); } @@ -417,6 +428,9 @@ public ReduceSinkDesc getReduceSinkDesc(List partitionPositions, List outCols = Utilities.getInternalColumnNamesFromSignature(parent.getSchema() .getSignature()); ArrayList outValColNames = Lists.newArrayList(outCols); + if (!bucketColumns.isEmpty()) { + outValColNames.add(BUCKET_NUMBER_COL_NAME); + } List valFields = PlanUtils.getFieldSchemasFromColumnList(newValueCols, outValColNames, 0, ""); TableDesc valueTable = PlanUtils.getReduceValueTableDesc(valFields); diff --git ql/src/test/queries/clientpositive/dynpart_sort_optimization2.q ql/src/test/queries/clientpositive/dynpart_sort_optimization2.q new file mode 100644 index 0000000..adba1d2 --- /dev/null +++ ql/src/test/queries/clientpositive/dynpart_sort_optimization2.q @@ -0,0 +1,200 @@ +set hive.optimize.sort.dynamic.partition=true; +set hive.exec.dynamic.partition=true; +set hive.exec.max.dynamic.partitions=1000; +set hive.exec.max.dynamic.partitions.pernode=1000; +set hive.exec.dynamic.partition.mode=nonstrict; +set hive.enforce.bucketing=false; +set hive.enforce.sorting=false; +set hive.exec.submitviachild=true; +set hive.exec.submit.local.task.via.child=true; + +drop table ss; +drop table ss_orc; +drop table ss_part; +drop table ss_part_orc; + +create table ss ( +ss_sold_date_sk int, +ss_net_paid_inc_tax float, +ss_net_profit float); + +create table ss_part ( +ss_net_paid_inc_tax float, +ss_net_profit float) +partitioned by (ss_sold_date_sk int); + +load data local inpath '../../data/files/dynpart_test.txt' overwrite into table ss; + +explain insert overwrite table ss_part partition (ss_sold_date_sk) +select ss_net_paid_inc_tax, + ss_net_profit, + ss_sold_date_sk + from ss + where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638 + group by ss_sold_date_sk, + ss_net_paid_inc_tax, + ss_net_profit + distribute by ss_sold_date_sk; + +insert overwrite table ss_part partition (ss_sold_date_sk) +select ss_net_paid_inc_tax, + ss_net_profit, + ss_sold_date_sk + from ss + where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638 + group by ss_sold_date_sk, + ss_net_paid_inc_tax, + ss_net_profit + distribute by ss_sold_date_sk; + +desc formatted ss_part partition(ss_sold_date_sk=2452617); +select * from ss_part where ss_sold_date_sk=2452617; + +desc formatted ss_part partition(ss_sold_date_sk=2452638); +select * from ss_part where ss_sold_date_sk=2452638; + +explain insert overwrite table ss_part partition (ss_sold_date_sk) +select ss_net_paid_inc_tax, + ss_net_profit, + ss_sold_date_sk + from ss + where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638 + distribute by ss_sold_date_sk; + +insert overwrite table ss_part partition (ss_sold_date_sk) +select ss_net_paid_inc_tax, + ss_net_profit, + ss_sold_date_sk + from ss + where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638 + distribute by ss_sold_date_sk; + +desc formatted ss_part partition(ss_sold_date_sk=2452617); +select * from ss_part where ss_sold_date_sk=2452617; + +desc formatted ss_part partition(ss_sold_date_sk=2452638); +select * from ss_part where ss_sold_date_sk=2452638; + +set hive.optimize.sort.dynamic.partition=false; +-- SORT DYNAMIC PARTITION DISABLED + +explain insert overwrite table ss_part partition (ss_sold_date_sk) +select ss_net_paid_inc_tax, + ss_net_profit, + ss_sold_date_sk + from ss + where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638 + group by ss_sold_date_sk, + ss_net_paid_inc_tax, + ss_net_profit + distribute by ss_sold_date_sk; + +insert overwrite table ss_part partition (ss_sold_date_sk) +select ss_net_paid_inc_tax, + ss_net_profit, + ss_sold_date_sk + from ss + where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638 + group by ss_sold_date_sk, + ss_net_paid_inc_tax, + ss_net_profit + distribute by ss_sold_date_sk; + +desc formatted ss_part partition(ss_sold_date_sk=2452617); +select * from ss_part where ss_sold_date_sk=2452617; + +desc formatted ss_part partition(ss_sold_date_sk=2452638); +select * from ss_part where ss_sold_date_sk=2452638; + +explain insert overwrite table ss_part partition (ss_sold_date_sk) +select ss_net_paid_inc_tax, + ss_net_profit, + ss_sold_date_sk + from ss + where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638 + distribute by ss_sold_date_sk; + +insert overwrite table ss_part partition (ss_sold_date_sk) +select ss_net_paid_inc_tax, + ss_net_profit, + ss_sold_date_sk + from ss + where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638 + distribute by ss_sold_date_sk; + +desc formatted ss_part partition(ss_sold_date_sk=2452617); +select * from ss_part where ss_sold_date_sk=2452617; + +desc formatted ss_part partition(ss_sold_date_sk=2452638); +select * from ss_part where ss_sold_date_sk=2452638; + +set hive.vectorized.execution.enabled=true; +-- VECTORIZATION IS ENABLED + +create table ss_orc ( +ss_sold_date_sk int, +ss_net_paid_inc_tax float, +ss_net_profit float) stored as orc; + +create table ss_part_orc ( +ss_net_paid_inc_tax float, +ss_net_profit float) +partitioned by (ss_sold_date_sk int) stored as orc; + +insert overwrite table ss_orc select * from ss; + +drop table ss; +drop table ss_part; + +explain insert overwrite table ss_part_orc partition (ss_sold_date_sk) +select ss_net_paid_inc_tax, + ss_net_profit, + ss_sold_date_sk + from ss_orc + where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638 + group by ss_sold_date_sk, + ss_net_paid_inc_tax, + ss_net_profit + distribute by ss_sold_date_sk; + +insert overwrite table ss_part_orc partition (ss_sold_date_sk) +select ss_net_paid_inc_tax, + ss_net_profit, + ss_sold_date_sk + from ss_orc + where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638 + group by ss_sold_date_sk, + ss_net_paid_inc_tax, + ss_net_profit + distribute by ss_sold_date_sk; + +desc formatted ss_part_orc partition(ss_sold_date_sk=2452617); +select * from ss_part_orc where ss_sold_date_sk=2452617; + +desc formatted ss_part_orc partition(ss_sold_date_sk=2452638); +select * from ss_part_orc where ss_sold_date_sk=2452638; + +explain insert overwrite table ss_part_orc partition (ss_sold_date_sk) +select ss_net_paid_inc_tax, + ss_net_profit, + ss_sold_date_sk + from ss_orc + where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638 + distribute by ss_sold_date_sk; + +insert overwrite table ss_part_orc partition (ss_sold_date_sk) +select ss_net_paid_inc_tax, + ss_net_profit, + ss_sold_date_sk + from ss_orc + where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638 + distribute by ss_sold_date_sk; + +desc formatted ss_part_orc partition(ss_sold_date_sk=2452617); +select * from ss_part_orc where ss_sold_date_sk=2452617; + +desc formatted ss_part_orc partition(ss_sold_date_sk=2452638); +select * from ss_part_orc where ss_sold_date_sk=2452638; + +drop table ss_orc; +drop table ss_part_orc; \ No newline at end of file diff --git ql/src/test/results/clientpositive/dynpart_sort_opt_vectorization.q.out ql/src/test/results/clientpositive/dynpart_sort_opt_vectorization.q.out index 04cf000..042a3be 100644 --- ql/src/test/results/clientpositive/dynpart_sort_opt_vectorization.q.out +++ ql/src/test/results/clientpositive/dynpart_sort_opt_vectorization.q.out @@ -346,7 +346,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col4 (type: tinyint) Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint), -1 (type: int) Execution mode: vectorized Reduce Operator Tree: Extract @@ -403,7 +403,7 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col4 (type: tinyint) Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint), -1 (type: int) Execution mode: vectorized Reduce Operator Tree: Extract @@ -695,7 +695,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col4 (type: tinyint) Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint), -1 (type: int) Execution mode: vectorized Reduce Operator Tree: Extract @@ -752,7 +752,7 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col4 (type: tinyint) Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint), -1 (type: int) Execution mode: vectorized Reduce Operator Tree: Extract @@ -1819,7 +1819,7 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col4 (type: tinyint) Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint), -1 (type: int) Execution mode: vectorized Reduce Operator Tree: Extract diff --git ql/src/test/results/clientpositive/dynpart_sort_optimization.q.out ql/src/test/results/clientpositive/dynpart_sort_optimization.q.out index 941aa9e..c50322e 100644 --- ql/src/test/results/clientpositive/dynpart_sort_optimization.q.out +++ ql/src/test/results/clientpositive/dynpart_sort_optimization.q.out @@ -279,7 +279,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col4 (type: tinyint) Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint), -1 (type: int) Reduce Operator Tree: Extract Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE @@ -335,7 +335,7 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col4 (type: tinyint) Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint), -1 (type: int) Reduce Operator Tree: Extract Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE @@ -602,7 +602,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col4 (type: tinyint) Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint), -1 (type: int) Reduce Operator Tree: Extract Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE @@ -658,7 +658,7 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col4 (type: tinyint) Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint), -1 (type: int) Reduce Operator Tree: Extract Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE @@ -1721,7 +1721,7 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col4 (type: tinyint) Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint), -1 (type: int) Reduce Operator Tree: Extract Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE diff --git ql/src/test/results/clientpositive/dynpart_sort_optimization2.q.out ql/src/test/results/clientpositive/dynpart_sort_optimization2.q.out new file mode 100644 index 0000000..f08fe32 --- /dev/null +++ ql/src/test/results/clientpositive/dynpart_sort_optimization2.q.out @@ -0,0 +1,1520 @@ +PREHOOK: query: drop table ss +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table ss +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table ss_orc +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table ss_orc +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table ss_part +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table ss_part +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table ss_part_orc +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table ss_part_orc +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table ss ( +ss_sold_date_sk int, +ss_net_paid_inc_tax float, +ss_net_profit float) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@ss +POSTHOOK: query: create table ss ( +ss_sold_date_sk int, +ss_net_paid_inc_tax float, +ss_net_profit float) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@ss +PREHOOK: query: create table ss_part ( +ss_net_paid_inc_tax float, +ss_net_profit float) +partitioned by (ss_sold_date_sk int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@ss_part +POSTHOOK: query: create table ss_part ( +ss_net_paid_inc_tax float, +ss_net_profit float) +partitioned by (ss_sold_date_sk int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@ss_part +PREHOOK: query: load data local inpath '../../data/files/dynpart_test.txt' overwrite into table ss +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@ss +POSTHOOK: query: load data local inpath '../../data/files/dynpart_test.txt' overwrite into table ss +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@ss +PREHOOK: query: explain insert overwrite table ss_part partition (ss_sold_date_sk) +select ss_net_paid_inc_tax, + ss_net_profit, + ss_sold_date_sk + from ss + where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638 + group by ss_sold_date_sk, + ss_net_paid_inc_tax, + ss_net_profit + distribute by ss_sold_date_sk +PREHOOK: type: QUERY +POSTHOOK: query: explain insert overwrite table ss_part partition (ss_sold_date_sk) +select ss_net_paid_inc_tax, + ss_net_profit, + ss_sold_date_sk + from ss + where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638 + group by ss_sold_date_sk, + ss_net_paid_inc_tax, + ss_net_profit + distribute by ss_sold_date_sk +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: ss + Statistics: Num rows: 46 Data size: 553 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((ss_sold_date_sk >= 2452617) and (ss_sold_date_sk <= 2452638)) (type: boolean) + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_net_paid_inc_tax (type: float), ss_net_profit (type: float) + outputColumnNames: ss_sold_date_sk, ss_net_paid_inc_tax, ss_net_profit + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: ss_sold_date_sk (type: int), ss_net_paid_inc_tax (type: float), ss_net_profit (type: float) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: float), _col2 (type: float) + sort order: +++ + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: float), KEY._col2 (type: float) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: float), _col2 (type: float), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.ss_part + + Stage: Stage-0 + Move Operator + tables: + partition: + ss_sold_date_sk + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.ss_part + + Stage: Stage-2 + Stats-Aggr Operator + +PREHOOK: query: insert overwrite table ss_part partition (ss_sold_date_sk) +select ss_net_paid_inc_tax, + ss_net_profit, + ss_sold_date_sk + from ss + where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638 + group by ss_sold_date_sk, + ss_net_paid_inc_tax, + ss_net_profit + distribute by ss_sold_date_sk +PREHOOK: type: QUERY +PREHOOK: Input: default@ss +PREHOOK: Output: default@ss_part +POSTHOOK: query: insert overwrite table ss_part partition (ss_sold_date_sk) +select ss_net_paid_inc_tax, + ss_net_profit, + ss_sold_date_sk + from ss + where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638 + group by ss_sold_date_sk, + ss_net_paid_inc_tax, + ss_net_profit + distribute by ss_sold_date_sk +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ss +POSTHOOK: Output: default@ss_part@ss_sold_date_sk=2452617 +POSTHOOK: Output: default@ss_part@ss_sold_date_sk=2452638 +POSTHOOK: Lineage: ss_part PARTITION(ss_sold_date_sk=2452617).ss_net_paid_inc_tax SIMPLE [(ss)ss.FieldSchema(name:ss_net_paid_inc_tax, type:float, comment:null), ] +POSTHOOK: Lineage: ss_part PARTITION(ss_sold_date_sk=2452617).ss_net_profit SIMPLE [(ss)ss.FieldSchema(name:ss_net_profit, type:float, comment:null), ] +POSTHOOK: Lineage: ss_part PARTITION(ss_sold_date_sk=2452638).ss_net_paid_inc_tax SIMPLE [(ss)ss.FieldSchema(name:ss_net_paid_inc_tax, type:float, comment:null), ] +POSTHOOK: Lineage: ss_part PARTITION(ss_sold_date_sk=2452638).ss_net_profit SIMPLE [(ss)ss.FieldSchema(name:ss_net_profit, type:float, comment:null), ] +PREHOOK: query: desc formatted ss_part partition(ss_sold_date_sk=2452617) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@ss_part +POSTHOOK: query: desc formatted ss_part partition(ss_sold_date_sk=2452617) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@ss_part +# col_name data_type comment + +ss_net_paid_inc_tax float +ss_net_profit float + +# Partition Information +# col_name data_type comment + +ss_sold_date_sk int + +# Detailed Partition Information +Partition Value: [2452617] +Database: default +Table: ss_part +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 11 + rawDataSize 151 + totalSize 162 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: select * from ss_part where ss_sold_date_sk=2452617 +PREHOOK: type: QUERY +PREHOOK: Input: default@ss_part +PREHOOK: Input: default@ss_part@ss_sold_date_sk=2452617 +#### A masked pattern was here #### +POSTHOOK: query: select * from ss_part where ss_sold_date_sk=2452617 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ss_part +POSTHOOK: Input: default@ss_part@ss_sold_date_sk=2452617 +#### A masked pattern was here #### +2.1 -2026.3 2452617 +2.99 -11.32 2452617 +85.8 25.61 2452617 +552.96 -1363.84 2452617 +565.92 196.48 2452617 +879.07 -2185.76 2452617 +1765.07 -4648.8 2452617 +3423.95 -3164.07 2452617 +5362.01 -600.28 2452617 +7412.83 2071.68 2452617 +10022.63 3952.8 2452617 +PREHOOK: query: desc formatted ss_part partition(ss_sold_date_sk=2452638) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@ss_part +POSTHOOK: query: desc formatted ss_part partition(ss_sold_date_sk=2452638) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@ss_part +# col_name data_type comment + +ss_net_paid_inc_tax float +ss_net_profit float + +# Partition Information +# col_name data_type comment + +ss_sold_date_sk int + +# Detailed Partition Information +Partition Value: [2452638] +Database: default +Table: ss_part +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 13 + rawDataSize 186 + totalSize 199 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: select * from ss_part where ss_sold_date_sk=2452638 +PREHOOK: type: QUERY +PREHOOK: Input: default@ss_part +PREHOOK: Input: default@ss_part@ss_sold_date_sk=2452638 +#### A masked pattern was here #### +POSTHOOK: query: select * from ss_part where ss_sold_date_sk=2452638 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ss_part +POSTHOOK: Input: default@ss_part@ss_sold_date_sk=2452638 +#### A masked pattern was here #### +0.15 -241.22 2452638 +150.39 -162.12 2452638 +156.67 -4626.56 2452638 +181.03 -207.24 2452638 +267.01 -3266.36 2452638 +317.87 -3775.38 2452638 +1327.08 57.97 2452638 +1413.19 178.08 2452638 +1524.33 494.37 2452638 +1971.35 -488.25 2452638 +4133.98 -775.72 2452638 +4329.49 -4000.51 2452638 +10171.1 660.48 2452638 +PREHOOK: query: explain insert overwrite table ss_part partition (ss_sold_date_sk) +select ss_net_paid_inc_tax, + ss_net_profit, + ss_sold_date_sk + from ss + where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638 + distribute by ss_sold_date_sk +PREHOOK: type: QUERY +POSTHOOK: query: explain insert overwrite table ss_part partition (ss_sold_date_sk) +select ss_net_paid_inc_tax, + ss_net_profit, + ss_sold_date_sk + from ss + where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638 + distribute by ss_sold_date_sk +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: ss + Statistics: Num rows: 46 Data size: 553 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((ss_sold_date_sk >= 2452617) and (ss_sold_date_sk <= 2452638)) (type: boolean) + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ss_net_paid_inc_tax (type: float), ss_net_profit (type: float), ss_sold_date_sk (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: float), _col1 (type: float), _col2 (type: int) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: float), VALUE._col1 (type: float), VALUE._col2 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.ss_part + + Stage: Stage-0 + Move Operator + tables: + partition: + ss_sold_date_sk + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.ss_part + + Stage: Stage-2 + Stats-Aggr Operator + +PREHOOK: query: insert overwrite table ss_part partition (ss_sold_date_sk) +select ss_net_paid_inc_tax, + ss_net_profit, + ss_sold_date_sk + from ss + where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638 + distribute by ss_sold_date_sk +PREHOOK: type: QUERY +PREHOOK: Input: default@ss +PREHOOK: Output: default@ss_part +POSTHOOK: query: insert overwrite table ss_part partition (ss_sold_date_sk) +select ss_net_paid_inc_tax, + ss_net_profit, + ss_sold_date_sk + from ss + where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638 + distribute by ss_sold_date_sk +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ss +POSTHOOK: Output: default@ss_part@ss_sold_date_sk=2452617 +POSTHOOK: Output: default@ss_part@ss_sold_date_sk=2452638 +POSTHOOK: Lineage: ss_part PARTITION(ss_sold_date_sk=2452617).ss_net_paid_inc_tax SIMPLE [(ss)ss.FieldSchema(name:ss_net_paid_inc_tax, type:float, comment:null), ] +POSTHOOK: Lineage: ss_part PARTITION(ss_sold_date_sk=2452617).ss_net_profit SIMPLE [(ss)ss.FieldSchema(name:ss_net_profit, type:float, comment:null), ] +POSTHOOK: Lineage: ss_part PARTITION(ss_sold_date_sk=2452638).ss_net_paid_inc_tax SIMPLE [(ss)ss.FieldSchema(name:ss_net_paid_inc_tax, type:float, comment:null), ] +POSTHOOK: Lineage: ss_part PARTITION(ss_sold_date_sk=2452638).ss_net_profit SIMPLE [(ss)ss.FieldSchema(name:ss_net_profit, type:float, comment:null), ] +PREHOOK: query: desc formatted ss_part partition(ss_sold_date_sk=2452617) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@ss_part +POSTHOOK: query: desc formatted ss_part partition(ss_sold_date_sk=2452617) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@ss_part +# col_name data_type comment + +ss_net_paid_inc_tax float +ss_net_profit float + +# Partition Information +# col_name data_type comment + +ss_sold_date_sk int + +# Detailed Partition Information +Partition Value: [2452617] +Database: default +Table: ss_part +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 11 + rawDataSize 151 + totalSize 162 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: select * from ss_part where ss_sold_date_sk=2452617 +PREHOOK: type: QUERY +PREHOOK: Input: default@ss_part +PREHOOK: Input: default@ss_part@ss_sold_date_sk=2452617 +#### A masked pattern was here #### +POSTHOOK: query: select * from ss_part where ss_sold_date_sk=2452617 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ss_part +POSTHOOK: Input: default@ss_part@ss_sold_date_sk=2452617 +#### A masked pattern was here #### +10022.63 3952.8 2452617 +2.99 -11.32 2452617 +3423.95 -3164.07 2452617 +5362.01 -600.28 2452617 +565.92 196.48 2452617 +85.8 25.61 2452617 +7412.83 2071.68 2452617 +879.07 -2185.76 2452617 +1765.07 -4648.8 2452617 +552.96 -1363.84 2452617 +2.1 -2026.3 2452617 +PREHOOK: query: desc formatted ss_part partition(ss_sold_date_sk=2452638) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@ss_part +POSTHOOK: query: desc formatted ss_part partition(ss_sold_date_sk=2452638) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@ss_part +# col_name data_type comment + +ss_net_paid_inc_tax float +ss_net_profit float + +# Partition Information +# col_name data_type comment + +ss_sold_date_sk int + +# Detailed Partition Information +Partition Value: [2452638] +Database: default +Table: ss_part +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 13 + rawDataSize 186 + totalSize 199 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: select * from ss_part where ss_sold_date_sk=2452638 +PREHOOK: type: QUERY +PREHOOK: Input: default@ss_part +PREHOOK: Input: default@ss_part@ss_sold_date_sk=2452638 +#### A masked pattern was here #### +POSTHOOK: query: select * from ss_part where ss_sold_date_sk=2452638 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ss_part +POSTHOOK: Input: default@ss_part@ss_sold_date_sk=2452638 +#### A masked pattern was here #### +4329.49 -4000.51 2452638 +1413.19 178.08 2452638 +150.39 -162.12 2452638 +1524.33 494.37 2452638 +0.15 -241.22 2452638 +267.01 -3266.36 2452638 +181.03 -207.24 2452638 +1971.35 -488.25 2452638 +1327.08 57.97 2452638 +156.67 -4626.56 2452638 +317.87 -3775.38 2452638 +10171.1 660.48 2452638 +4133.98 -775.72 2452638 +PREHOOK: query: -- SORT DYNAMIC PARTITION DISABLED + +explain insert overwrite table ss_part partition (ss_sold_date_sk) +select ss_net_paid_inc_tax, + ss_net_profit, + ss_sold_date_sk + from ss + where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638 + group by ss_sold_date_sk, + ss_net_paid_inc_tax, + ss_net_profit + distribute by ss_sold_date_sk +PREHOOK: type: QUERY +POSTHOOK: query: -- SORT DYNAMIC PARTITION DISABLED + +explain insert overwrite table ss_part partition (ss_sold_date_sk) +select ss_net_paid_inc_tax, + ss_net_profit, + ss_sold_date_sk + from ss + where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638 + group by ss_sold_date_sk, + ss_net_paid_inc_tax, + ss_net_profit + distribute by ss_sold_date_sk +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: ss + Statistics: Num rows: 46 Data size: 553 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((ss_sold_date_sk >= 2452617) and (ss_sold_date_sk <= 2452638)) (type: boolean) + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_net_paid_inc_tax (type: float), ss_net_profit (type: float) + outputColumnNames: ss_sold_date_sk, ss_net_paid_inc_tax, ss_net_profit + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: ss_sold_date_sk (type: int), ss_net_paid_inc_tax (type: float), ss_net_profit (type: float) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: float), _col2 (type: float) + sort order: +++ + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: float), KEY._col2 (type: float) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: float), _col2 (type: float), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.ss_part + + Stage: Stage-0 + Move Operator + tables: + partition: + ss_sold_date_sk + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.ss_part + + Stage: Stage-2 + Stats-Aggr Operator + +PREHOOK: query: insert overwrite table ss_part partition (ss_sold_date_sk) +select ss_net_paid_inc_tax, + ss_net_profit, + ss_sold_date_sk + from ss + where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638 + group by ss_sold_date_sk, + ss_net_paid_inc_tax, + ss_net_profit + distribute by ss_sold_date_sk +PREHOOK: type: QUERY +PREHOOK: Input: default@ss +PREHOOK: Output: default@ss_part +POSTHOOK: query: insert overwrite table ss_part partition (ss_sold_date_sk) +select ss_net_paid_inc_tax, + ss_net_profit, + ss_sold_date_sk + from ss + where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638 + group by ss_sold_date_sk, + ss_net_paid_inc_tax, + ss_net_profit + distribute by ss_sold_date_sk +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ss +POSTHOOK: Output: default@ss_part@ss_sold_date_sk=2452617 +POSTHOOK: Output: default@ss_part@ss_sold_date_sk=2452638 +POSTHOOK: Lineage: ss_part PARTITION(ss_sold_date_sk=2452617).ss_net_paid_inc_tax SIMPLE [(ss)ss.FieldSchema(name:ss_net_paid_inc_tax, type:float, comment:null), ] +POSTHOOK: Lineage: ss_part PARTITION(ss_sold_date_sk=2452617).ss_net_profit SIMPLE [(ss)ss.FieldSchema(name:ss_net_profit, type:float, comment:null), ] +POSTHOOK: Lineage: ss_part PARTITION(ss_sold_date_sk=2452638).ss_net_paid_inc_tax SIMPLE [(ss)ss.FieldSchema(name:ss_net_paid_inc_tax, type:float, comment:null), ] +POSTHOOK: Lineage: ss_part PARTITION(ss_sold_date_sk=2452638).ss_net_profit SIMPLE [(ss)ss.FieldSchema(name:ss_net_profit, type:float, comment:null), ] +PREHOOK: query: desc formatted ss_part partition(ss_sold_date_sk=2452617) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@ss_part +POSTHOOK: query: desc formatted ss_part partition(ss_sold_date_sk=2452617) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@ss_part +# col_name data_type comment + +ss_net_paid_inc_tax float +ss_net_profit float + +# Partition Information +# col_name data_type comment + +ss_sold_date_sk int + +# Detailed Partition Information +Partition Value: [2452617] +Database: default +Table: ss_part +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 11 + rawDataSize 151 + totalSize 162 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: select * from ss_part where ss_sold_date_sk=2452617 +PREHOOK: type: QUERY +PREHOOK: Input: default@ss_part +PREHOOK: Input: default@ss_part@ss_sold_date_sk=2452617 +#### A masked pattern was here #### +POSTHOOK: query: select * from ss_part where ss_sold_date_sk=2452617 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ss_part +POSTHOOK: Input: default@ss_part@ss_sold_date_sk=2452617 +#### A masked pattern was here #### +2.1 -2026.3 2452617 +2.99 -11.32 2452617 +85.8 25.61 2452617 +552.96 -1363.84 2452617 +565.92 196.48 2452617 +879.07 -2185.76 2452617 +1765.07 -4648.8 2452617 +3423.95 -3164.07 2452617 +5362.01 -600.28 2452617 +7412.83 2071.68 2452617 +10022.63 3952.8 2452617 +PREHOOK: query: desc formatted ss_part partition(ss_sold_date_sk=2452638) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@ss_part +POSTHOOK: query: desc formatted ss_part partition(ss_sold_date_sk=2452638) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@ss_part +# col_name data_type comment + +ss_net_paid_inc_tax float +ss_net_profit float + +# Partition Information +# col_name data_type comment + +ss_sold_date_sk int + +# Detailed Partition Information +Partition Value: [2452638] +Database: default +Table: ss_part +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 13 + rawDataSize 186 + totalSize 199 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: select * from ss_part where ss_sold_date_sk=2452638 +PREHOOK: type: QUERY +PREHOOK: Input: default@ss_part +PREHOOK: Input: default@ss_part@ss_sold_date_sk=2452638 +#### A masked pattern was here #### +POSTHOOK: query: select * from ss_part where ss_sold_date_sk=2452638 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ss_part +POSTHOOK: Input: default@ss_part@ss_sold_date_sk=2452638 +#### A masked pattern was here #### +0.15 -241.22 2452638 +150.39 -162.12 2452638 +156.67 -4626.56 2452638 +181.03 -207.24 2452638 +267.01 -3266.36 2452638 +317.87 -3775.38 2452638 +1327.08 57.97 2452638 +1413.19 178.08 2452638 +1524.33 494.37 2452638 +1971.35 -488.25 2452638 +4133.98 -775.72 2452638 +4329.49 -4000.51 2452638 +10171.1 660.48 2452638 +PREHOOK: query: explain insert overwrite table ss_part partition (ss_sold_date_sk) +select ss_net_paid_inc_tax, + ss_net_profit, + ss_sold_date_sk + from ss + where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638 + distribute by ss_sold_date_sk +PREHOOK: type: QUERY +POSTHOOK: query: explain insert overwrite table ss_part partition (ss_sold_date_sk) +select ss_net_paid_inc_tax, + ss_net_profit, + ss_sold_date_sk + from ss + where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638 + distribute by ss_sold_date_sk +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: ss + Statistics: Num rows: 46 Data size: 553 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((ss_sold_date_sk >= 2452617) and (ss_sold_date_sk <= 2452638)) (type: boolean) + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ss_net_paid_inc_tax (type: float), ss_net_profit (type: float), ss_sold_date_sk (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: float), _col1 (type: float), _col2 (type: int) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: float), VALUE._col1 (type: float), VALUE._col2 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.ss_part + + Stage: Stage-0 + Move Operator + tables: + partition: + ss_sold_date_sk + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.ss_part + + Stage: Stage-2 + Stats-Aggr Operator + +PREHOOK: query: insert overwrite table ss_part partition (ss_sold_date_sk) +select ss_net_paid_inc_tax, + ss_net_profit, + ss_sold_date_sk + from ss + where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638 + distribute by ss_sold_date_sk +PREHOOK: type: QUERY +PREHOOK: Input: default@ss +PREHOOK: Output: default@ss_part +POSTHOOK: query: insert overwrite table ss_part partition (ss_sold_date_sk) +select ss_net_paid_inc_tax, + ss_net_profit, + ss_sold_date_sk + from ss + where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638 + distribute by ss_sold_date_sk +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ss +POSTHOOK: Output: default@ss_part@ss_sold_date_sk=2452617 +POSTHOOK: Output: default@ss_part@ss_sold_date_sk=2452638 +POSTHOOK: Lineage: ss_part PARTITION(ss_sold_date_sk=2452617).ss_net_paid_inc_tax SIMPLE [(ss)ss.FieldSchema(name:ss_net_paid_inc_tax, type:float, comment:null), ] +POSTHOOK: Lineage: ss_part PARTITION(ss_sold_date_sk=2452617).ss_net_profit SIMPLE [(ss)ss.FieldSchema(name:ss_net_profit, type:float, comment:null), ] +POSTHOOK: Lineage: ss_part PARTITION(ss_sold_date_sk=2452638).ss_net_paid_inc_tax SIMPLE [(ss)ss.FieldSchema(name:ss_net_paid_inc_tax, type:float, comment:null), ] +POSTHOOK: Lineage: ss_part PARTITION(ss_sold_date_sk=2452638).ss_net_profit SIMPLE [(ss)ss.FieldSchema(name:ss_net_profit, type:float, comment:null), ] +PREHOOK: query: desc formatted ss_part partition(ss_sold_date_sk=2452617) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@ss_part +POSTHOOK: query: desc formatted ss_part partition(ss_sold_date_sk=2452617) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@ss_part +# col_name data_type comment + +ss_net_paid_inc_tax float +ss_net_profit float + +# Partition Information +# col_name data_type comment + +ss_sold_date_sk int + +# Detailed Partition Information +Partition Value: [2452617] +Database: default +Table: ss_part +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 11 + rawDataSize 151 + totalSize 162 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: select * from ss_part where ss_sold_date_sk=2452617 +PREHOOK: type: QUERY +PREHOOK: Input: default@ss_part +PREHOOK: Input: default@ss_part@ss_sold_date_sk=2452617 +#### A masked pattern was here #### +POSTHOOK: query: select * from ss_part where ss_sold_date_sk=2452617 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ss_part +POSTHOOK: Input: default@ss_part@ss_sold_date_sk=2452617 +#### A masked pattern was here #### +3423.95 -3164.07 2452617 +5362.01 -600.28 2452617 +565.92 196.48 2452617 +85.8 25.61 2452617 +7412.83 2071.68 2452617 +879.07 -2185.76 2452617 +1765.07 -4648.8 2452617 +552.96 -1363.84 2452617 +2.1 -2026.3 2452617 +10022.63 3952.8 2452617 +2.99 -11.32 2452617 +PREHOOK: query: desc formatted ss_part partition(ss_sold_date_sk=2452638) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@ss_part +POSTHOOK: query: desc formatted ss_part partition(ss_sold_date_sk=2452638) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@ss_part +# col_name data_type comment + +ss_net_paid_inc_tax float +ss_net_profit float + +# Partition Information +# col_name data_type comment + +ss_sold_date_sk int + +# Detailed Partition Information +Partition Value: [2452638] +Database: default +Table: ss_part +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 13 + rawDataSize 186 + totalSize 199 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: select * from ss_part where ss_sold_date_sk=2452638 +PREHOOK: type: QUERY +PREHOOK: Input: default@ss_part +PREHOOK: Input: default@ss_part@ss_sold_date_sk=2452638 +#### A masked pattern was here #### +POSTHOOK: query: select * from ss_part where ss_sold_date_sk=2452638 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ss_part +POSTHOOK: Input: default@ss_part@ss_sold_date_sk=2452638 +#### A masked pattern was here #### +4329.49 -4000.51 2452638 +1413.19 178.08 2452638 +150.39 -162.12 2452638 +1524.33 494.37 2452638 +0.15 -241.22 2452638 +267.01 -3266.36 2452638 +181.03 -207.24 2452638 +1971.35 -488.25 2452638 +1327.08 57.97 2452638 +156.67 -4626.56 2452638 +317.87 -3775.38 2452638 +10171.1 660.48 2452638 +4133.98 -775.72 2452638 +PREHOOK: query: -- VECTORIZATION IS ENABLED + +create table ss_orc ( +ss_sold_date_sk int, +ss_net_paid_inc_tax float, +ss_net_profit float) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@ss_orc +POSTHOOK: query: -- VECTORIZATION IS ENABLED + +create table ss_orc ( +ss_sold_date_sk int, +ss_net_paid_inc_tax float, +ss_net_profit float) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@ss_orc +PREHOOK: query: create table ss_part_orc ( +ss_net_paid_inc_tax float, +ss_net_profit float) +partitioned by (ss_sold_date_sk int) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@ss_part_orc +POSTHOOK: query: create table ss_part_orc ( +ss_net_paid_inc_tax float, +ss_net_profit float) +partitioned by (ss_sold_date_sk int) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@ss_part_orc +PREHOOK: query: insert overwrite table ss_orc select * from ss +PREHOOK: type: QUERY +PREHOOK: Input: default@ss +PREHOOK: Output: default@ss_orc +POSTHOOK: query: insert overwrite table ss_orc select * from ss +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ss +POSTHOOK: Output: default@ss_orc +POSTHOOK: Lineage: ss_orc.ss_net_paid_inc_tax SIMPLE [(ss)ss.FieldSchema(name:ss_net_paid_inc_tax, type:float, comment:null), ] +POSTHOOK: Lineage: ss_orc.ss_net_profit SIMPLE [(ss)ss.FieldSchema(name:ss_net_profit, type:float, comment:null), ] +POSTHOOK: Lineage: ss_orc.ss_sold_date_sk SIMPLE [(ss)ss.FieldSchema(name:ss_sold_date_sk, type:int, comment:null), ] +PREHOOK: query: drop table ss +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@ss +PREHOOK: Output: default@ss +POSTHOOK: query: drop table ss +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@ss +POSTHOOK: Output: default@ss +PREHOOK: query: drop table ss_part +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@ss_part +PREHOOK: Output: default@ss_part +POSTHOOK: query: drop table ss_part +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@ss_part +POSTHOOK: Output: default@ss_part +PREHOOK: query: explain insert overwrite table ss_part_orc partition (ss_sold_date_sk) +select ss_net_paid_inc_tax, + ss_net_profit, + ss_sold_date_sk + from ss_orc + where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638 + group by ss_sold_date_sk, + ss_net_paid_inc_tax, + ss_net_profit + distribute by ss_sold_date_sk +PREHOOK: type: QUERY +POSTHOOK: query: explain insert overwrite table ss_part_orc partition (ss_sold_date_sk) +select ss_net_paid_inc_tax, + ss_net_profit, + ss_sold_date_sk + from ss_orc + where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638 + group by ss_sold_date_sk, + ss_net_paid_inc_tax, + ss_net_profit + distribute by ss_sold_date_sk +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: ss_orc + Statistics: Num rows: 24 Data size: 288 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((ss_sold_date_sk >= 2452617) and (ss_sold_date_sk <= 2452638)) (type: boolean) + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_net_paid_inc_tax (type: float), ss_net_profit (type: float) + outputColumnNames: ss_sold_date_sk, ss_net_paid_inc_tax, ss_net_profit + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: ss_sold_date_sk (type: int), ss_net_paid_inc_tax (type: float), ss_net_profit (type: float) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: float), _col2 (type: float) + sort order: +++ + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: float), KEY._col2 (type: float) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: float), _col2 (type: float), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.ss_part_orc + + Stage: Stage-0 + Move Operator + tables: + partition: + ss_sold_date_sk + replace: true + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.ss_part_orc + + Stage: Stage-2 + Stats-Aggr Operator + +PREHOOK: query: insert overwrite table ss_part_orc partition (ss_sold_date_sk) +select ss_net_paid_inc_tax, + ss_net_profit, + ss_sold_date_sk + from ss_orc + where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638 + group by ss_sold_date_sk, + ss_net_paid_inc_tax, + ss_net_profit + distribute by ss_sold_date_sk +PREHOOK: type: QUERY +PREHOOK: Input: default@ss_orc +PREHOOK: Output: default@ss_part_orc +POSTHOOK: query: insert overwrite table ss_part_orc partition (ss_sold_date_sk) +select ss_net_paid_inc_tax, + ss_net_profit, + ss_sold_date_sk + from ss_orc + where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638 + group by ss_sold_date_sk, + ss_net_paid_inc_tax, + ss_net_profit + distribute by ss_sold_date_sk +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ss_orc +POSTHOOK: Output: default@ss_part_orc@ss_sold_date_sk=2452617 +POSTHOOK: Output: default@ss_part_orc@ss_sold_date_sk=2452638 +POSTHOOK: Lineage: ss_part_orc PARTITION(ss_sold_date_sk=2452617).ss_net_paid_inc_tax SIMPLE [(ss_orc)ss_orc.FieldSchema(name:ss_net_paid_inc_tax, type:float, comment:null), ] +POSTHOOK: Lineage: ss_part_orc PARTITION(ss_sold_date_sk=2452617).ss_net_profit SIMPLE [(ss_orc)ss_orc.FieldSchema(name:ss_net_profit, type:float, comment:null), ] +POSTHOOK: Lineage: ss_part_orc PARTITION(ss_sold_date_sk=2452638).ss_net_paid_inc_tax SIMPLE [(ss_orc)ss_orc.FieldSchema(name:ss_net_paid_inc_tax, type:float, comment:null), ] +POSTHOOK: Lineage: ss_part_orc PARTITION(ss_sold_date_sk=2452638).ss_net_profit SIMPLE [(ss_orc)ss_orc.FieldSchema(name:ss_net_profit, type:float, comment:null), ] +PREHOOK: query: desc formatted ss_part_orc partition(ss_sold_date_sk=2452617) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@ss_part_orc +POSTHOOK: query: desc formatted ss_part_orc partition(ss_sold_date_sk=2452617) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@ss_part_orc +# col_name data_type comment + +ss_net_paid_inc_tax float +ss_net_profit float + +# Partition Information +# col_name data_type comment + +ss_sold_date_sk int + +# Detailed Partition Information +Partition Value: [2452617] +Database: default +Table: ss_part_orc +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 11 + rawDataSize 88 + totalSize 417 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: select * from ss_part_orc where ss_sold_date_sk=2452617 +PREHOOK: type: QUERY +PREHOOK: Input: default@ss_part_orc +PREHOOK: Input: default@ss_part_orc@ss_sold_date_sk=2452617 +#### A masked pattern was here #### +POSTHOOK: query: select * from ss_part_orc where ss_sold_date_sk=2452617 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ss_part_orc +POSTHOOK: Input: default@ss_part_orc@ss_sold_date_sk=2452617 +#### A masked pattern was here #### +2.1 -2026.3 2452617 +2.99 -11.32 2452617 +85.8 25.61 2452617 +552.96 -1363.84 2452617 +565.92 196.48 2452617 +879.07 -2185.76 2452617 +1765.07 -4648.8 2452617 +3423.95 -3164.07 2452617 +5362.01 -600.28 2452617 +7412.83 2071.68 2452617 +10022.63 3952.8 2452617 +PREHOOK: query: desc formatted ss_part_orc partition(ss_sold_date_sk=2452638) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@ss_part_orc +POSTHOOK: query: desc formatted ss_part_orc partition(ss_sold_date_sk=2452638) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@ss_part_orc +# col_name data_type comment + +ss_net_paid_inc_tax float +ss_net_profit float + +# Partition Information +# col_name data_type comment + +ss_sold_date_sk int + +# Detailed Partition Information +Partition Value: [2452638] +Database: default +Table: ss_part_orc +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 13 + rawDataSize 104 + totalSize 440 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: select * from ss_part_orc where ss_sold_date_sk=2452638 +PREHOOK: type: QUERY +PREHOOK: Input: default@ss_part_orc +PREHOOK: Input: default@ss_part_orc@ss_sold_date_sk=2452638 +#### A masked pattern was here #### +POSTHOOK: query: select * from ss_part_orc where ss_sold_date_sk=2452638 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ss_part_orc +POSTHOOK: Input: default@ss_part_orc@ss_sold_date_sk=2452638 +#### A masked pattern was here #### +0.15 -241.22 2452638 +150.39 -162.12 2452638 +156.67 -4626.56 2452638 +181.03 -207.24 2452638 +267.01 -3266.36 2452638 +317.87 -3775.38 2452638 +1327.08 57.97 2452638 +1413.19 178.08 2452638 +1524.33 494.37 2452638 +1971.35 -488.25 2452638 +4133.98 -775.72 2452638 +4329.49 -4000.51 2452638 +10171.1 660.48 2452638 +PREHOOK: query: explain insert overwrite table ss_part_orc partition (ss_sold_date_sk) +select ss_net_paid_inc_tax, + ss_net_profit, + ss_sold_date_sk + from ss_orc + where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638 + distribute by ss_sold_date_sk +PREHOOK: type: QUERY +POSTHOOK: query: explain insert overwrite table ss_part_orc partition (ss_sold_date_sk) +select ss_net_paid_inc_tax, + ss_net_profit, + ss_sold_date_sk + from ss_orc + where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638 + distribute by ss_sold_date_sk +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: ss_orc + Statistics: Num rows: 24 Data size: 288 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((ss_sold_date_sk >= 2452617) and (ss_sold_date_sk <= 2452638)) (type: boolean) + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ss_net_paid_inc_tax (type: float), ss_net_profit (type: float), ss_sold_date_sk (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: float), _col1 (type: float), _col2 (type: int) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: float), VALUE._col1 (type: float), VALUE._col2 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.ss_part_orc + + Stage: Stage-0 + Move Operator + tables: + partition: + ss_sold_date_sk + replace: true + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.ss_part_orc + + Stage: Stage-2 + Stats-Aggr Operator + +PREHOOK: query: insert overwrite table ss_part_orc partition (ss_sold_date_sk) +select ss_net_paid_inc_tax, + ss_net_profit, + ss_sold_date_sk + from ss_orc + where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638 + distribute by ss_sold_date_sk +PREHOOK: type: QUERY +PREHOOK: Input: default@ss_orc +PREHOOK: Output: default@ss_part_orc +POSTHOOK: query: insert overwrite table ss_part_orc partition (ss_sold_date_sk) +select ss_net_paid_inc_tax, + ss_net_profit, + ss_sold_date_sk + from ss_orc + where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638 + distribute by ss_sold_date_sk +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ss_orc +POSTHOOK: Output: default@ss_part_orc@ss_sold_date_sk=2452617 +POSTHOOK: Output: default@ss_part_orc@ss_sold_date_sk=2452638 +POSTHOOK: Lineage: ss_part_orc PARTITION(ss_sold_date_sk=2452617).ss_net_paid_inc_tax SIMPLE [(ss_orc)ss_orc.FieldSchema(name:ss_net_paid_inc_tax, type:float, comment:null), ] +POSTHOOK: Lineage: ss_part_orc PARTITION(ss_sold_date_sk=2452617).ss_net_profit SIMPLE [(ss_orc)ss_orc.FieldSchema(name:ss_net_profit, type:float, comment:null), ] +POSTHOOK: Lineage: ss_part_orc PARTITION(ss_sold_date_sk=2452638).ss_net_paid_inc_tax SIMPLE [(ss_orc)ss_orc.FieldSchema(name:ss_net_paid_inc_tax, type:float, comment:null), ] +POSTHOOK: Lineage: ss_part_orc PARTITION(ss_sold_date_sk=2452638).ss_net_profit SIMPLE [(ss_orc)ss_orc.FieldSchema(name:ss_net_profit, type:float, comment:null), ] +PREHOOK: query: desc formatted ss_part_orc partition(ss_sold_date_sk=2452617) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@ss_part_orc +POSTHOOK: query: desc formatted ss_part_orc partition(ss_sold_date_sk=2452617) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@ss_part_orc +# col_name data_type comment + +ss_net_paid_inc_tax float +ss_net_profit float + +# Partition Information +# col_name data_type comment + +ss_sold_date_sk int + +# Detailed Partition Information +Partition Value: [2452617] +Database: default +Table: ss_part_orc +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 11 + rawDataSize 88 + totalSize 417 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: select * from ss_part_orc where ss_sold_date_sk=2452617 +PREHOOK: type: QUERY +PREHOOK: Input: default@ss_part_orc +PREHOOK: Input: default@ss_part_orc@ss_sold_date_sk=2452617 +#### A masked pattern was here #### +POSTHOOK: query: select * from ss_part_orc where ss_sold_date_sk=2452617 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ss_part_orc +POSTHOOK: Input: default@ss_part_orc@ss_sold_date_sk=2452617 +#### A masked pattern was here #### +3423.95 -3164.07 2452617 +5362.01 -600.28 2452617 +565.92 196.48 2452617 +85.8 25.61 2452617 +7412.83 2071.68 2452617 +879.07 -2185.76 2452617 +1765.07 -4648.8 2452617 +552.96 -1363.84 2452617 +2.1 -2026.3 2452617 +10022.63 3952.8 2452617 +2.99 -11.32 2452617 +PREHOOK: query: desc formatted ss_part_orc partition(ss_sold_date_sk=2452638) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@ss_part_orc +POSTHOOK: query: desc formatted ss_part_orc partition(ss_sold_date_sk=2452638) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@ss_part_orc +# col_name data_type comment + +ss_net_paid_inc_tax float +ss_net_profit float + +# Partition Information +# col_name data_type comment + +ss_sold_date_sk int + +# Detailed Partition Information +Partition Value: [2452638] +Database: default +Table: ss_part_orc +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 13 + rawDataSize 104 + totalSize 440 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: select * from ss_part_orc where ss_sold_date_sk=2452638 +PREHOOK: type: QUERY +PREHOOK: Input: default@ss_part_orc +PREHOOK: Input: default@ss_part_orc@ss_sold_date_sk=2452638 +#### A masked pattern was here #### +POSTHOOK: query: select * from ss_part_orc where ss_sold_date_sk=2452638 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ss_part_orc +POSTHOOK: Input: default@ss_part_orc@ss_sold_date_sk=2452638 +#### A masked pattern was here #### +4329.49 -4000.51 2452638 +1413.19 178.08 2452638 +150.39 -162.12 2452638 +1524.33 494.37 2452638 +0.15 -241.22 2452638 +267.01 -3266.36 2452638 +181.03 -207.24 2452638 +1971.35 -488.25 2452638 +1327.08 57.97 2452638 +156.67 -4626.56 2452638 +317.87 -3775.38 2452638 +10171.1 660.48 2452638 +4133.98 -775.72 2452638 +PREHOOK: query: drop table ss_orc +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@ss_orc +PREHOOK: Output: default@ss_orc +POSTHOOK: query: drop table ss_orc +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@ss_orc +POSTHOOK: Output: default@ss_orc +PREHOOK: query: drop table ss_part_orc +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@ss_part_orc +PREHOOK: Output: default@ss_part_orc +POSTHOOK: query: drop table ss_part_orc +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@ss_part_orc +POSTHOOK: Output: default@ss_part_orc diff --git ql/src/test/results/clientpositive/tez/dynpart_sort_opt_vectorization.q.out ql/src/test/results/clientpositive/tez/dynpart_sort_opt_vectorization.q.out index 53dfb2c..6297461 100644 --- ql/src/test/results/clientpositive/tez/dynpart_sort_opt_vectorization.q.out +++ ql/src/test/results/clientpositive/tez/dynpart_sort_opt_vectorization.q.out @@ -354,7 +354,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col4 (type: tinyint) Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint), -1 (type: int) Execution mode: vectorized Reducer 2 Reduce Operator Tree: @@ -421,7 +421,7 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col4 (type: tinyint) Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint), -1 (type: int) Execution mode: vectorized Reducer 2 Reduce Operator Tree: @@ -725,7 +725,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col4 (type: tinyint) Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint), -1 (type: int) Execution mode: vectorized Reducer 2 Reduce Operator Tree: @@ -792,7 +792,7 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col4 (type: tinyint) Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint), -1 (type: int) Execution mode: vectorized Reducer 2 Reduce Operator Tree: @@ -1890,7 +1890,7 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col4 (type: tinyint) Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint), -1 (type: int) Execution mode: vectorized Reducer 2 Reduce Operator Tree: diff --git ql/src/test/results/clientpositive/tez/dynpart_sort_optimization.q.out ql/src/test/results/clientpositive/tez/dynpart_sort_optimization.q.out index e2f8673..bd5fd1c 100644 --- ql/src/test/results/clientpositive/tez/dynpart_sort_optimization.q.out +++ ql/src/test/results/clientpositive/tez/dynpart_sort_optimization.q.out @@ -295,7 +295,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col4 (type: tinyint) Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint), -1 (type: int) Reducer 2 Reduce Operator Tree: Extract @@ -361,7 +361,7 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col4 (type: tinyint) Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint), -1 (type: int) Reducer 2 Reduce Operator Tree: Extract @@ -648,7 +648,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col4 (type: tinyint) Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint), -1 (type: int) Reducer 2 Reduce Operator Tree: Extract @@ -714,7 +714,7 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col4 (type: tinyint) Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint), -1 (type: int) Reducer 2 Reduce Operator Tree: Extract @@ -1807,7 +1807,7 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col4 (type: tinyint) Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint), -1 (type: int) Reducer 2 Reduce Operator Tree: Extract diff --git ql/src/test/results/clientpositive/tez/dynpart_sort_optimization2.q.out ql/src/test/results/clientpositive/tez/dynpart_sort_optimization2.q.out new file mode 100644 index 0000000..1050f7f --- /dev/null +++ ql/src/test/results/clientpositive/tez/dynpart_sort_optimization2.q.out @@ -0,0 +1,1580 @@ +PREHOOK: query: drop table ss +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table ss +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table ss_orc +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table ss_orc +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table ss_part +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table ss_part +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table ss_part_orc +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table ss_part_orc +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table ss ( +ss_sold_date_sk int, +ss_net_paid_inc_tax float, +ss_net_profit float) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@ss +POSTHOOK: query: create table ss ( +ss_sold_date_sk int, +ss_net_paid_inc_tax float, +ss_net_profit float) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@ss +PREHOOK: query: create table ss_part ( +ss_net_paid_inc_tax float, +ss_net_profit float) +partitioned by (ss_sold_date_sk int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@ss_part +POSTHOOK: query: create table ss_part ( +ss_net_paid_inc_tax float, +ss_net_profit float) +partitioned by (ss_sold_date_sk int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@ss_part +PREHOOK: query: load data local inpath '../../data/files/dynpart_test.txt' overwrite into table ss +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@ss +POSTHOOK: query: load data local inpath '../../data/files/dynpart_test.txt' overwrite into table ss +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@ss +PREHOOK: query: explain insert overwrite table ss_part partition (ss_sold_date_sk) +select ss_net_paid_inc_tax, + ss_net_profit, + ss_sold_date_sk + from ss + where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638 + group by ss_sold_date_sk, + ss_net_paid_inc_tax, + ss_net_profit + distribute by ss_sold_date_sk +PREHOOK: type: QUERY +POSTHOOK: query: explain insert overwrite table ss_part partition (ss_sold_date_sk) +select ss_net_paid_inc_tax, + ss_net_profit, + ss_sold_date_sk + from ss + where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638 + group by ss_sold_date_sk, + ss_net_paid_inc_tax, + ss_net_profit + distribute by ss_sold_date_sk +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: ss + Statistics: Num rows: 46 Data size: 553 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((ss_sold_date_sk >= 2452617) and (ss_sold_date_sk <= 2452638)) (type: boolean) + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_net_paid_inc_tax (type: float), ss_net_profit (type: float) + outputColumnNames: ss_sold_date_sk, ss_net_paid_inc_tax, ss_net_profit + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: ss_sold_date_sk (type: int), ss_net_paid_inc_tax (type: float), ss_net_profit (type: float) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: float), _col2 (type: float) + sort order: +++ + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: float), KEY._col2 (type: float) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: float), _col2 (type: float), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.ss_part + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + partition: + ss_sold_date_sk + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.ss_part + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: insert overwrite table ss_part partition (ss_sold_date_sk) +select ss_net_paid_inc_tax, + ss_net_profit, + ss_sold_date_sk + from ss + where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638 + group by ss_sold_date_sk, + ss_net_paid_inc_tax, + ss_net_profit + distribute by ss_sold_date_sk +PREHOOK: type: QUERY +PREHOOK: Input: default@ss +PREHOOK: Output: default@ss_part +POSTHOOK: query: insert overwrite table ss_part partition (ss_sold_date_sk) +select ss_net_paid_inc_tax, + ss_net_profit, + ss_sold_date_sk + from ss + where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638 + group by ss_sold_date_sk, + ss_net_paid_inc_tax, + ss_net_profit + distribute by ss_sold_date_sk +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ss +POSTHOOK: Output: default@ss_part@ss_sold_date_sk=2452617 +POSTHOOK: Output: default@ss_part@ss_sold_date_sk=2452638 +POSTHOOK: Lineage: ss_part PARTITION(ss_sold_date_sk=2452617).ss_net_paid_inc_tax SIMPLE [(ss)ss.FieldSchema(name:ss_net_paid_inc_tax, type:float, comment:null), ] +POSTHOOK: Lineage: ss_part PARTITION(ss_sold_date_sk=2452617).ss_net_profit SIMPLE [(ss)ss.FieldSchema(name:ss_net_profit, type:float, comment:null), ] +POSTHOOK: Lineage: ss_part PARTITION(ss_sold_date_sk=2452638).ss_net_paid_inc_tax SIMPLE [(ss)ss.FieldSchema(name:ss_net_paid_inc_tax, type:float, comment:null), ] +POSTHOOK: Lineage: ss_part PARTITION(ss_sold_date_sk=2452638).ss_net_profit SIMPLE [(ss)ss.FieldSchema(name:ss_net_profit, type:float, comment:null), ] +PREHOOK: query: desc formatted ss_part partition(ss_sold_date_sk=2452617) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@ss_part +POSTHOOK: query: desc formatted ss_part partition(ss_sold_date_sk=2452617) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@ss_part +# col_name data_type comment + +ss_net_paid_inc_tax float +ss_net_profit float + +# Partition Information +# col_name data_type comment + +ss_sold_date_sk int + +# Detailed Partition Information +Partition Value: [2452617] +Database: default +Table: ss_part +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 11 + rawDataSize 151 + totalSize 162 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: select * from ss_part where ss_sold_date_sk=2452617 +PREHOOK: type: QUERY +PREHOOK: Input: default@ss_part +PREHOOK: Input: default@ss_part@ss_sold_date_sk=2452617 +#### A masked pattern was here #### +POSTHOOK: query: select * from ss_part where ss_sold_date_sk=2452617 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ss_part +POSTHOOK: Input: default@ss_part@ss_sold_date_sk=2452617 +#### A masked pattern was here #### +2.1 -2026.3 2452617 +2.99 -11.32 2452617 +85.8 25.61 2452617 +552.96 -1363.84 2452617 +565.92 196.48 2452617 +879.07 -2185.76 2452617 +1765.07 -4648.8 2452617 +3423.95 -3164.07 2452617 +5362.01 -600.28 2452617 +7412.83 2071.68 2452617 +10022.63 3952.8 2452617 +PREHOOK: query: desc formatted ss_part partition(ss_sold_date_sk=2452638) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@ss_part +POSTHOOK: query: desc formatted ss_part partition(ss_sold_date_sk=2452638) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@ss_part +# col_name data_type comment + +ss_net_paid_inc_tax float +ss_net_profit float + +# Partition Information +# col_name data_type comment + +ss_sold_date_sk int + +# Detailed Partition Information +Partition Value: [2452638] +Database: default +Table: ss_part +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 13 + rawDataSize 186 + totalSize 199 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: select * from ss_part where ss_sold_date_sk=2452638 +PREHOOK: type: QUERY +PREHOOK: Input: default@ss_part +PREHOOK: Input: default@ss_part@ss_sold_date_sk=2452638 +#### A masked pattern was here #### +POSTHOOK: query: select * from ss_part where ss_sold_date_sk=2452638 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ss_part +POSTHOOK: Input: default@ss_part@ss_sold_date_sk=2452638 +#### A masked pattern was here #### +0.15 -241.22 2452638 +150.39 -162.12 2452638 +156.67 -4626.56 2452638 +181.03 -207.24 2452638 +267.01 -3266.36 2452638 +317.87 -3775.38 2452638 +1327.08 57.97 2452638 +1413.19 178.08 2452638 +1524.33 494.37 2452638 +1971.35 -488.25 2452638 +4133.98 -775.72 2452638 +4329.49 -4000.51 2452638 +10171.1 660.48 2452638 +PREHOOK: query: explain insert overwrite table ss_part partition (ss_sold_date_sk) +select ss_net_paid_inc_tax, + ss_net_profit, + ss_sold_date_sk + from ss + where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638 + distribute by ss_sold_date_sk +PREHOOK: type: QUERY +POSTHOOK: query: explain insert overwrite table ss_part partition (ss_sold_date_sk) +select ss_net_paid_inc_tax, + ss_net_profit, + ss_sold_date_sk + from ss + where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638 + distribute by ss_sold_date_sk +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: ss + Statistics: Num rows: 46 Data size: 553 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((ss_sold_date_sk >= 2452617) and (ss_sold_date_sk <= 2452638)) (type: boolean) + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ss_net_paid_inc_tax (type: float), ss_net_profit (type: float), ss_sold_date_sk (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: float), _col1 (type: float), _col2 (type: int) + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: float), VALUE._col1 (type: float), VALUE._col2 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.ss_part + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + partition: + ss_sold_date_sk + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.ss_part + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: insert overwrite table ss_part partition (ss_sold_date_sk) +select ss_net_paid_inc_tax, + ss_net_profit, + ss_sold_date_sk + from ss + where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638 + distribute by ss_sold_date_sk +PREHOOK: type: QUERY +PREHOOK: Input: default@ss +PREHOOK: Output: default@ss_part +POSTHOOK: query: insert overwrite table ss_part partition (ss_sold_date_sk) +select ss_net_paid_inc_tax, + ss_net_profit, + ss_sold_date_sk + from ss + where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638 + distribute by ss_sold_date_sk +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ss +POSTHOOK: Output: default@ss_part@ss_sold_date_sk=2452617 +POSTHOOK: Output: default@ss_part@ss_sold_date_sk=2452638 +POSTHOOK: Lineage: ss_part PARTITION(ss_sold_date_sk=2452617).ss_net_paid_inc_tax SIMPLE [(ss)ss.FieldSchema(name:ss_net_paid_inc_tax, type:float, comment:null), ] +POSTHOOK: Lineage: ss_part PARTITION(ss_sold_date_sk=2452617).ss_net_profit SIMPLE [(ss)ss.FieldSchema(name:ss_net_profit, type:float, comment:null), ] +POSTHOOK: Lineage: ss_part PARTITION(ss_sold_date_sk=2452638).ss_net_paid_inc_tax SIMPLE [(ss)ss.FieldSchema(name:ss_net_paid_inc_tax, type:float, comment:null), ] +POSTHOOK: Lineage: ss_part PARTITION(ss_sold_date_sk=2452638).ss_net_profit SIMPLE [(ss)ss.FieldSchema(name:ss_net_profit, type:float, comment:null), ] +PREHOOK: query: desc formatted ss_part partition(ss_sold_date_sk=2452617) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@ss_part +POSTHOOK: query: desc formatted ss_part partition(ss_sold_date_sk=2452617) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@ss_part +# col_name data_type comment + +ss_net_paid_inc_tax float +ss_net_profit float + +# Partition Information +# col_name data_type comment + +ss_sold_date_sk int + +# Detailed Partition Information +Partition Value: [2452617] +Database: default +Table: ss_part +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 11 + rawDataSize 151 + totalSize 162 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: select * from ss_part where ss_sold_date_sk=2452617 +PREHOOK: type: QUERY +PREHOOK: Input: default@ss_part +PREHOOK: Input: default@ss_part@ss_sold_date_sk=2452617 +#### A masked pattern was here #### +POSTHOOK: query: select * from ss_part where ss_sold_date_sk=2452617 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ss_part +POSTHOOK: Input: default@ss_part@ss_sold_date_sk=2452617 +#### A masked pattern was here #### +10022.63 3952.8 2452617 +2.99 -11.32 2452617 +3423.95 -3164.07 2452617 +5362.01 -600.28 2452617 +565.92 196.48 2452617 +85.8 25.61 2452617 +7412.83 2071.68 2452617 +879.07 -2185.76 2452617 +1765.07 -4648.8 2452617 +552.96 -1363.84 2452617 +2.1 -2026.3 2452617 +PREHOOK: query: desc formatted ss_part partition(ss_sold_date_sk=2452638) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@ss_part +POSTHOOK: query: desc formatted ss_part partition(ss_sold_date_sk=2452638) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@ss_part +# col_name data_type comment + +ss_net_paid_inc_tax float +ss_net_profit float + +# Partition Information +# col_name data_type comment + +ss_sold_date_sk int + +# Detailed Partition Information +Partition Value: [2452638] +Database: default +Table: ss_part +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 13 + rawDataSize 186 + totalSize 199 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: select * from ss_part where ss_sold_date_sk=2452638 +PREHOOK: type: QUERY +PREHOOK: Input: default@ss_part +PREHOOK: Input: default@ss_part@ss_sold_date_sk=2452638 +#### A masked pattern was here #### +POSTHOOK: query: select * from ss_part where ss_sold_date_sk=2452638 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ss_part +POSTHOOK: Input: default@ss_part@ss_sold_date_sk=2452638 +#### A masked pattern was here #### +4329.49 -4000.51 2452638 +1413.19 178.08 2452638 +150.39 -162.12 2452638 +1524.33 494.37 2452638 +0.15 -241.22 2452638 +267.01 -3266.36 2452638 +181.03 -207.24 2452638 +1971.35 -488.25 2452638 +1327.08 57.97 2452638 +156.67 -4626.56 2452638 +317.87 -3775.38 2452638 +10171.1 660.48 2452638 +4133.98 -775.72 2452638 +PREHOOK: query: -- SORT DYNAMIC PARTITION DISABLED + +explain insert overwrite table ss_part partition (ss_sold_date_sk) +select ss_net_paid_inc_tax, + ss_net_profit, + ss_sold_date_sk + from ss + where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638 + group by ss_sold_date_sk, + ss_net_paid_inc_tax, + ss_net_profit + distribute by ss_sold_date_sk +PREHOOK: type: QUERY +POSTHOOK: query: -- SORT DYNAMIC PARTITION DISABLED + +explain insert overwrite table ss_part partition (ss_sold_date_sk) +select ss_net_paid_inc_tax, + ss_net_profit, + ss_sold_date_sk + from ss + where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638 + group by ss_sold_date_sk, + ss_net_paid_inc_tax, + ss_net_profit + distribute by ss_sold_date_sk +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: ss + Statistics: Num rows: 46 Data size: 553 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((ss_sold_date_sk >= 2452617) and (ss_sold_date_sk <= 2452638)) (type: boolean) + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_net_paid_inc_tax (type: float), ss_net_profit (type: float) + outputColumnNames: ss_sold_date_sk, ss_net_paid_inc_tax, ss_net_profit + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: ss_sold_date_sk (type: int), ss_net_paid_inc_tax (type: float), ss_net_profit (type: float) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: float), _col2 (type: float) + sort order: +++ + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: float), KEY._col2 (type: float) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: float), _col2 (type: float), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.ss_part + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + partition: + ss_sold_date_sk + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.ss_part + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: insert overwrite table ss_part partition (ss_sold_date_sk) +select ss_net_paid_inc_tax, + ss_net_profit, + ss_sold_date_sk + from ss + where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638 + group by ss_sold_date_sk, + ss_net_paid_inc_tax, + ss_net_profit + distribute by ss_sold_date_sk +PREHOOK: type: QUERY +PREHOOK: Input: default@ss +PREHOOK: Output: default@ss_part +POSTHOOK: query: insert overwrite table ss_part partition (ss_sold_date_sk) +select ss_net_paid_inc_tax, + ss_net_profit, + ss_sold_date_sk + from ss + where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638 + group by ss_sold_date_sk, + ss_net_paid_inc_tax, + ss_net_profit + distribute by ss_sold_date_sk +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ss +POSTHOOK: Output: default@ss_part@ss_sold_date_sk=2452617 +POSTHOOK: Output: default@ss_part@ss_sold_date_sk=2452638 +POSTHOOK: Lineage: ss_part PARTITION(ss_sold_date_sk=2452617).ss_net_paid_inc_tax SIMPLE [(ss)ss.FieldSchema(name:ss_net_paid_inc_tax, type:float, comment:null), ] +POSTHOOK: Lineage: ss_part PARTITION(ss_sold_date_sk=2452617).ss_net_profit SIMPLE [(ss)ss.FieldSchema(name:ss_net_profit, type:float, comment:null), ] +POSTHOOK: Lineage: ss_part PARTITION(ss_sold_date_sk=2452638).ss_net_paid_inc_tax SIMPLE [(ss)ss.FieldSchema(name:ss_net_paid_inc_tax, type:float, comment:null), ] +POSTHOOK: Lineage: ss_part PARTITION(ss_sold_date_sk=2452638).ss_net_profit SIMPLE [(ss)ss.FieldSchema(name:ss_net_profit, type:float, comment:null), ] +PREHOOK: query: desc formatted ss_part partition(ss_sold_date_sk=2452617) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@ss_part +POSTHOOK: query: desc formatted ss_part partition(ss_sold_date_sk=2452617) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@ss_part +# col_name data_type comment + +ss_net_paid_inc_tax float +ss_net_profit float + +# Partition Information +# col_name data_type comment + +ss_sold_date_sk int + +# Detailed Partition Information +Partition Value: [2452617] +Database: default +Table: ss_part +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 11 + rawDataSize 151 + totalSize 162 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: select * from ss_part where ss_sold_date_sk=2452617 +PREHOOK: type: QUERY +PREHOOK: Input: default@ss_part +PREHOOK: Input: default@ss_part@ss_sold_date_sk=2452617 +#### A masked pattern was here #### +POSTHOOK: query: select * from ss_part where ss_sold_date_sk=2452617 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ss_part +POSTHOOK: Input: default@ss_part@ss_sold_date_sk=2452617 +#### A masked pattern was here #### +2.1 -2026.3 2452617 +2.99 -11.32 2452617 +85.8 25.61 2452617 +552.96 -1363.84 2452617 +565.92 196.48 2452617 +879.07 -2185.76 2452617 +1765.07 -4648.8 2452617 +3423.95 -3164.07 2452617 +5362.01 -600.28 2452617 +7412.83 2071.68 2452617 +10022.63 3952.8 2452617 +PREHOOK: query: desc formatted ss_part partition(ss_sold_date_sk=2452638) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@ss_part +POSTHOOK: query: desc formatted ss_part partition(ss_sold_date_sk=2452638) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@ss_part +# col_name data_type comment + +ss_net_paid_inc_tax float +ss_net_profit float + +# Partition Information +# col_name data_type comment + +ss_sold_date_sk int + +# Detailed Partition Information +Partition Value: [2452638] +Database: default +Table: ss_part +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 13 + rawDataSize 186 + totalSize 199 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: select * from ss_part where ss_sold_date_sk=2452638 +PREHOOK: type: QUERY +PREHOOK: Input: default@ss_part +PREHOOK: Input: default@ss_part@ss_sold_date_sk=2452638 +#### A masked pattern was here #### +POSTHOOK: query: select * from ss_part where ss_sold_date_sk=2452638 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ss_part +POSTHOOK: Input: default@ss_part@ss_sold_date_sk=2452638 +#### A masked pattern was here #### +0.15 -241.22 2452638 +150.39 -162.12 2452638 +156.67 -4626.56 2452638 +181.03 -207.24 2452638 +267.01 -3266.36 2452638 +317.87 -3775.38 2452638 +1327.08 57.97 2452638 +1413.19 178.08 2452638 +1524.33 494.37 2452638 +1971.35 -488.25 2452638 +4133.98 -775.72 2452638 +4329.49 -4000.51 2452638 +10171.1 660.48 2452638 +PREHOOK: query: explain insert overwrite table ss_part partition (ss_sold_date_sk) +select ss_net_paid_inc_tax, + ss_net_profit, + ss_sold_date_sk + from ss + where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638 + distribute by ss_sold_date_sk +PREHOOK: type: QUERY +POSTHOOK: query: explain insert overwrite table ss_part partition (ss_sold_date_sk) +select ss_net_paid_inc_tax, + ss_net_profit, + ss_sold_date_sk + from ss + where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638 + distribute by ss_sold_date_sk +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: ss + Statistics: Num rows: 46 Data size: 553 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((ss_sold_date_sk >= 2452617) and (ss_sold_date_sk <= 2452638)) (type: boolean) + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ss_net_paid_inc_tax (type: float), ss_net_profit (type: float), ss_sold_date_sk (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: float), _col1 (type: float), _col2 (type: int) + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: float), VALUE._col1 (type: float), VALUE._col2 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.ss_part + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + partition: + ss_sold_date_sk + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.ss_part + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: insert overwrite table ss_part partition (ss_sold_date_sk) +select ss_net_paid_inc_tax, + ss_net_profit, + ss_sold_date_sk + from ss + where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638 + distribute by ss_sold_date_sk +PREHOOK: type: QUERY +PREHOOK: Input: default@ss +PREHOOK: Output: default@ss_part +POSTHOOK: query: insert overwrite table ss_part partition (ss_sold_date_sk) +select ss_net_paid_inc_tax, + ss_net_profit, + ss_sold_date_sk + from ss + where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638 + distribute by ss_sold_date_sk +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ss +POSTHOOK: Output: default@ss_part@ss_sold_date_sk=2452617 +POSTHOOK: Output: default@ss_part@ss_sold_date_sk=2452638 +POSTHOOK: Lineage: ss_part PARTITION(ss_sold_date_sk=2452617).ss_net_paid_inc_tax SIMPLE [(ss)ss.FieldSchema(name:ss_net_paid_inc_tax, type:float, comment:null), ] +POSTHOOK: Lineage: ss_part PARTITION(ss_sold_date_sk=2452617).ss_net_profit SIMPLE [(ss)ss.FieldSchema(name:ss_net_profit, type:float, comment:null), ] +POSTHOOK: Lineage: ss_part PARTITION(ss_sold_date_sk=2452638).ss_net_paid_inc_tax SIMPLE [(ss)ss.FieldSchema(name:ss_net_paid_inc_tax, type:float, comment:null), ] +POSTHOOK: Lineage: ss_part PARTITION(ss_sold_date_sk=2452638).ss_net_profit SIMPLE [(ss)ss.FieldSchema(name:ss_net_profit, type:float, comment:null), ] +PREHOOK: query: desc formatted ss_part partition(ss_sold_date_sk=2452617) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@ss_part +POSTHOOK: query: desc formatted ss_part partition(ss_sold_date_sk=2452617) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@ss_part +# col_name data_type comment + +ss_net_paid_inc_tax float +ss_net_profit float + +# Partition Information +# col_name data_type comment + +ss_sold_date_sk int + +# Detailed Partition Information +Partition Value: [2452617] +Database: default +Table: ss_part +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 11 + rawDataSize 151 + totalSize 162 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: select * from ss_part where ss_sold_date_sk=2452617 +PREHOOK: type: QUERY +PREHOOK: Input: default@ss_part +PREHOOK: Input: default@ss_part@ss_sold_date_sk=2452617 +#### A masked pattern was here #### +POSTHOOK: query: select * from ss_part where ss_sold_date_sk=2452617 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ss_part +POSTHOOK: Input: default@ss_part@ss_sold_date_sk=2452617 +#### A masked pattern was here #### +3423.95 -3164.07 2452617 +5362.01 -600.28 2452617 +565.92 196.48 2452617 +85.8 25.61 2452617 +7412.83 2071.68 2452617 +879.07 -2185.76 2452617 +1765.07 -4648.8 2452617 +552.96 -1363.84 2452617 +2.1 -2026.3 2452617 +10022.63 3952.8 2452617 +2.99 -11.32 2452617 +PREHOOK: query: desc formatted ss_part partition(ss_sold_date_sk=2452638) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@ss_part +POSTHOOK: query: desc formatted ss_part partition(ss_sold_date_sk=2452638) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@ss_part +# col_name data_type comment + +ss_net_paid_inc_tax float +ss_net_profit float + +# Partition Information +# col_name data_type comment + +ss_sold_date_sk int + +# Detailed Partition Information +Partition Value: [2452638] +Database: default +Table: ss_part +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 13 + rawDataSize 186 + totalSize 199 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: select * from ss_part where ss_sold_date_sk=2452638 +PREHOOK: type: QUERY +PREHOOK: Input: default@ss_part +PREHOOK: Input: default@ss_part@ss_sold_date_sk=2452638 +#### A masked pattern was here #### +POSTHOOK: query: select * from ss_part where ss_sold_date_sk=2452638 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ss_part +POSTHOOK: Input: default@ss_part@ss_sold_date_sk=2452638 +#### A masked pattern was here #### +4329.49 -4000.51 2452638 +1413.19 178.08 2452638 +150.39 -162.12 2452638 +1524.33 494.37 2452638 +0.15 -241.22 2452638 +267.01 -3266.36 2452638 +181.03 -207.24 2452638 +1971.35 -488.25 2452638 +1327.08 57.97 2452638 +156.67 -4626.56 2452638 +317.87 -3775.38 2452638 +10171.1 660.48 2452638 +4133.98 -775.72 2452638 +PREHOOK: query: -- VECTORIZATION IS ENABLED + +create table ss_orc ( +ss_sold_date_sk int, +ss_net_paid_inc_tax float, +ss_net_profit float) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@ss_orc +POSTHOOK: query: -- VECTORIZATION IS ENABLED + +create table ss_orc ( +ss_sold_date_sk int, +ss_net_paid_inc_tax float, +ss_net_profit float) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@ss_orc +PREHOOK: query: create table ss_part_orc ( +ss_net_paid_inc_tax float, +ss_net_profit float) +partitioned by (ss_sold_date_sk int) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@ss_part_orc +POSTHOOK: query: create table ss_part_orc ( +ss_net_paid_inc_tax float, +ss_net_profit float) +partitioned by (ss_sold_date_sk int) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@ss_part_orc +PREHOOK: query: insert overwrite table ss_orc select * from ss +PREHOOK: type: QUERY +PREHOOK: Input: default@ss +PREHOOK: Output: default@ss_orc +POSTHOOK: query: insert overwrite table ss_orc select * from ss +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ss +POSTHOOK: Output: default@ss_orc +POSTHOOK: Lineage: ss_orc.ss_net_paid_inc_tax SIMPLE [(ss)ss.FieldSchema(name:ss_net_paid_inc_tax, type:float, comment:null), ] +POSTHOOK: Lineage: ss_orc.ss_net_profit SIMPLE [(ss)ss.FieldSchema(name:ss_net_profit, type:float, comment:null), ] +POSTHOOK: Lineage: ss_orc.ss_sold_date_sk SIMPLE [(ss)ss.FieldSchema(name:ss_sold_date_sk, type:int, comment:null), ] +PREHOOK: query: drop table ss +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@ss +PREHOOK: Output: default@ss +POSTHOOK: query: drop table ss +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@ss +POSTHOOK: Output: default@ss +PREHOOK: query: drop table ss_part +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@ss_part +PREHOOK: Output: default@ss_part +POSTHOOK: query: drop table ss_part +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@ss_part +POSTHOOK: Output: default@ss_part +PREHOOK: query: explain insert overwrite table ss_part_orc partition (ss_sold_date_sk) +select ss_net_paid_inc_tax, + ss_net_profit, + ss_sold_date_sk + from ss_orc + where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638 + group by ss_sold_date_sk, + ss_net_paid_inc_tax, + ss_net_profit + distribute by ss_sold_date_sk +PREHOOK: type: QUERY +POSTHOOK: query: explain insert overwrite table ss_part_orc partition (ss_sold_date_sk) +select ss_net_paid_inc_tax, + ss_net_profit, + ss_sold_date_sk + from ss_orc + where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638 + group by ss_sold_date_sk, + ss_net_paid_inc_tax, + ss_net_profit + distribute by ss_sold_date_sk +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: ss_orc + Statistics: Num rows: 24 Data size: 288 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((ss_sold_date_sk >= 2452617) and (ss_sold_date_sk <= 2452638)) (type: boolean) + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_net_paid_inc_tax (type: float), ss_net_profit (type: float) + outputColumnNames: ss_sold_date_sk, ss_net_paid_inc_tax, ss_net_profit + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: ss_sold_date_sk (type: int), ss_net_paid_inc_tax (type: float), ss_net_profit (type: float) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: float), _col2 (type: float) + sort order: +++ + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: float), KEY._col2 (type: float) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: float), _col2 (type: float), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.ss_part_orc + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + partition: + ss_sold_date_sk + replace: true + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.ss_part_orc + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: insert overwrite table ss_part_orc partition (ss_sold_date_sk) +select ss_net_paid_inc_tax, + ss_net_profit, + ss_sold_date_sk + from ss_orc + where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638 + group by ss_sold_date_sk, + ss_net_paid_inc_tax, + ss_net_profit + distribute by ss_sold_date_sk +PREHOOK: type: QUERY +PREHOOK: Input: default@ss_orc +PREHOOK: Output: default@ss_part_orc +POSTHOOK: query: insert overwrite table ss_part_orc partition (ss_sold_date_sk) +select ss_net_paid_inc_tax, + ss_net_profit, + ss_sold_date_sk + from ss_orc + where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638 + group by ss_sold_date_sk, + ss_net_paid_inc_tax, + ss_net_profit + distribute by ss_sold_date_sk +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ss_orc +POSTHOOK: Output: default@ss_part_orc@ss_sold_date_sk=2452617 +POSTHOOK: Output: default@ss_part_orc@ss_sold_date_sk=2452638 +POSTHOOK: Lineage: ss_part_orc PARTITION(ss_sold_date_sk=2452617).ss_net_paid_inc_tax SIMPLE [(ss_orc)ss_orc.FieldSchema(name:ss_net_paid_inc_tax, type:float, comment:null), ] +POSTHOOK: Lineage: ss_part_orc PARTITION(ss_sold_date_sk=2452617).ss_net_profit SIMPLE [(ss_orc)ss_orc.FieldSchema(name:ss_net_profit, type:float, comment:null), ] +POSTHOOK: Lineage: ss_part_orc PARTITION(ss_sold_date_sk=2452638).ss_net_paid_inc_tax SIMPLE [(ss_orc)ss_orc.FieldSchema(name:ss_net_paid_inc_tax, type:float, comment:null), ] +POSTHOOK: Lineage: ss_part_orc PARTITION(ss_sold_date_sk=2452638).ss_net_profit SIMPLE [(ss_orc)ss_orc.FieldSchema(name:ss_net_profit, type:float, comment:null), ] +PREHOOK: query: desc formatted ss_part_orc partition(ss_sold_date_sk=2452617) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@ss_part_orc +POSTHOOK: query: desc formatted ss_part_orc partition(ss_sold_date_sk=2452617) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@ss_part_orc +# col_name data_type comment + +ss_net_paid_inc_tax float +ss_net_profit float + +# Partition Information +# col_name data_type comment + +ss_sold_date_sk int + +# Detailed Partition Information +Partition Value: [2452617] +Database: default +Table: ss_part_orc +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 11 + rawDataSize 88 + totalSize 417 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: select * from ss_part_orc where ss_sold_date_sk=2452617 +PREHOOK: type: QUERY +PREHOOK: Input: default@ss_part_orc +PREHOOK: Input: default@ss_part_orc@ss_sold_date_sk=2452617 +#### A masked pattern was here #### +POSTHOOK: query: select * from ss_part_orc where ss_sold_date_sk=2452617 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ss_part_orc +POSTHOOK: Input: default@ss_part_orc@ss_sold_date_sk=2452617 +#### A masked pattern was here #### +2.1 -2026.3 2452617 +2.99 -11.32 2452617 +85.8 25.61 2452617 +552.96 -1363.84 2452617 +565.92 196.48 2452617 +879.07 -2185.76 2452617 +1765.07 -4648.8 2452617 +3423.95 -3164.07 2452617 +5362.01 -600.28 2452617 +7412.83 2071.68 2452617 +10022.63 3952.8 2452617 +PREHOOK: query: desc formatted ss_part_orc partition(ss_sold_date_sk=2452638) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@ss_part_orc +POSTHOOK: query: desc formatted ss_part_orc partition(ss_sold_date_sk=2452638) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@ss_part_orc +# col_name data_type comment + +ss_net_paid_inc_tax float +ss_net_profit float + +# Partition Information +# col_name data_type comment + +ss_sold_date_sk int + +# Detailed Partition Information +Partition Value: [2452638] +Database: default +Table: ss_part_orc +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 13 + rawDataSize 104 + totalSize 440 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: select * from ss_part_orc where ss_sold_date_sk=2452638 +PREHOOK: type: QUERY +PREHOOK: Input: default@ss_part_orc +PREHOOK: Input: default@ss_part_orc@ss_sold_date_sk=2452638 +#### A masked pattern was here #### +POSTHOOK: query: select * from ss_part_orc where ss_sold_date_sk=2452638 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ss_part_orc +POSTHOOK: Input: default@ss_part_orc@ss_sold_date_sk=2452638 +#### A masked pattern was here #### +0.15 -241.22 2452638 +150.39 -162.12 2452638 +156.67 -4626.56 2452638 +181.03 -207.24 2452638 +267.01 -3266.36 2452638 +317.87 -3775.38 2452638 +1327.08 57.97 2452638 +1413.19 178.08 2452638 +1524.33 494.37 2452638 +1971.35 -488.25 2452638 +4133.98 -775.72 2452638 +4329.49 -4000.51 2452638 +10171.1 660.48 2452638 +PREHOOK: query: explain insert overwrite table ss_part_orc partition (ss_sold_date_sk) +select ss_net_paid_inc_tax, + ss_net_profit, + ss_sold_date_sk + from ss_orc + where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638 + distribute by ss_sold_date_sk +PREHOOK: type: QUERY +POSTHOOK: query: explain insert overwrite table ss_part_orc partition (ss_sold_date_sk) +select ss_net_paid_inc_tax, + ss_net_profit, + ss_sold_date_sk + from ss_orc + where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638 + distribute by ss_sold_date_sk +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: ss_orc + Statistics: Num rows: 24 Data size: 288 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((ss_sold_date_sk >= 2452617) and (ss_sold_date_sk <= 2452638)) (type: boolean) + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ss_net_paid_inc_tax (type: float), ss_net_profit (type: float), ss_sold_date_sk (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: float), _col1 (type: float), _col2 (type: int) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: float), VALUE._col1 (type: float), VALUE._col2 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.ss_part_orc + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + partition: + ss_sold_date_sk + replace: true + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.ss_part_orc + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: insert overwrite table ss_part_orc partition (ss_sold_date_sk) +select ss_net_paid_inc_tax, + ss_net_profit, + ss_sold_date_sk + from ss_orc + where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638 + distribute by ss_sold_date_sk +PREHOOK: type: QUERY +PREHOOK: Input: default@ss_orc +PREHOOK: Output: default@ss_part_orc +POSTHOOK: query: insert overwrite table ss_part_orc partition (ss_sold_date_sk) +select ss_net_paid_inc_tax, + ss_net_profit, + ss_sold_date_sk + from ss_orc + where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638 + distribute by ss_sold_date_sk +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ss_orc +POSTHOOK: Output: default@ss_part_orc@ss_sold_date_sk=2452617 +POSTHOOK: Output: default@ss_part_orc@ss_sold_date_sk=2452638 +POSTHOOK: Lineage: ss_part_orc PARTITION(ss_sold_date_sk=2452617).ss_net_paid_inc_tax SIMPLE [(ss_orc)ss_orc.FieldSchema(name:ss_net_paid_inc_tax, type:float, comment:null), ] +POSTHOOK: Lineage: ss_part_orc PARTITION(ss_sold_date_sk=2452617).ss_net_profit SIMPLE [(ss_orc)ss_orc.FieldSchema(name:ss_net_profit, type:float, comment:null), ] +POSTHOOK: Lineage: ss_part_orc PARTITION(ss_sold_date_sk=2452638).ss_net_paid_inc_tax SIMPLE [(ss_orc)ss_orc.FieldSchema(name:ss_net_paid_inc_tax, type:float, comment:null), ] +POSTHOOK: Lineage: ss_part_orc PARTITION(ss_sold_date_sk=2452638).ss_net_profit SIMPLE [(ss_orc)ss_orc.FieldSchema(name:ss_net_profit, type:float, comment:null), ] +PREHOOK: query: desc formatted ss_part_orc partition(ss_sold_date_sk=2452617) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@ss_part_orc +POSTHOOK: query: desc formatted ss_part_orc partition(ss_sold_date_sk=2452617) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@ss_part_orc +# col_name data_type comment + +ss_net_paid_inc_tax float +ss_net_profit float + +# Partition Information +# col_name data_type comment + +ss_sold_date_sk int + +# Detailed Partition Information +Partition Value: [2452617] +Database: default +Table: ss_part_orc +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 11 + rawDataSize 88 + totalSize 417 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: select * from ss_part_orc where ss_sold_date_sk=2452617 +PREHOOK: type: QUERY +PREHOOK: Input: default@ss_part_orc +PREHOOK: Input: default@ss_part_orc@ss_sold_date_sk=2452617 +#### A masked pattern was here #### +POSTHOOK: query: select * from ss_part_orc where ss_sold_date_sk=2452617 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ss_part_orc +POSTHOOK: Input: default@ss_part_orc@ss_sold_date_sk=2452617 +#### A masked pattern was here #### +3423.95 -3164.07 2452617 +5362.01 -600.28 2452617 +565.92 196.48 2452617 +85.8 25.61 2452617 +7412.83 2071.68 2452617 +879.07 -2185.76 2452617 +1765.07 -4648.8 2452617 +552.96 -1363.84 2452617 +2.1 -2026.3 2452617 +10022.63 3952.8 2452617 +2.99 -11.32 2452617 +PREHOOK: query: desc formatted ss_part_orc partition(ss_sold_date_sk=2452638) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@ss_part_orc +POSTHOOK: query: desc formatted ss_part_orc partition(ss_sold_date_sk=2452638) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@ss_part_orc +# col_name data_type comment + +ss_net_paid_inc_tax float +ss_net_profit float + +# Partition Information +# col_name data_type comment + +ss_sold_date_sk int + +# Detailed Partition Information +Partition Value: [2452638] +Database: default +Table: ss_part_orc +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 13 + rawDataSize 104 + totalSize 440 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: select * from ss_part_orc where ss_sold_date_sk=2452638 +PREHOOK: type: QUERY +PREHOOK: Input: default@ss_part_orc +PREHOOK: Input: default@ss_part_orc@ss_sold_date_sk=2452638 +#### A masked pattern was here #### +POSTHOOK: query: select * from ss_part_orc where ss_sold_date_sk=2452638 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ss_part_orc +POSTHOOK: Input: default@ss_part_orc@ss_sold_date_sk=2452638 +#### A masked pattern was here #### +4329.49 -4000.51 2452638 +1413.19 178.08 2452638 +150.39 -162.12 2452638 +1524.33 494.37 2452638 +0.15 -241.22 2452638 +267.01 -3266.36 2452638 +181.03 -207.24 2452638 +1971.35 -488.25 2452638 +1327.08 57.97 2452638 +156.67 -4626.56 2452638 +317.87 -3775.38 2452638 +10171.1 660.48 2452638 +4133.98 -775.72 2452638 +PREHOOK: query: drop table ss_orc +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@ss_orc +PREHOOK: Output: default@ss_orc +POSTHOOK: query: drop table ss_orc +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@ss_orc +POSTHOOK: Output: default@ss_orc +PREHOOK: query: drop table ss_part_orc +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@ss_part_orc +PREHOOK: Output: default@ss_part_orc +POSTHOOK: query: drop table ss_part_orc +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@ss_part_orc +POSTHOOK: Output: default@ss_part_orc