Index: build-common.xml
===================================================================
--- build-common.xml (revision 1438869)
+++ build-common.xml (working copy)
@@ -57,7 +57,7 @@
-
+
Index: common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
===================================================================
--- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (revision 1438869)
+++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (working copy)
@@ -37,8 +37,6 @@
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hive.common.LogUtils;
-import org.apache.hadoop.hive.common.LogUtils.LogInitializationException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.shims.ShimLoader;
import org.apache.hadoop.mapred.JobConf;
@@ -673,6 +671,17 @@
HIVE_MULTI_INSERT_MOVE_TASKS_SHARE_DEPENDENCIES(
"hive.multi.insert.move.tasks.share.dependencies", false),
+ // If this is set, when writing partitions, the metadata will include the bucketing/sorting
+ // properties with which the data was written if any (this will not overwrite the metadata
+ // inherited from the table if the table is bucketed/sorted)
+ HIVE_INFER_BUCKET_SORT("hive.exec.infer.bucket.sort", false),
+ // If this is set, when setting the number of reducers for the map reduce task which writes the
+ // final output files, it will choose a number which is a power of two. The number of reducers
+ // may be set to a power of two, only to be followed by a merge task, which would prevent
+ // anything from being inferred.
+ HIVE_INFER_BUCKET_SORT_NUM_BUCKETS_POWER_TWO(
+ "hive.exec.infer.bucket.sort.num.buckets.power.two", false),
+
/* The following section contains all configurations used for list bucketing feature.*/
/* This is not for clients. but only for block merge task. */
/* This is used by BlockMergeTask to send out flag to RCFileMergeMapper */
Index: conf/hive-default.xml.template
===================================================================
--- conf/hive-default.xml.template (revision 1438869)
+++ conf/hive-default.xml.template (working copy)
@@ -1652,6 +1652,39 @@
+ hive.exec.infer.bucket.sort
+ false
+
+ If this is set, when writing partitions, the metadata will include the bucketing/sorting
+ properties with which the data was written if any (this will not overwrite the metadata
+ inherited from the table if the table is bucketed/sorted)
+
+
+
+
+ hive.exec.infer.bucket.sort.num.buckets.power.two
+ false
+
+ If this is set, when setting the number of reducers for the map reduce task which writes the
+ final output files, it will choose a number which is a power of two, unless the user specifies
+ the number of reducers to use using mapred.reduce.tasks. The number of reducers
+ may be set to a power of two, only to be followed by a merge task, which would prevent
+ anything from being inferred.
+ With hive.exec.infer.bucket.sort set to true:
+ Advantages: If this is not set, the number of buckets for partitions will seem arbitrary,
+ which means that the number of mappers used for optimized joins, for example, will
+ be very low. With this set, since the number of buckets used for any partition is
+ a power of two, the number of mappers used for optimized joins will be the least
+ number of buckets used by any partition being joined.
+ Disadvantages: This may mean a much larger or much smaller number of reducers being used in the
+ final map reduce job, e.g. if a job was originally going to take 257 reducers,
+ it will now take 512 reducers, similarly if the max number of reducers is 511,
+ and a job was going to use this many, it will now use 256 reducers.
+
+
+
+
+
hive.groupby.orderby.position.alias
false
Whether to enable using Column Position Alias in Group By or Order By
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/ConditionalTask.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/ConditionalTask.java (revision 1438869)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/ConditionalTask.java (working copy)
@@ -91,6 +91,12 @@
//recursively remove this task from its children's parent task
tsk.removeFromChildrenTasks();
} else {
+ if (getParentTasks() != null) {
+ // This makes it so that we can go back up the tree later
+ for (Task<? extends Serializable> task : getParentTasks()) {
+ task.addDependentTask(tsk);
+ }
+ }
// resolved task
if (!driverContext.getRunnable().contains(tsk)) {
console.printInfo(tsk.getId() + " is selected by condition resolver.");
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/ExtractOperator.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/ExtractOperator.java (revision 1438869)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/ExtractOperator.java (working copy)
@@ -49,4 +49,16 @@
public OperatorType getType() {
return OperatorType.EXTRACT;
}
+
+ /**
+ * @return the name of the operator
+ */
+ @Override
+ public String getName() {
+ return getOperatorName();
+ }
+
+ static public String getOperatorName() {
+ return "EX";
+ }
}
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/ForwardOperator.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/ForwardOperator.java (revision 1438869)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/ForwardOperator.java (working copy)
@@ -40,4 +40,16 @@
public OperatorType getType() {
return OperatorType.FORWARD;
}
+
+ /**
+ * @return the name of the operator
+ */
+ @Override
+ public String getName() {
+ return getOperatorName();
+ }
+
+ static public String getOperatorName() {
+ return "FOR";
+ }
}
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/MapRedTask.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/MapRedTask.java (revision 1438869)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/MapRedTask.java (working copy)
@@ -422,6 +422,10 @@
* Estimate the number of reducers needed for this job, based on job input,
* and configuration parameters.
*
+ * The output of this method should only be used if the output of this
+ * MapRedTask is not being used to populate a bucketed table and the user
+ * has not specified the number of reducers to use.
+ *
* @return the number of reducers.
*/
private int estimateNumberOfReducers() throws IOException {
@@ -447,6 +451,30 @@
int reducers = (int) ((totalInputFileSize + bytesPerReducer - 1) / bytesPerReducer);
reducers = Math.max(1, reducers);
reducers = Math.min(maxReducers, reducers);
+
+ // If this map reduce job writes final data to a table and bucketing is being inferred,
+ // and the user has configured Hive to do this, make sure the number of reducers is a
+ // power of two
+ if (conf.getBoolVar(HiveConf.ConfVars.HIVE_INFER_BUCKET_SORT_NUM_BUCKETS_POWER_TWO) &&
+ work.isFinalMapRed() && !work.getBucketedColsByDirectory().isEmpty()) {
+
+ int reducersLog = (int)(Math.log(reducers) / Math.log(2)) + 1;
+ int reducersPowerTwo = (int)Math.pow(2, reducersLog);
+
+ // If the original number of reducers was a power of two, use that
+ if (reducersPowerTwo / 2 == reducers) {
+ return reducers;
+ } else if (reducersPowerTwo > maxReducers) {
+ // If the next power of two greater than the original number of reducers is greater
+ // than the max number of reducers, use the preceding power of two, which is strictly
+ // less than the original number of reducers and hence the max
+ reducers = reducersPowerTwo / 2;
+ } else {
+ // Otherwise use the smallest power of two greater than the original number of reducers
+ reducers = reducersPowerTwo;
+ }
+ }
+
return reducers;
}
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java (revision 1438869)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java (working copy)
@@ -35,21 +35,28 @@
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.api.InvalidOperationException;
+import org.apache.hadoop.hive.metastore.api.Order;
import org.apache.hadoop.hive.ql.Context;
import org.apache.hadoop.hive.ql.DriverContext;
import org.apache.hadoop.hive.ql.hooks.LineageInfo.DataContainer;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;
import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
+import org.apache.hadoop.hive.ql.io.rcfile.merge.BlockMergeTask;
import org.apache.hadoop.hive.ql.lockmgr.HiveLock;
import org.apache.hadoop.hive.ql.lockmgr.HiveLockManager;
import org.apache.hadoop.hive.ql.lockmgr.HiveLockObj;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.BucketCol;
+import org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.SortCol;
+import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer;
import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx;
import org.apache.hadoop.hive.ql.plan.LoadFileDesc;
import org.apache.hadoop.hive.ql.plan.LoadMultiFilesDesc;
import org.apache.hadoop.hive.ql.plan.LoadTableDesc;
+import org.apache.hadoop.hive.ql.plan.MapredWork;
import org.apache.hadoop.hive.ql.plan.MoveWork;
import org.apache.hadoop.hive.ql.plan.api.StageType;
import org.apache.hadoop.hive.ql.session.SessionState;
@@ -276,6 +283,48 @@
}
} else {
LOG.info("Partition is: " + tbd.getPartitionSpec().toString());
+
+ // Check if the bucketing and/or sorting columns were inferred
+ List bucketCols = null;
+ List sortCols = null;
+ int numBuckets = -1;
+ Task task = this;
+ String path = tbd.getSourceDir();
+ // Find the first ancestor of this MoveTask which is some form of map reduce task
+ // (Either standard, local, or a merge)
+ while (task.getParentTasks() != null && task.getParentTasks().size() == 1) {
+ task = (Task)task.getParentTasks().get(0);
+ // If it was a merge task or a local map reduce task, nothing can be inferred
+ if (task instanceof BlockMergeTask || task instanceof MapredLocalTask) {
+ break;
+ }
+
+ // If it's a standard map reduce task, check what, if anything, it inferred about
+ // the directory this move task is moving
+ if (task instanceof MapRedTask) {
+ MapredWork work = (MapredWork)task.getWork();
+ bucketCols = work.getBucketedColsByDirectory().get(path);
+ sortCols = work.getSortedColsByDirectory().get(path);
+ numBuckets = work.getNumReduceTasks();
+ if (bucketCols != null || sortCols != null) {
+ // This must be a final map reduce task (the task containing the file sink
+ // operator that writes the final output)
+ assert work.isFinalMapRed();
+ }
+ break;
+ }
+
+ // If it's a move task, get the path the files were moved from, this is what any
+ // preceding map reduce task inferred information about, and moving does not invalidate
+ // those assumptions
+ // This can happen when a conditional merge is added before the final MoveTask, but the
+ // condition for merging is not met, see GenMRFileSink1.
+ if (task instanceof MoveTask) {
+ if (((MoveTask)task).getWork().getLoadFileWork() != null) {
+ path = ((MoveTask)task).getWork().getLoadFileWork().getSourceDir();
+ }
+ }
+ }
// deal with dynamic partitions
DynamicPartitionCtx dpCtx = tbd.getDPCtx();
if (dpCtx != null && dpCtx.getNumDPCols() > 0) { // dynamic partitions
@@ -314,6 +363,10 @@
for (LinkedHashMap partSpec: dp) {
Partition partn = db.getPartition(table, partSpec, false);
+ if (bucketCols != null || sortCols != null) {
+ updatePartitionBucketSortColumns(table, partn, bucketCols, numBuckets, sortCols);
+ }
+
WriteEntity enty = new WriteEntity(partn, true);
if (work.getOutputs() != null) {
work.getOutputs().add(enty);
@@ -344,6 +397,11 @@
tbd.getPartitionSpec(), tbd.getReplace(), tbd.getHoldDDLTime(),
tbd.getInheritTableSpecs(), isSkewedStoredAsDirs(tbd));
Partition partn = db.getPartition(table, tbd.getPartitionSpec(), false);
+
+ if (bucketCols != null || sortCols != null) {
+ updatePartitionBucketSortColumns(table, partn, bucketCols, numBuckets, sortCols);
+ }
+
dc = new DataContainer(table.getTTable(), partn.getTPartition());
// add this partition to post-execution hook
if (work.getOutputs() != null) {
@@ -371,6 +429,77 @@
.isSkewedStoredAsDir();
}
+ /**
+ * Alters the bucketing and/or sorting columns of the partition provided they meet some
+ * validation criteria, e.g. the number of buckets match the number of files, and the
+ * columns are not partition columns
+ * @param table
+ * @param partn
+ * @param bucketCols
+ * @param numBuckets
+ * @param sortCols
+ * @throws IOException
+ * @throws InvalidOperationException
+ * @throws HiveException
+ */
+ private void updatePartitionBucketSortColumns(Table table, Partition partn,
+ List bucketCols, int numBuckets, List sortCols)
+ throws IOException, InvalidOperationException, HiveException {
+
+ boolean updateBucketCols = false;
+ if (bucketCols != null) {
+ FileSystem fileSys = partn.getPartitionPath().getFileSystem(conf);
+ FileStatus[] fileStatus = Utilities.getFileStatusRecurse(
+ partn.getPartitionPath(), 1, fileSys);
+ // Verify the number of buckets equals the number of files
+ if (fileStatus.length == numBuckets) {
+ List newBucketCols = new ArrayList();
+ updateBucketCols = true;
+ for (BucketCol bucketCol : bucketCols) {
+ if (bucketCol.getIndexes().get(0) < partn.getCols().size()) {
+ newBucketCols.add(partn.getCols().get(
+ bucketCol.getIndexes().get(0)).getName());
+ } else {
+ // If the table is bucketed on a partition column, not valid for bucketing
+ updateBucketCols = false;
+ break;
+ }
+ }
+ if (updateBucketCols) {
+ partn.getBucketCols().clear();
+ partn.getBucketCols().addAll(newBucketCols);
+ partn.getTPartition().getSd().setNumBuckets(numBuckets);
+ }
+ }
+ }
+
+ boolean updateSortCols = false;
+ if (sortCols != null) {
+ List newSortCols = new ArrayList();
+ updateSortCols = true;
+ for (SortCol sortCol : sortCols) {
+ if (sortCol.getIndexes().get(0) < partn.getCols().size()) {
+ newSortCols.add(new Order(
+ partn.getCols().get(sortCol.getIndexes().get(0)).getName(),
+ sortCol.getSortOrder() == '+' ? BaseSemanticAnalyzer.HIVE_COLUMN_ORDER_ASC :
+ BaseSemanticAnalyzer.HIVE_COLUMN_ORDER_DESC));
+ } else {
+ // If the table is sorted on a partition column, not valid for sorting
+ updateSortCols = false;
+ break;
+ }
+ }
+ if (updateSortCols) {
+ partn.getSortCols().clear();
+ partn.getSortCols().addAll(newSortCols);
+ }
+ }
+
+ if (updateBucketCols || updateSortCols) {
+ db.alterPartition(table.getDbName(), table.getTableName(), partn);
+ }
+ }
+
/*
* Does the move task involve moving to a local file system
*/
Index: ql/src/java/org/apache/hadoop/hive/ql/lib/RuleExactMatch.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/lib/RuleExactMatch.java (revision 0)
+++ ql/src/java/org/apache/hadoop/hive/ql/lib/RuleExactMatch.java (working copy)
@@ -0,0 +1,82 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.lib;
+
+import java.util.Stack;
+
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+
+/**
+ * Implementation of the Rule interface for Nodes. Used in Node dispatching to dispatch
+ * process/visitor functions for Nodes. The cost method returns 1 if there is an exact
+ * match between the pattern and the stack, otherwise -1.
+ */
+public class RuleExactMatch implements Rule {
+
+ private final String ruleName;
+ private final String pattern;
+
+ /**
+ * The rule specified as operator names separated by % symbols, the left side represents the
+ * bottom of the stack.
+ *
+ * E.g. TS%FIL%RS -> means
+ * TableScan Node followed by Filter followed by ReduceSink in the tree, or, in terms of the
+ * stack, ReduceSink on top followed by Filter followed by TableScan
+ *
+ * @param ruleName
+ * name of the rule
+ * @param pattern
+ * string specification of the rule
+ **/
+ public RuleExactMatch(String ruleName, String pattern) {
+ this.ruleName = ruleName;
+ this.pattern = pattern;
+ }
+
+ /**
+ * This function returns the cost of the rule for the specified stack. Returns 1 if there is
+ * an exact match, otherwise -1
+ *
+ * @param stack
+ * Node stack encountered so far
+ * @return cost of the function
+ * @throws SemanticException
+ */
+ public int cost(Stack stack) throws SemanticException {
+ int numElems = (stack != null ? stack.size() : 0);
+ String name = new String();
+ for (int pos = numElems - 1; pos >= 0; pos--) {
+ name = stack.get(pos).getName() + "%" + name;
+ }
+
+ if (pattern.equals(name)) {
+ return 1;
+ }
+
+ return -1;
+ }
+
+ /**
+ * @return the name of the rule
+ **/
+ public String getName() {
+ return ruleName;
+ }
+}
Index: ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java (revision 1438869)
+++ ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java (working copy)
@@ -409,7 +409,7 @@
}
/**
- * Updates the existing table metadata with the new metadata.
+ * Updates the existing partition metadata with the new metadata.
*
* @param tblName
* name of the existing table
@@ -422,13 +422,30 @@
public void alterPartition(String tblName, Partition newPart)
throws InvalidOperationException, HiveException {
Table t = newTable(tblName);
+ alterPartition(t.getDbName(), t.getTableName(), newPart);
+ }
+
+ /**
+ * Updates the existing partition metadata with the new metadata.
+ *
+ * @param dbName
+ * name of the existing table's database
+ * @param tblName
+ * name of the existing table
+ * @param newPart
+ * new partition
+ * @throws InvalidOperationException
+ * if the changes in metadata is not acceptable
+ * @throws TException
+ */
+ public void alterPartition(String dbName, String tblName, Partition newPart)
+ throws InvalidOperationException, HiveException {
try {
// Remove the DDL time so that it gets refreshed
if (newPart.getParameters() != null) {
newPart.getParameters().remove(hive_metastoreConstants.DDL_TIME);
}
- getMSC().alter_partition(t.getDbName(), t.getTableName(),
- newPart.getTPartition());
+ getMSC().alter_partition(dbName, tblName, newPart.getTPartition());
} catch (MetaException e) {
throw new HiveException("Unable to alter partition.", e);
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java (revision 1438869)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java (working copy)
@@ -548,6 +548,18 @@
.getValueCols();
ArrayList newOutputColNames = new ArrayList();
java.util.ArrayList newValueEval = new ArrayList();
+ // ReduceSinkOperators that precede GroupByOperators have the keys in the schema in addition
+ // to the values. These are not pruned.
+ List oldSchema = oldRR.getRowSchema().getSignature();
+ for (ColumnInfo colInfo : oldSchema) {
+ if (colInfo.getInternalName().startsWith(Utilities.ReduceField.KEY.toString() + ".")) {
+ String[] nm = oldRR.reverseLookup(colInfo.getInternalName());
+ newRR.put(nm[0], nm[1], colInfo);
+ sig.add(colInfo);
+ } else {
+ break;
+ }
+ }
for (int i = 0; i < retainFlags.length; i++) {
if (retainFlags[i]) {
newValueEval.add(originalValueEval.get(i));
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java (revision 1438869)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java (working copy)
@@ -110,6 +110,9 @@
parseCtx.getQB().getParseInfo().isInsertToTable();
HiveConf hconf = parseCtx.getConf();
+ // Mark this task as a final map reduce task (ignoring the optional merge task)
+ ((MapredWork)currTask.getWork()).setFinalMapRed(true);
+
// If this file sink desc has been processed due to a linked file sink desc,
// use that task
Map> fileSinkDescs = ctx.getLinkedFileDescTasks();
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/BucketingSortingCtx.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/BucketingSortingCtx.java (revision 0)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/BucketingSortingCtx.java (working copy)
@@ -0,0 +1,230 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer.physical;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+
+/**
+ * This class contains the bucketing sorting context that is passed
+ * while walking the operator tree in inferring bucket/sort columns. The context
+ * contains the mappings from operators and files to the columns their output is
+ * bucketed/sorted on.
+ */
+public class BucketingSortingCtx implements NodeProcessorCtx {
+
+ // A mapping from an operator to the columns by which its output is bucketed
+ Map, List> bucketedColsByOp;
+ // A mapping from a directory which a FileSinkOperator writes into to the columns by which that
+ // output is bucketed
+ Map> bucketedColsByDirectory;
+
+ // A mapping from an operator to the columns by which its output is sorted
+ Map, List> sortedColsByOp;
+ // A mapping from a directory which a FileSinkOperator writes into to the columns by which that
+ // output is sorted
+ Map> sortedColsByDirectory;
+
+ public BucketingSortingCtx() {
+ this.bucketedColsByOp = new HashMap, List>();
+ this.bucketedColsByDirectory = new HashMap>();
+ this.sortedColsByOp = new HashMap, List>();
+ this.sortedColsByDirectory = new HashMap>();
+ }
+
+
+ public List getBucketedCols(Operator extends OperatorDesc> op) {
+ return bucketedColsByOp.get(op);
+ }
+
+
+ public void setBucketedCols(Operator extends OperatorDesc> op, List bucketCols) {
+ this.bucketedColsByOp.put(op, bucketCols);
+ }
+
+ public Map> getBucketedColsByDirectory() {
+ return bucketedColsByDirectory;
+ }
+
+
+ public void setBucketedColsByDirectory(Map> bucketedColsByDirectory) {
+ this.bucketedColsByDirectory = bucketedColsByDirectory;
+ }
+
+
+ public List getSortedCols(Operator extends OperatorDesc> op) {
+ return sortedColsByOp.get(op);
+ }
+
+
+ public void setSortedCols(Operator extends OperatorDesc> op, List sortedCols) {
+ this.sortedColsByOp.put(op, sortedCols);
+ }
+
+ public Map> getSortedColsByDirectory() {
+ return sortedColsByDirectory;
+ }
+
+
+ public void setSortedColsByDirectory(Map> sortedColsByDirectory) {
+ this.sortedColsByDirectory = sortedColsByDirectory;
+ }
+
+ /**
+ *
+ * BucketSortCol.
+ *
+ * Classes that implement this interface provide a way to store information about equivalent
+ * columns as their names and indexes in the schema change going into and out of operators. The
+ * definition of equivalent columns is up to the class which uses these classes, e.g.
+ * BucketingSortingOpProcFactory. For example, two columns are equivalent if they
+ * contain exactly the same data. Though, it's possible that two columns contain exactly the
+ * same data and are not known to be equivalent.
+ *
+ * E.g. SELECT key a, key b FROM (SELECT key, count(*) c FROM src GROUP BY key) s;
+ * In this case, assuming this is done in a single map reduce job with the group by operator
+ * processed in the reducer, the data coming out of the group by operator will be bucketed
+ * by key, which would be at index 0 in the schema, after the outer select operator, the output
+ * can be viewed as bucketed by either the column with alias a or the column with alias b. To
+ * represent this, there could be a single BucketSortCol implementation instance whose names
+ * include both a and b, and whose indexes include both 0 and 1.
+ *
+ * Implementations of this interface should maintain the restriction that the alias
+ * getNames().get(i) should have index getIndexes().get(i) in the schema.
+ */
+ public static interface BucketSortCol {
+ // Get a list of aliases for the same column
+ public List getNames();
+
+ // Get a list of indexes for which the columns in the schema are the same
+ public List getIndexes();
+
+ // Add an alternative alias for the column this instance represents, and its index in the
+ // schema.
+ public void addAlias(String name, Integer index);
+ }
+
+ /**
+ *
+ * BucketCol.
+ *
+ * An implementation of BucketSortCol which contains known aliases/indexes of equivalent columns
+ * which data is determined to be bucketed on.
+ */
+ public static final class BucketCol implements BucketSortCol, Serializable {
+ private static final long serialVersionUID = 1L;
+ // Equivalent aliases for the column
+ private final List names = new ArrayList();
+ // Indexes of those equivalent columns
+ private final List indexes = new ArrayList();
+
+ public BucketCol(String name, int index) {
+ addAlias(name, index);
+ }
+
+ public BucketCol() {
+
+ }
+
+ @Override
+ public List getNames() {
+ return names;
+ }
+
+ @Override
+ public List getIndexes() {
+ return indexes;
+ }
+
+ @Override
+ public void addAlias(String name, Integer index) {
+ names.add(name);
+ indexes.add(index);
+ }
+
+ @Override
+ // Chooses a representative alias and index to use as the String, the first is used because
+ // it is set in the constructor
+ public String toString() {
+ return "name: " + names.get(0) + " index: " + indexes.get(0);
+ }
+ }
+
+ /**
+ *
+ * SortCol.
+ *
+ * An implementation of BucketSortCol which contains known aliases/indexes of equivalent columns
+ * which data is determined to be sorted on. Unlike the aliases and indexes, the sort order
+ * is known to be constant for all equivalent columns.
+ */
+ public static final class SortCol implements BucketSortCol, Serializable {
+ private static final long serialVersionUID = 1L;
+ // Equivalent aliases for the column
+ private final List names = new ArrayList();
+ // Indexes of those equivalent columns
+ private final List indexes = new ArrayList();
+ // Sort order (+|-)
+ private final char sortOrder;
+
+ public SortCol(String name, int index, char sortOrder) {
+ this(sortOrder);
+ addAlias(name, index);
+ }
+
+ public SortCol(char sortOrder) {
+ this.sortOrder = sortOrder;
+ }
+
+
+ @Override
+ public List getNames() {
+ return names;
+ }
+
+ @Override
+ public List getIndexes() {
+ return indexes;
+ }
+
+ @Override
+ public void addAlias(String name, Integer index) {
+ names.add(name);
+ indexes.add(index);
+ }
+
+ public char getSortOrder() {
+ return sortOrder;
+ }
+
+ @Override
+ // Chooses a representative alias, index, and order to use as the String, the first is used
+ // because it is set in the constructor
+ public String toString() {
+ return "name: " + names.get(0) + " index: " + indexes.get(0) + " order: " + sortOrder;
+ }
+ }
+}
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/BucketingSortingInferenceOptimizer.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/BucketingSortingInferenceOptimizer.java (revision 0)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/BucketingSortingInferenceOptimizer.java (working copy)
@@ -0,0 +1,153 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer.physical;
+
+import java.util.ArrayList;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.hadoop.hive.ql.exec.ExecDriver;
+import org.apache.hadoop.hive.ql.exec.ExtractOperator;
+import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
+import org.apache.hadoop.hive.ql.exec.FilterOperator;
+import org.apache.hadoop.hive.ql.exec.ForwardOperator;
+import org.apache.hadoop.hive.ql.exec.GroupByOperator;
+import org.apache.hadoop.hive.ql.exec.JoinOperator;
+import org.apache.hadoop.hive.ql.exec.LateralViewForwardOperator;
+import org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator;
+import org.apache.hadoop.hive.ql.exec.LimitOperator;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
+import org.apache.hadoop.hive.ql.exec.SelectOperator;
+import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
+import org.apache.hadoop.hive.ql.lib.Dispatcher;
+import org.apache.hadoop.hive.ql.lib.GraphWalker;
+import org.apache.hadoop.hive.ql.lib.Node;
+import org.apache.hadoop.hive.ql.lib.NodeProcessor;
+import org.apache.hadoop.hive.ql.lib.PreOrderWalker;
+import org.apache.hadoop.hive.ql.lib.Rule;
+import org.apache.hadoop.hive.ql.lib.RuleExactMatch;
+import org.apache.hadoop.hive.ql.lib.RuleRegExp;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+
+/**
+ *
+ * BucketingSortingInferenceOptimizer.
+ *
+ * For each map reduce task, attempts to infer bucketing and sorting metadata for the outputs.
+ *
+ * Currently only map reduce tasks which produce final output have their output metadata inferred,
+ * but it can be extended to intermediate tasks as well.
+ *
+ * This should be run as the last physical optimizer, as other physical optimizers may invalidate
+ * the inferences made. If a physical optimizer depends on the results and is designed to
+ * carefully maintain these inferences, it may follow this one.
+ */
+public class BucketingSortingInferenceOptimizer implements PhysicalPlanResolver {
+
+ @Override
+ public PhysicalContext resolve(PhysicalContext pctx) throws SemanticException {
+ inferBucketingSorting(Utilities.getMRTasks(pctx.rootTasks));
+ return pctx;
+ }
+
+ /**
+ * For each map reduce task, if it has a reducer, infer whether or not the final output of the
+ * reducer is bucketed and/or sorted
+ *
+ * @param mapRedTasks
+ * @throws SemanticException
+ */
+ private void inferBucketingSorting(List<ExecDriver> mapRedTasks) throws SemanticException {
+ for (ExecDriver mapRedTask : mapRedTasks) {
+
+ // For now this only is used to determine the bucketing/sorting of outputs, in the future
+ // this can be removed to optimize the query plan based on the bucketing/sorting properties
+ // of the outputs of intermediate map reduce jobs.
+ if (!mapRedTask.getWork().isFinalMapRed()) {
+ continue;
+ }
+
+ Operator<? extends OperatorDesc> reducer = mapRedTask.getWork().getReducer();
+ if (reducer == null) {
+ continue;
+ }
+
+ BucketingSortingCtx bCtx = new BucketingSortingCtx();
+
+ // RuleRegExp rules are used to match operators anywhere in the tree
+ // RuleExactMatch rules are used to specify exactly what the tree should look like
+ // In particular, this guarantees that the first operator is the reducer
+ // (and its parent(s) are ReduceSinkOperators) since it begins walking the tree from
+ // the reducer.
+ Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
+ opRules.put(new RuleRegExp("R1", SelectOperator.getOperatorName() + "%"),
+ BucketingSortingOpProcFactory.getSelProc());
+ // Matches only GroupByOperators which are reducers, rather than map group by operators,
+ // or multi group by optimization specific operators
+ opRules.put(new RuleExactMatch("R2", GroupByOperator.getOperatorName() + "%"),
+ BucketingSortingOpProcFactory.getGroupByProc());
+ // Matches only JoinOperators which are reducers, rather than map joins, SMB map joins, etc.
+ opRules.put(new RuleExactMatch("R3", JoinOperator.getOperatorName() + "%"),
+ BucketingSortingOpProcFactory.getJoinProc());
+ opRules.put(new RuleRegExp("R4", ReduceSinkOperator.getOperatorName() + "%"),
+ BucketingSortingOpProcFactory.getReduceSinkProc());
+ opRules.put(new RuleRegExp("R5", FileSinkOperator.getOperatorName() + "%"),
+ BucketingSortingOpProcFactory.getFileSinkProc());
+ // Matches only ExtractOperators which are reducers
+ opRules.put(new RuleExactMatch("R6", ExtractOperator.getOperatorName() + "%"),
+ BucketingSortingOpProcFactory.getExtractProc());
+ opRules.put(new RuleRegExp("R7", FilterOperator.getOperatorName() + "%"),
+ BucketingSortingOpProcFactory.getFilterProc());
+ opRules.put(new RuleRegExp("R8", LimitOperator.getOperatorName() + "%"),
+ BucketingSortingOpProcFactory.getLimitProc());
+ opRules.put(new RuleRegExp("R9", LateralViewForwardOperator.getOperatorName() + "%"),
+ BucketingSortingOpProcFactory.getLateralViewForwardProc());
+ opRules.put(new RuleRegExp("R10", LateralViewJoinOperator.getOperatorName() + "%"),
+ BucketingSortingOpProcFactory.getLateralViewJoinProc());
+ // Matches only ForwardOperators which are preceded by some other operator in the tree,
+ // in particular it can't be a reducer (and hence cannot be one of the ForwardOperators
+ // added by the multi group by optimization)
+ opRules.put(new RuleRegExp("R11", ".+" + ForwardOperator.getOperatorName() + "%"),
+ BucketingSortingOpProcFactory.getForwardProc());
+ // Matches only ForwardOperators which are reducers and are followed by GroupByOperators
+ // (specific to the multi group by optimization)
+ opRules.put(new RuleExactMatch("R12", ForwardOperator.getOperatorName() + "%" +
+ GroupByOperator.getOperatorName() + "%"),
+ BucketingSortingOpProcFactory.getMultiGroupByProc());
+
+ // The dispatcher fires the processor corresponding to the closest matching rule and passes
+ // the context along
+ Dispatcher disp = new DefaultRuleDispatcher(BucketingSortingOpProcFactory.getDefaultProc(),
+ opRules, bCtx);
+ GraphWalker ogw = new PreOrderWalker(disp);
+
+ // Create a list of topop nodes
+ ArrayList<Node> topNodes = new ArrayList<Node>();
+ topNodes.add(reducer);
+ ogw.startWalking(topNodes, null);
+
+ mapRedTask.getWork().getBucketedColsByDirectory().putAll(bCtx.getBucketedColsByDirectory());
+ mapRedTask.getWork().getSortedColsByDirectory().putAll(bCtx.getSortedColsByDirectory());
+ }
+ }
+}
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/BucketingSortingOpProcFactory.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/BucketingSortingOpProcFactory.java (revision 0)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/BucketingSortingOpProcFactory.java (working copy)
@@ -0,0 +1,802 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer.physical;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Stack;
+
+import org.apache.hadoop.hive.ql.exec.ColumnInfo;
+import org.apache.hadoop.hive.ql.exec.ExtractOperator;
+import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
+import org.apache.hadoop.hive.ql.exec.ForwardOperator;
+import org.apache.hadoop.hive.ql.exec.GroupByOperator;
+import org.apache.hadoop.hive.ql.exec.JoinOperator;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
+import org.apache.hadoop.hive.ql.exec.SelectOperator;
+import org.apache.hadoop.hive.ql.lib.Node;
+import org.apache.hadoop.hive.ql.lib.NodeProcessor;
+import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.lib.Utils;
+import org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.BucketCol;
+import org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.BucketSortCol;
+import org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.SortCol;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc.ExprNodeDescEqualityWrapper;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+
+/**
+ * Operator factory for the rule processors for inferring bucketing/sorting columns.
+ */
+public class BucketingSortingOpProcFactory {
+
+ public static class DefaultInferrer implements NodeProcessor {
+
+ @Override
+ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+ Object... nodeOutputs) throws SemanticException {
+
+ return null;
+ }
+
+ }
+
+ /**
+ * Infers bucket/sort columns for operators which simply forward rows from the parent
+ * E.g. Forward operators and SELECT *
+ * @param op
+ * @param bctx
+ * @param parent
+ * @throws SemanticException
+ */
+ private static void processForward(Operator<? extends OperatorDesc> op, BucketingSortingCtx bctx,
+ Operator<? extends OperatorDesc> parent) throws SemanticException {
+
+ List<BucketCol> bucketCols = bctx.getBucketedCols(parent);
+ List<SortCol> sortCols = bctx.getSortedCols(parent);
+ List<ColumnInfo> colInfos = op.getSchema().getSignature();
+
+ if (bucketCols == null && sortCols == null) {
+ return;
+ }
+
+ List<BucketCol> newBucketCols;
+ List<SortCol> newSortCols;
+
+ if (bucketCols == null) {
+ newBucketCols = null;
+ } else {
+ newBucketCols = getNewBucketCols(bucketCols, colInfos);
+ }
+
+ if (sortCols == null) {
+ newSortCols = null;
+ } else {
+ newSortCols = getNewSortCols(sortCols, colInfos);
+ }
+
+ bctx.setBucketedCols(op, newBucketCols);
+ bctx.setSortedCols(op, newSortCols);
+ }
+
+ /**
+ * Returns the parent operator in the walk path to the current operator.
+ *
+ * @param stack The stack encoding the path.
+ *
+ * @return Operator The parent operator in the current path.
+ */
+ @SuppressWarnings("unchecked")
+ protected static Operator<? extends OperatorDesc> getParent(Stack<Node> stack) {
+ return (Operator<? extends OperatorDesc>)Utils.getNthAncestor(stack, 1);
+ }
+
+ /**
+ * Processor for Join Operator.
+ *
+ * This handles common joins, the tree should look like
+ * ReduceSinkOperator
+ * \
+ * .... --- JoinOperator
+ * /
+ * ReduceSink Operator
+ *
+ */
+ public static class JoinInferrer extends DefaultInferrer implements NodeProcessor {
+ @Override
+ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+ Object... nodeOutputs) throws SemanticException {
+
+ BucketingSortingCtx bctx = (BucketingSortingCtx)procCtx;
+ JoinOperator jop = (JoinOperator)nd;
+ List<ColumnInfo> colInfos = jop.getSchema().getSignature();
+ Byte[] order = jop.getConf().getTagOrder();
+
+ BucketCol[] newBucketCols = null;
+ SortCol[] newSortCols = null;
+
+ for (int i = 0; i < jop.getParentOperators().size(); i++) {
+
+ Operator<? extends OperatorDesc> parent = jop.getParentOperators().get(i);
+
+ // The caller of this method should guarantee this
+ assert(parent instanceof ReduceSinkOperator);
+
+ ReduceSinkOperator rop = (ReduceSinkOperator)jop.getParentOperators().get(i);
+
+ String sortOrder = rop.getConf().getOrder();
+ List<BucketCol> bucketCols = new ArrayList<BucketCol>();
+ List<SortCol> sortCols = new ArrayList<SortCol>();
+ // Go through the Reduce keys and find the matching column(s) in the reduce values
+ for (int keyIndex = 0; keyIndex < rop.getConf().getKeyCols().size(); keyIndex++) {
+ for (int valueIndex = 0; valueIndex < rop.getConf().getValueCols().size();
+ valueIndex++) {
+
+ if (new ExprNodeDescEqualityWrapper(rop.getConf().getValueCols().get(valueIndex)).
+ equals(new ExprNodeDescEqualityWrapper(rop.getConf().getKeyCols().get(
+ keyIndex)))) {
+
+ String colName = rop.getSchema().getSignature().get(valueIndex).getInternalName();
+ bucketCols.add(new BucketCol(colName, keyIndex));
+ sortCols.add(new SortCol(colName, keyIndex, sortOrder.charAt(keyIndex)));
+ break;
+ }
+ }
+ }
+
+ if (bucketCols.isEmpty()) {
+ assert(sortCols.isEmpty());
+ continue;
+ }
+
+ if (newBucketCols == null) {
+ assert(newSortCols == null);
+ // The number of join keys is equal to the number of keys in every reducer, although
+ // not every key may map to a value in the reducer
+ newBucketCols = new BucketCol[rop.getConf().getKeyCols().size()];
+ newSortCols = new SortCol[rop.getConf().getKeyCols().size()];
+ } else {
+ assert(newSortCols != null);
+ }
+
+ byte tag = (byte)rop.getConf().getTag();
+ List<ExprNodeDesc> exprs = jop.getConf().getExprs().get(tag);
+
+ int colInfosOffset = 0;
+ int orderValue = order[tag];
+ // Columns are output from the join from the different reduce sinks in the order of their
+ // offsets
+ for (byte orderIndex = 0; orderIndex < order.length; orderIndex++) {
+ if (order[orderIndex] < orderValue) {
+ colInfosOffset += jop.getConf().getExprs().get(orderIndex).size();
+ }
+ }
+
+ findBucketingSortingColumns(exprs, colInfos, bucketCols, sortCols, newBucketCols,
+ newSortCols, colInfosOffset);
+
+ }
+
+ setBucketingColsIfComplete(bctx, jop, newBucketCols);
+
+ setSortingColsIfComplete(bctx, jop, newSortCols);
+
+ return null;
+ }
+
+ }
+
+ /**
+ * If the list of output bucket columns has been populated and every column has at least
+ * one representative in the output they can be inferred
+ *
+ * @param bctx - BucketingSortingCtx containing inferred columns
+ * @param op - The operator we are inferring information about the output of
+ * @param newBucketCols - An array of columns on which the output is bucketed, e.g. as output by
+ * the method findBucketingSortingColumns
+ */
+ private static void setBucketingColsIfComplete(BucketingSortingCtx bctx,
+ Operator<? extends OperatorDesc> op, BucketCol[] newBucketCols) {
+
+ if (newBucketCols != null) {
+ List<BucketCol> newBucketColList = Arrays.asList(newBucketCols);
+ // If newBucketColList had a null value it means that at least one of the input bucket
+ // columns did not have a representative found in the output columns, so assume the data
+ // is no longer bucketed
+ if (!newBucketColList.contains(null)) {
+ bctx.setBucketedCols(op, newBucketColList);
+ }
+ }
+ }
+
+ /**
+ * If the list of output sort columns has been populated and every column has at least
+ * one representative in the output they can be inferred
+ *
+ * @param bctx - BucketingSortingCtx containing inferred columns
+ * @param op - The operator we are inferring information about the output of
+ * @param newSortCols - An array of columns on which the output is sorted, e.g. as output by
+ * the method findBucketingSortingColumns
+ */
+ private static void setSortingColsIfComplete(BucketingSortingCtx bctx,
+ Operator<? extends OperatorDesc> op, SortCol[] newSortCols) {
+
+ if (newSortCols != null) {
+ List<SortCol> newSortColList = Arrays.asList(newSortCols);
+ // If newSortColList had a null value it means that at least one of the input sort
+ // columns did not have a representative found in the output columns, so assume the data
+ // is no longer sorted
+ if (!newSortColList.contains(null)) {
+ bctx.setSortedCols(op, newSortColList);
+ }
+ }
+ }
+
+ private static void findBucketingSortingColumns(List<ExprNodeDesc> exprs,
+ List<ColumnInfo> colInfos, List<BucketCol> bucketCols, List<SortCol> sortCols,
+ BucketCol[] newBucketCols, SortCol[] newSortCols) {
+ findBucketingSortingColumns(exprs, colInfos, bucketCols, sortCols, newBucketCols,
+ newSortCols, 0);
+ }
+
+ /**
+ * For each expression, check if it represents a column known to be bucketed/sorted.
+ *
+ * The methods setBucketingColsIfComplete and setSortingColsIfComplete should be used to assign
+ * the values of newBucketCols and newSortCols as the bucketing/sorting columns of this operator
+ * because these arrays may contain nulls indicating that the output of this operator is not
+ * bucketed/sorted.
+ *
+ * @param exprs - list of expression
+ * @param colInfos - list of column infos
+ * @param bucketCols - list of bucketed columns from the input
+ * @param sortCols - list of sorted columns from the input
+ * @param newBucketCols - an array of bucket columns which should be the same length as
+ * bucketCols, updated such that the bucketed column(s) at index i in bucketCols became
+ * the bucketed column(s) at index i of newBucketCols in the output
+ * @param newSortCols - an array of sort columns which should be the same length as
+ * sortCols, updated such that the sorted column(s) at index i in sortCols became
+ * the sorted column(s) at index i of sortCols in the output
+ * @param colInfosOffset - the expressions are known to be represented by column infos
+ * beginning at this index
+ */
+ private static void findBucketingSortingColumns(List<ExprNodeDesc> exprs,
+ List<ColumnInfo> colInfos, List<BucketCol> bucketCols, List<SortCol> sortCols,
+ BucketCol[] newBucketCols, SortCol[] newSortCols, int colInfosOffset) {
+ for(int cnt = 0; cnt < exprs.size(); cnt++) {
+ ExprNodeDesc expr = exprs.get(cnt);
+
+ // Only columns can be sorted/bucketed, in particular applying a function to a column
+ // voids any assumptions
+ if (!(expr instanceof ExprNodeColumnDesc)) {
+ continue;
+ }
+
+ ExprNodeColumnDesc columnExpr = (ExprNodeColumnDesc)expr;
+
+ int colInfosIndex = cnt + colInfosOffset;
+
+ if (newBucketCols != null) {
+ int bucketIndex = indexOfColName(bucketCols, columnExpr.getColumn());
+ if (bucketIndex != -1) {
+ if (newBucketCols[bucketIndex] == null) {
+ newBucketCols[bucketIndex] = new BucketCol();
+ }
+ newBucketCols[bucketIndex].addAlias(
+ colInfos.get(colInfosIndex).getInternalName(), colInfosIndex);
+ }
+ }
+
+ if (newSortCols != null) {
+ int sortIndex = indexOfColName(sortCols, columnExpr.getColumn());
+ if (sortIndex != -1) {
+ if (newSortCols[sortIndex] == null) {
+ newSortCols[sortIndex] = new SortCol(sortCols.get(sortIndex).getSortOrder());
+ }
+ newSortCols[sortIndex].addAlias(
+ colInfos.get(colInfosIndex).getInternalName(), colInfosIndex);
+ }
+ }
+ }
+ }
+
+ /**
+ * Processor for Select operator.
+ */
+ public static class SelectInferrer extends DefaultInferrer implements NodeProcessor {
+ @Override
+ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+ Object... nodeOutputs) throws SemanticException {
+
+ BucketingSortingCtx bctx = (BucketingSortingCtx)procCtx;
+ SelectOperator sop = (SelectOperator)nd;
+
+ Operator<? extends OperatorDesc> parent = getParent(stack);
+
+ // if this is a selStarNoCompute then this select operator
+ // is treated like a default operator, so just call the super classes
+ // process method.
+ if (sop.getConf().isSelStarNoCompute()) {
+ processForward(sop, bctx, parent);
+ return null;
+ }
+
+ List<BucketCol> bucketCols = bctx.getBucketedCols(parent);
+ List<SortCol> sortCols = bctx.getSortedCols(parent);
+ List<ColumnInfo> colInfos = sop.getSchema().getSignature();
+
+ if (bucketCols == null && sortCols == null) {
+ return null;
+ }
+
+ BucketCol[] newBucketCols = null;
+ SortCol[] newSortCols = null;
+ if (bucketCols != null) {
+ newBucketCols = new BucketCol[bucketCols.size()];
+ }
+ if (sortCols != null) {
+ newSortCols = new SortCol[sortCols.size()];
+ }
+
+ findBucketingSortingColumns(sop.getConf().getColList(), colInfos, bucketCols, sortCols,
+ newBucketCols, newSortCols);
+
+ setBucketingColsIfComplete(bctx, sop, newBucketCols);
+
+ setSortingColsIfComplete(bctx, sop, newSortCols);
+
+ return null;
+ }
+
+ }
+
+ /**
+ * Find the BucketSortCol which has colName as one of its aliases. Returns the index of that
+ * BucketSortCol, or -1 if none exist
+ * @param bucketSortCols
+ * @param colName
+ * @return
+ */
+ private static int indexOfColName(List<? extends BucketSortCol> bucketSortCols, String colName) {
+ for (int index = 0; index < bucketSortCols.size(); index++) {
+ BucketSortCol bucketSortCol = bucketSortCols.get(index);
+ if (bucketSortCol.getNames().indexOf(colName) != -1) {
+ return index;
+ }
+ }
+
+ return -1;
+ }
+
+ /**
+ * This is used to construct new lists of bucketed columns where the order of the columns
+ * hasn't changed, only possibly the name
+ * @param bucketCols - input bucketed columns
+ * @param colInfos - List of column infos
+ * @return output bucketed columns
+ */
+ private static List<BucketCol> getNewBucketCols(List<BucketCol> bucketCols,
+ List<ColumnInfo> colInfos) {
+
+ List<BucketCol> newBucketCols = new ArrayList<BucketCol>(bucketCols.size());
+ for (int i = 0; i < bucketCols.size(); i++) {
+ BucketCol bucketCol = new BucketCol();
+ for (Integer index : bucketCols.get(i).getIndexes()) {
+ // The only time this condition should be false is in the case of dynamic partitioning
+ // where the data is bucketed on a dynamic partitioning column and the FileSinkOperator is
+ // being processed. In this case, the dynamic partition column will not appear in
+ // colInfos, and due to the limitations of dynamic partitioning, they will appear at the
+ // end of the input schema. Since the order of the columns hasn't changed, and no new
+ // columns have been added/removed, it is safe to assume that these will have indexes
+ // greater than or equal to colInfos.size().
+ if (index < colInfos.size()) {
+ bucketCol.addAlias(colInfos.get(index).getInternalName(), index);
+ } else {
+ return null;
+ }
+ }
+ newBucketCols.add(bucketCol);
+ }
+ return newBucketCols;
+ }
+
+ /**
+ * This is used to construct new lists of sorted columns where the order of the columns
+ * hasn't changed, only possibly the name
+ * @param bucketCols - input sorted columns
+ * @param colInfos - List of column infos
+ * @return output sorted columns
+ */
+ private static List<SortCol> getNewSortCols(List<SortCol> sortCols, List<ColumnInfo> colInfos) {
+ List<SortCol> newSortCols = new ArrayList<SortCol>(sortCols.size());
+ for (int i = 0; i < sortCols.size(); i++) {
+ SortCol sortCol = new SortCol(sortCols.get(i).getSortOrder());
+ for (Integer index : sortCols.get(i).getIndexes()) {
+ // The only time this condition should be false is in the case of dynamic partitioning
+ if (index < colInfos.size()) {
+ sortCol.addAlias(colInfos.get(index).getInternalName(), index);
+ } else {
+ return null;
+ }
+ }
+ newSortCols.add(sortCol);
+ }
+ return newSortCols;
+ }
+
+ /**
+ * Processor for FileSink operator.
+ */
+ public static class FileSinkInferrer extends DefaultInferrer implements NodeProcessor {
+ @Override
+ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+ Object... nodeOutputs) throws SemanticException {
+
+ BucketingSortingCtx bctx = (BucketingSortingCtx)procCtx;
+ FileSinkOperator fop = (FileSinkOperator)nd;
+
+ Operator<? extends OperatorDesc> parent = getParent(stack);
+ List<BucketCol> bucketCols = bctx.getBucketedCols(parent);
+ List<ColumnInfo> colInfos = fop.getSchema().getSignature();
+
+ // Set the inferred bucket columns for the file this FileSink produces
+ if (bucketCols != null) {
+ List<BucketCol> newBucketCols = getNewBucketCols(bucketCols, colInfos);
+ bctx.getBucketedColsByDirectory().put(fop.getConf().getDirName(), newBucketCols);
+ bctx.setBucketedCols(fop, newBucketCols);
+ }
+
+ List<SortCol> sortCols = bctx.getSortedCols(parent);
+
+ // Set the inferred sort columns for the file this FileSink produces
+ if (sortCols != null) {
+ List<SortCol> newSortCols = getNewSortCols(sortCols, colInfos);
+ bctx.getSortedColsByDirectory().put(fop.getConf().getDirName(), newSortCols);
+ bctx.setSortedCols(fop, newSortCols);
+ }
+
+ return null;
+ }
+
+ }
+
+ /**
+ * Processor for Extract operator.
+ *
+ * Only handles the case where the tree looks like
+ *
+ * ReduceSinkOperator --- ExtractOperator
+ *
+ * This is the case for distribute by, sort by, order by, cluster by operators.
+ */
+ public static class ExtractInferrer extends DefaultInferrer implements NodeProcessor {
+ @Override
+ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+ Object... nodeOutputs) throws SemanticException {
+
+ BucketingSortingCtx bctx = (BucketingSortingCtx)procCtx;
+ ExtractOperator exop = (ExtractOperator)nd;
+
+ // As of writing this, there is no case where this could be false, this is just protection
+ // from possible future changes
+ if (exop.getParentOperators().size() != 1) {
+ return null;
+ }
+
+ Operator<? extends OperatorDesc> parent = exop.getParentOperators().get(0);
+
+ // The caller of this method should guarantee this
+ assert(parent instanceof ReduceSinkOperator);
+
+ ReduceSinkOperator rop = (ReduceSinkOperator)parent;
+
+ // Go through the set of partition columns, and find their representatives in the values
+ // These represent the bucketed columns
+ List<BucketCol> bucketCols = new ArrayList<BucketCol>();
+ for (int i = 0; i < rop.getConf().getPartitionCols().size(); i++) {
+ boolean valueColFound = false;
+ for (int j = 0; j < rop.getConf().getValueCols().size(); j++) {
+ if (new ExprNodeDescEqualityWrapper(rop.getConf().getValueCols().get(j)).equals(
+ new ExprNodeDescEqualityWrapper(rop.getConf().getPartitionCols().get(i)))) {
+
+ bucketCols.add(new BucketCol(
+ rop.getSchema().getSignature().get(j).getInternalName(), j));
+ valueColFound = true;
+ break;
+ }
+ }
+
+ // If the partition columns can't all be found in the values then the data is not bucketed
+ if (!valueColFound) {
+ bucketCols.clear();
+ break;
+ }
+ }
+
+ // Go through the set of key columns, and find their representatives in the values
+ // These represent the sorted columns
+ String sortOrder = rop.getConf().getOrder();
+ List<SortCol> sortCols = new ArrayList<SortCol>();
+ for (int i = 0; i < rop.getConf().getKeyCols().size(); i++) {
+ boolean valueColFound = false;
+ for (int j = 0; j < rop.getConf().getValueCols().size(); j++) {
+ if (new ExprNodeDescEqualityWrapper(rop.getConf().getValueCols().get(j)).equals(
+ new ExprNodeDescEqualityWrapper(rop.getConf().getKeyCols().get(i)))) {
+
+ sortCols.add(new SortCol(
+ rop.getSchema().getSignature().get(j).getInternalName(), j, sortOrder.charAt(i)));
+ valueColFound = true;
+ break;
+ }
+ }
+
+ // If the sorted columns can't all be found in the values then the data is only sorted on
+ // the columns seen up until now
+ if (!valueColFound) {
+ break;
+ }
+ }
+
+ List<ColumnInfo> colInfos = exop.getSchema().getSignature();
+
+ if (!bucketCols.isEmpty()) {
+ List<BucketCol> newBucketCols = getNewBucketCols(bucketCols, colInfos);
+ bctx.setBucketedCols(exop, newBucketCols);
+ }
+
+ if (!sortCols.isEmpty()) {
+ List<SortCol> newSortCols = getNewSortCols(sortCols, colInfos);
+ bctx.setSortedCols(exop, newSortCols);
+ }
+
+ return null;
+ }
+ }
+
+ /**
+ * Processor for GroupByOperator, the special case where it follows a ForwardOperator
+ *
+ * There is a multi group by optimization which puts multiple group by operators in a
+ * reducer when they share the same keys and are part of a multi insert query.
+ *
+ * In this case the tree should look like
+ * Group By Operator
+ * /
+ * ReduceSinkOperator - ForwardOperator --- ...
+ * \
+ * GroupByOperator
+ *
+ */
+
+ public static class MultiGroupByInferrer extends GroupByInferrer implements NodeProcessor {
+ @Override
+ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+ Object... nodeOutputs) throws SemanticException {
+ BucketingSortingCtx bctx = (BucketingSortingCtx)procCtx;
+ GroupByOperator gop = (GroupByOperator)nd;
+
+ if (gop.getParentOperators().size() != 1) {
+ return null;
+ }
+
+ Operator<? extends OperatorDesc> fop = gop.getParentOperators().get(0);
+
+ // The caller of this method should guarantee this
+ assert(fop instanceof ForwardOperator);
+
+ if (fop.getParentOperators().size() != 1) {
+ return null;
+ }
+
+ Operator<? extends OperatorDesc> rop = fop.getParentOperators().get(0);
+
+ // The caller of this method should guarantee this
+ assert(rop instanceof ReduceSinkOperator);
+
+ processGroupByReduceSink((ReduceSinkOperator) rop, gop, bctx);
+
+ processForward(fop, bctx, rop);
+
+ return processGroupBy(fop, gop, bctx);
+ }
+ }
+
+ /**
+ * Processor for GroupBy operator.
+ *
+ * This handles the standard use of a group by operator, the tree should look like
+ *
+ * ReduceSinkOperator --- GroupByOperator
+ *
+ * It is up to the caller to guarantee the tree matches this pattern.
+ */
+ public static class GroupByInferrer extends DefaultInferrer implements NodeProcessor {
+ @Override
+ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+ Object... nodeOutputs) throws SemanticException {
+
+ BucketingSortingCtx bctx = (BucketingSortingCtx)procCtx;
+ GroupByOperator gop = (GroupByOperator)nd;
+
+ // As of writing this, there is no case where this could be false, this is just protection
+ // from possible future changes
+ if (gop.getParentOperators().size() != 1) {
+ return null;
+ }
+
+ Operator<? extends OperatorDesc> rop = gop.getParentOperators().get(0);
+
+ // The caller of this method should guarantee this
+ assert(rop instanceof ReduceSinkOperator);
+
+ processGroupByReduceSink((ReduceSinkOperator) rop, gop, bctx);
+
+ return processGroupBy((ReduceSinkOperator)rop , gop, bctx);
+ }
+
+ /**
+ * Process the ReduceSinkOperator preceding a GroupByOperator to determine which columns
+ * are bucketed and sorted.
+ *
+ * @param rop
+ * @param gop
+ * @param bctx
+ */
+ protected void processGroupByReduceSink(ReduceSinkOperator rop, GroupByOperator gop,
+ BucketingSortingCtx bctx){
+
+ String sortOrder = rop.getConf().getOrder();
+ List<BucketCol> bucketCols = new ArrayList<BucketCol>();
+ List<SortCol> sortCols = new ArrayList<SortCol>();
+ assert rop.getConf().getKeyCols().size() <= rop.getSchema().getSignature().size();
+ // Group by operators select the key cols, so no need to find them in the values
+ for (int i = 0; i < rop.getConf().getKeyCols().size(); i++) {
+ String colName = rop.getSchema().getSignature().get(i).getInternalName();
+ bucketCols.add(new BucketCol(colName, i));
+ sortCols.add(new SortCol(colName, i, sortOrder.charAt(i)));
+ }
+ bctx.setBucketedCols(rop, bucketCols);
+ bctx.setSortedCols(rop, sortCols);
+ }
+
+ /**
+ * Process a GroupByOperator to determine which if any columns the output is bucketed and
+ * sorted by, assumes the columns output by the parent which are bucketed and sorted have
+ * already been determined.
+ *
+ * @param parent
+ * @param gop
+ * @param bctx
+ * @return
+ */
+ protected Object processGroupBy(Operator<? extends OperatorDesc> parent, GroupByOperator gop,
+ BucketingSortingCtx bctx) {
+ List<BucketCol> bucketCols = bctx.getBucketedCols(parent);
+ List<SortCol> sortCols = bctx.getSortedCols(parent);
+ List<ColumnInfo> colInfos = gop.getSchema().getSignature();
+
+ if (bucketCols == null) {
+ assert sortCols == null;
+ return null;
+ }
+
+ if (bucketCols.isEmpty()) {
+ assert sortCols.isEmpty();
+ return null;
+ }
+
+ BucketCol[] newBucketCols = new BucketCol[bucketCols.size()];
+ SortCol[] newSortCols = new SortCol[sortCols.size()];
+
+ findBucketingSortingColumns(gop.getConf().getKeys(), colInfos, bucketCols, sortCols,
+ newBucketCols, newSortCols);
+
+ setBucketingColsIfComplete(bctx, gop, newBucketCols);
+
+ setSortingColsIfComplete(bctx, gop, newSortCols);
+
+ return null;
+ }
+ }
+
+ /**
+ * ReduceSink processor.
+ */
+ public static class ReduceSinkInferrer extends DefaultInferrer implements NodeProcessor {
+
+ @Override
+ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+ Object... nodeOutputs) throws SemanticException {
+
+ return null;
+ }
+ }
+
+ /**
+ * Filter processor
+ */
+ public static class ForwardingInferrer extends DefaultInferrer implements NodeProcessor {
+ @SuppressWarnings("unchecked")
+ @Override
+ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+ Object... nodeOutputs) throws SemanticException {
+
+ processForward((Operator<? extends OperatorDesc>)nd, (BucketingSortingCtx)procCtx,
+ getParent(stack));
+
+ return null;
+ }
+ }
+
+ public static NodeProcessor getDefaultProc() {
+ return new DefaultInferrer();
+ }
+
+ public static NodeProcessor getJoinProc() {
+ return new JoinInferrer();
+ }
+
+ public static NodeProcessor getSelProc() {
+ return new SelectInferrer();
+ }
+
+ public static NodeProcessor getGroupByProc() {
+ return new GroupByInferrer();
+ }
+
+ public static NodeProcessor getReduceSinkProc() {
+ return new ReduceSinkInferrer();
+ }
+
+ public static NodeProcessor getFileSinkProc() {
+ return new FileSinkInferrer();
+ }
+
+ public static NodeProcessor getExtractProc() {
+ return new ExtractInferrer();
+ }
+
+ public static NodeProcessor getFilterProc() {
+ return new ForwardingInferrer();
+ }
+
+ public static NodeProcessor getLimitProc() {
+ return new ForwardingInferrer();
+ }
+
+ public static NodeProcessor getLateralViewForwardProc() {
+ return new ForwardingInferrer();
+ }
+
+ public static NodeProcessor getLateralViewJoinProc() {
+ return new ForwardingInferrer();
+ }
+
+ public static NodeProcessor getForwardProc() {
+ return new ForwardingInferrer();
+ }
+
+ public static NodeProcessor getMultiGroupByProc() {
+ return new MultiGroupByInferrer();
+ }
+}
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java (revision 1438869)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java (working copy)
@@ -59,6 +59,13 @@
if (hiveConf.getBoolVar(HiveConf.ConfVars.HIVEMETADATAONLYQUERIES)) {
resolvers.add(new MetadataOnlyOptimizer());
}
+
+ // Physical optimizers which follow this need to be careful not to invalidate the inferences
+ // made by this optimizer. Only optimizers which depend on the results of this one should
+ // follow it.
+ if (hiveConf.getBoolVar(HiveConf.ConfVars.HIVE_INFER_BUCKET_SORT)) {
+ resolvers.add(new BucketingSortingInferenceOptimizer());
+ }
}
/**
Index: ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java (revision 1438869)
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java (working copy)
@@ -31,6 +31,8 @@
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.BucketCol;
+import org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.SortCol;
import org.apache.hadoop.hive.ql.parse.OpParseContext;
import org.apache.hadoop.hive.ql.parse.QBJoinTree;
import org.apache.hadoop.hive.ql.parse.SplitSample;
@@ -93,6 +95,18 @@
private transient boolean useBucketizedHiveInputFormat;
+ // if this is true, this means that this is the map reduce task which writes the final data,
+ // ignoring the optional merge task
+ private boolean finalMapRed = false;
+
+ // If this map reduce task has a FileSinkOperator, and bucketing/sorting metadata can be
+ // inferred about the data being written by that operator, these are mappings from the directory
+ // that operator writes into to the bucket/sort columns for that data.
+ private final Map> bucketedColsByDirectory =
+ new HashMap>();
+ private final Map> sortedColsByDirectory =
+ new HashMap>();
+
public MapredWork() {
aliasToPartnInfo = new LinkedHashMap();
}
@@ -279,6 +293,16 @@
this.numReduceTasks = numReduceTasks;
}
+ @Explain(displayName = "Path -> Bucketed Columns", normalExplain = false)
+ public Map> getBucketedColsByDirectory() {
+ return bucketedColsByDirectory;
+ }
+
+ @Explain(displayName = "Path -> Sorted Columns", normalExplain = false)
+ public Map> getSortedColsByDirectory() {
+ return sortedColsByDirectory;
+ }
+
@SuppressWarnings("nls")
public void addMapWork(String path, String alias, Operator> work,
PartitionDesc pd) {
@@ -525,4 +549,12 @@
public void setUseBucketizedHiveInputFormat(boolean useBucketizedHiveInputFormat) {
this.useBucketizedHiveInputFormat = useBucketizedHiveInputFormat;
}
+
+ public boolean isFinalMapRed() {
+ return finalMapRed;
+ }
+
+ public void setFinalMapRed(boolean finalMapRed) {
+ this.finalMapRed = finalMapRed;
+ }
}
Index: ql/src/test/queries/clientnegative/merge_negative_3.q
===================================================================
--- ql/src/test/queries/clientnegative/merge_negative_3.q (revision 0)
+++ ql/src/test/queries/clientnegative/merge_negative_3.q (working copy)
@@ -0,0 +1,6 @@
+set hive.enforce.bucketing=true;
+set hive.enforce.sorting=true;
+
+create table srcpart2 (key int, value string) partitioned by (ds string) clustered by (key) sorted by (key) into 2 buckets stored as RCFILE;
+insert overwrite table srcpart2 partition (ds='2011') select * from src;
+alter table srcpart2 partition (ds = '2011') concatenate;
Index: ql/src/test/queries/clientpositive/infer_bucket_sort.q
===================================================================
--- ql/src/test/queries/clientpositive/infer_bucket_sort.q (revision 0)
+++ ql/src/test/queries/clientpositive/infer_bucket_sort.q (working copy)
@@ -0,0 +1,158 @@
+set hive.exec.infer.bucket.sort=true;
+
+-- This tests inferring how data is bucketed/sorted from the operators in the reducer
+-- and populating that information in partitions' metadata
+
+CREATE TABLE test_table (key STRING, value STRING) PARTITIONED BY (part STRING);
+
+-- Test group by, should be bucketed and sorted by group by key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, count(*) FROM src GROUP BY key;
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
+
+-- Test group by where a key isn't selected, should not be bucketed or sorted
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, count(*) FROM src GROUP BY key, value;
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
+
+-- Test join, should be bucketed and sorted by join key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, a.value FROM src a JOIN src b ON a.key = b.key;
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
+
+-- Test join with two keys, should be bucketed and sorted by join keys
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, a.value FROM src a JOIN src b ON a.key = b.key AND a.value = b.value;
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
+
+-- Test join with two keys and only one selected, should not be bucketed or sorted
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, '1' FROM src a JOIN src b ON a.key = b.key AND a.value = b.value;
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
+
+-- Test join on three tables on same key, should be bucketed and sorted by join key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, c.value FROM src a JOIN src b ON (a.key = b.key) JOIN src c ON (b.key = c.key);
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
+
+-- Test join on three tables on different keys, should be bucketed and sorted by latter key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, c.value FROM src a JOIN src b ON (a.key = b.key) JOIN src c ON (b.value = c.value);
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
+
+-- Test distribute by, should only be bucketed by key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM src DISTRIBUTE BY key;
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
+
+-- Test sort by, should be sorted by key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM src SORT BY key ASC;
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
+
+-- Test sort by desc, should be sorted by key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM src SORT BY key DESC;
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
+
+-- Test cluster by, should be bucketed and sorted by key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM src CLUSTER BY key;
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
+
+-- Test distribute by and sort by different keys, should be bucketed by one key sorted by the other
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM src DISTRIBUTE BY key SORT BY value;
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
+
+-- Test join in simple subquery, should be bucketed and sorted on key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value from (SELECT a.key, b.value FROM src a JOIN src b ON (a.key = b.key)) subq;
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
+
+-- Test join in simple subquery renaming key column, should be bucketed and sorted on key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT k, value FROM (SELECT a.key as k, b.value FROM src a JOIN src b ON (a.key = b.key)) subq;
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
+
+-- Test group by in simple subquery, should be bucketed and sorted on key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, cnt from (SELECT key, count(*) as cnt FROM src GROUP BY key) subq;
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
+
+-- Test group by in simple subquery renaming key column, should be bucketed and sorted on key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT k, cnt FROM (SELECT key as k, count(*) as cnt FROM src GROUP BY key) subq;
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
+
+-- Test group by in subquery with where outside, should still be bucketed and sorted on key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM (SELECT key, count(1) AS value FROM src group by key) a where key < 10;
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
+
+-- Test group by in subquery with expression on value, should still be bucketed and sorted on key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value + 1 FROM (SELECT key, count(1) AS value FROM src group by key) a where key < 10;
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
+
+-- Test group by in subquery with lateral view outside, should still be bucketed and sorted
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM (SELECT key FROM src group by key) a lateral view explode(array(1, 2)) value as value;
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
+
+-- Test group by in subquery with another group by outside, should be bucketed and sorted by the
+-- key of the outer group by
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT count(1), value FROM (SELECT key, count(1) as value FROM src group by key) a group by value;
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
+
+-- Test group by in subquery with select on outside reordering the columns, should be bucketed and
+-- sorted by the column the group by key ends up in
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT value, key FROM (SELECT key, count(1) as value FROM src group by key) a;
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
+
+-- Test group by in subquery followed by distribute by, should only be bucketed by the distribute key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM (SELECT key, count(1) as value FROM src group by key) a distribute by key;
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
+
+-- Test group by in subquery followed by sort by, should only be sorted by the sort key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM (SELECT key, count(1) as value FROM src group by key) a sort by key;
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
+
+-- Test group by in subquery followed by transform script, should not be bucketed or sorted
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT TRANSFORM (a.key, a.value) USING 'cat' AS (key, value) FROM (SELECT key, count(1) AS value FROM src GROUP BY KEY) a;
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
+
+-- Test group by on function, should be bucketed and sorted by key and value because the function is applied in the mapper
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM (SELECT concat(key, "a") AS key, value, count(*) FROM src GROUP BY concat(key, "a"), value) a;
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
Index: ql/src/test/queries/clientpositive/infer_bucket_sort_convert_join.q
===================================================================
--- ql/src/test/queries/clientpositive/infer_bucket_sort_convert_join.q (revision 0)
+++ ql/src/test/queries/clientpositive/infer_bucket_sort_convert_join.q (working copy)
@@ -0,0 +1,25 @@
+set hive.exec.infer.bucket.sort=true;
+set hive.exec.infer.bucket.sort.num.buckets.power.two=true;
+set hive.auto.convert.join=true;
+
+-- This tests inferring how data is bucketed/sorted from the operators in the reducer
+-- and populating that information in partitions' metadata. In particular, those cases
+-- where joins may be auto converted to map joins.
+
+CREATE TABLE test_table (key STRING, value STRING) PARTITIONED BY (part STRING);
+
+-- Tests a join which is converted to a map join, the output should be neither bucketed nor sorted
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, b.value FROM src a JOIN src b ON a.key = b.key;
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
+
+set hive.mapjoin.check.memory.rows=1;
+set hive.mapjoin.localtask.max.memory.usage = 0.0001;
+
+-- Tests a join which is not converted to a map join, the output should be bucketed and sorted
+
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, b.value FROM src a JOIN src b ON a.key = b.key;
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
Index: ql/src/test/queries/clientpositive/infer_bucket_sort_dyn_part.q
===================================================================
--- ql/src/test/queries/clientpositive/infer_bucket_sort_dyn_part.q (revision 0)
+++ ql/src/test/queries/clientpositive/infer_bucket_sort_dyn_part.q (working copy)
@@ -0,0 +1,87 @@
+set hive.exec.infer.bucket.sort=true;
+set hive.exec.infer.bucket.sort.num.buckets.power.two=true;
+set hive.exec.dynamic.partition.mode=nonstrict;
+set hive.merge.mapfiles=false;
+set hive.merge.mapredfiles=false;
+
+-- This tests inferring how data is bucketed/sorted from the operators in the reducer
+-- and populating that information in partitions' metadata. In particular, those cases
+-- where dynamic partitioning is used.
+
+CREATE TABLE test_table LIKE srcpart;
+ALTER TABLE test_table SET FILEFORMAT RCFILE;
+
+-- Simple case, this should not be bucketed or sorted
+
+INSERT OVERWRITE TABLE test_table PARTITION (ds, hr)
+SELECT key, value, ds, hr FROM srcpart
+WHERE ds = '2008-04-08';
+
+DESCRIBE FORMATTED test_table PARTITION (ds='2008-04-08', hr='11');
+DESCRIBE FORMATTED test_table PARTITION (ds='2008-04-08', hr='12');
+
+-- This should not be bucketed or sorted since the partition keys are in the set of bucketed
+-- and sorted columns for the output
+
+INSERT OVERWRITE TABLE test_table PARTITION (ds, hr)
+SELECT key, COUNT(*), ds, hr FROM srcpart
+WHERE ds = '2008-04-08'
+GROUP BY key, ds, hr;
+
+DESCRIBE FORMATTED test_table PARTITION (ds='2008-04-08', hr='11');
+DESCRIBE FORMATTED test_table PARTITION (ds='2008-04-08', hr='12');
+
+-- Both partitions should be bucketed and sorted by key
+
+INSERT OVERWRITE TABLE test_table PARTITION (ds, hr)
+SELECT key, value, '2008-04-08', IF (key % 2 == 0, '11', '12') FROM
+(SELECT key, COUNT(*) AS value FROM srcpart
+WHERE ds = '2008-04-08'
+GROUP BY key) a;
+
+DESCRIBE FORMATTED test_table PARTITION (ds='2008-04-08', hr='11');
+DESCRIBE FORMATTED test_table PARTITION (ds='2008-04-08', hr='12');
+
+CREATE TABLE srcpart_merge_dp LIKE srcpart;
+
+CREATE TABLE srcpart_merge_dp_rc LIKE srcpart;
+ALTER TABLE srcpart_merge_dp_rc SET FILEFORMAT RCFILE;
+
+LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcpart_merge_dp PARTITION(ds='2008-04-08', hr=11);
+LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcpart_merge_dp PARTITION(ds='2008-04-08', hr=11);
+LOAD DATA LOCAL INPATH '../data/files/srcbucket22.txt' INTO TABLE srcpart_merge_dp PARTITION(ds='2008-04-08', hr=11);
+LOAD DATA LOCAL INPATH '../data/files/srcbucket23.txt' INTO TABLE srcpart_merge_dp PARTITION(ds='2008-04-08', hr=11);
+
+LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcpart_merge_dp PARTITION(ds='2008-04-08', hr=12);
+
+INSERT OVERWRITE TABLE srcpart_merge_dp_rc PARTITION (ds = '2008-04-08', hr)
+SELECT key, value, hr FROM srcpart_merge_dp WHERE ds = '2008-04-08';
+
+set hive.input.format=org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;
+set hive.merge.mapfiles=true;
+set hive.merge.mapredfiles=true;
+set hive.merge.smallfiles.avgsize=200;
+set hive.exec.compress.output=false;
+set hive.exec.dynamic.partition=true;
+set mapred.reduce.tasks=2;
+
+-- Tests dynamic partitions where bucketing/sorting can be inferred, but some partitions are
+-- merged and some are moved. Currently neither should be bucketed or sorted, in the future,
+-- (ds='2008-04-08', hr='12') may be bucketed and sorted, (ds='2008-04-08', hr='11') should
+-- definitely not be.
+
+EXPLAIN
+INSERT OVERWRITE TABLE test_table PARTITION (ds = '2008-04-08', hr)
+SELECT key, value, IF (key % 100 == 0, '11', '12') FROM
+(SELECT key, COUNT(*) AS value FROM srcpart
+WHERE ds = '2008-04-08'
+GROUP BY key) a;
+
+INSERT OVERWRITE TABLE test_table PARTITION (ds = '2008-04-08', hr)
+SELECT key, value, IF (key % 100 == 0, '11', '12') FROM
+(SELECT key, COUNT(*) AS value FROM srcpart
+WHERE ds = '2008-04-08'
+GROUP BY key) a;
+
+DESCRIBE FORMATTED test_table PARTITION (ds='2008-04-08', hr='11');
+DESCRIBE FORMATTED test_table PARTITION (ds='2008-04-08', hr='12');
Index: ql/src/test/queries/clientpositive/infer_bucket_sort_grouping_operators.q
===================================================================
--- ql/src/test/queries/clientpositive/infer_bucket_sort_grouping_operators.q (revision 0)
+++ ql/src/test/queries/clientpositive/infer_bucket_sort_grouping_operators.q (working copy)
@@ -0,0 +1,57 @@
+set hive.exec.infer.bucket.sort=true;
+
+-- This tests inferring how data is bucketed/sorted from the operators in the reducer
+-- and populating that information in partitions' metadata, in particular, this tests
+-- the grouping operators rollup/cube/grouping sets
+
+CREATE TABLE test_table_out (key STRING, value STRING, agg STRING) PARTITIONED BY (part STRING);
+
+CREATE TABLE test_table_out_2 (key STRING, value STRING, grouping_key STRING, agg STRING) PARTITIONED BY (part STRING);
+
+-- Test rollup, should not be bucketed or sorted because it's missing the grouping ID
+EXPLAIN INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1')
+SELECT key, value, count(1) FROM src GROUP BY key, value WITH ROLLUP;
+
+INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1')
+SELECT key, value, count(1) FROM src GROUP BY key, value WITH ROLLUP;
+
+DESCRIBE FORMATTED test_table_out PARTITION (part = '1');
+
+-- Test rollup, should be bucketed and sorted on key, value, grouping_key
+
+INSERT OVERWRITE TABLE test_table_out_2 PARTITION (part = '1')
+SELECT key, value, GROUPING__ID, count(1) FROM src GROUP BY key, value WITH ROLLUP;
+
+DESCRIBE FORMATTED test_table_out_2 PARTITION (part = '1');
+
+-- Test cube, should not be bucketed or sorted because it's missing the grouping ID
+EXPLAIN INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1')
+SELECT key, value, count(1) FROM src GROUP BY key, value WITH CUBE;
+
+INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1')
+SELECT key, value, count(1) FROM src GROUP BY key, value WITH CUBE;
+
+DESCRIBE FORMATTED test_table_out PARTITION (part = '1');
+
+-- Test cube, should be bucketed and sorted on key, value, grouping_key
+
+INSERT OVERWRITE TABLE test_table_out_2 PARTITION (part = '1')
+SELECT key, value, GROUPING__ID, count(1) FROM src GROUP BY key, value WITH CUBE;
+
+DESCRIBE FORMATTED test_table_out_2 PARTITION (part = '1');
+
+-- Test grouping sets, should not be bucketed or sorted because it's missing the grouping ID
+EXPLAIN INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1')
+SELECT key, value, count(1) FROM src GROUP BY key, value GROUPING SETS (key, value);
+
+INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1')
+SELECT key, value, count(1) FROM src GROUP BY key, value GROUPING SETS (key, value);
+
+DESCRIBE FORMATTED test_table_out PARTITION (part = '1');
+
+-- Test grouping sets, should be bucketed and sorted on key, value, grouping_key
+
+INSERT OVERWRITE TABLE test_table_out_2 PARTITION (part = '1')
+SELECT key, value, GROUPING__ID, count(1) FROM src GROUP BY key, value GROUPING SETS (key, value);
+
+DESCRIBE FORMATTED test_table_out_2 PARTITION (part = '1');
Index: ql/src/test/queries/clientpositive/infer_bucket_sort_list_bucket.q
===================================================================
--- ql/src/test/queries/clientpositive/infer_bucket_sort_list_bucket.q (revision 0)
+++ ql/src/test/queries/clientpositive/infer_bucket_sort_list_bucket.q (working copy)
@@ -0,0 +1,33 @@
+set hive.mapred.supports.subdirectories=true;
+set hive.merge.mapfiles=false;
+set hive.merge.mapredfiles=false;
+set mapred.input.dir.recursive=true;
+
+-- This tests that bucketing/sorting metadata is not inferred for tables with list bucketing
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+
+-- create a skewed table
+CREATE TABLE list_bucketing_table (key STRING, value STRING)
+PARTITIONED BY (part STRING)
+SKEWED BY (key) ON ("484")
+STORED AS DIRECTORIES;
+
+-- Tests group by, the output should neither be bucketed nor sorted
+
+INSERT OVERWRITE TABLE list_bucketing_table PARTITION (part = '1')
+SELECT key, count(*) FROM src GROUP BY key;
+
+DESC FORMATTED list_bucketing_table PARTITION (part = '1');
+
+-- create a table skewed on a key which doesn't exist in the data
+CREATE TABLE list_bucketing_table2 (key STRING, value STRING)
+PARTITIONED BY (part STRING)
+SKEWED BY (key) ON ("abc")
+STORED AS DIRECTORIES;
+
+-- should not be bucketed or sorted
+INSERT OVERWRITE TABLE list_bucketing_table2 PARTITION (part = '1')
+SELECT key, count(*) FROM src GROUP BY key;
+
+DESC FORMATTED list_bucketing_table2 PARTITION (part = '1');
Index: ql/src/test/queries/clientpositive/infer_bucket_sort_map_operators.q
===================================================================
--- ql/src/test/queries/clientpositive/infer_bucket_sort_map_operators.q (revision 0)
+++ ql/src/test/queries/clientpositive/infer_bucket_sort_map_operators.q (working copy)
@@ -0,0 +1,74 @@
+set hive.exec.infer.bucket.sort=true;
+set hive.enforce.bucketing = true;
+set hive.enforce.sorting = true;
+
+-- This tests inferring how data is bucketed/sorted from the operators in the reducer
+-- and populating that information in partitions' metadata, in particular, this tests
+-- that operators in the mapper have no effect
+
+CREATE TABLE test_table1 (key STRING, value STRING)
+CLUSTERED BY (key) SORTED BY (key DESC) INTO 2 BUCKETS;
+
+CREATE TABLE test_table2 (key STRING, value STRING)
+CLUSTERED BY (key) SORTED BY (key DESC) INTO 2 BUCKETS;
+
+INSERT OVERWRITE TABLE test_table1 SELECT key, value FROM src;
+
+INSERT OVERWRITE TABLE test_table2 SELECT key, value FROM src;
+
+CREATE TABLE test_table_out (key STRING, value STRING) PARTITIONED BY (part STRING);
+
+set hive.map.groupby.sorted=true;
+
+-- Test map group by doesn't affect inference, should not be bucketed or sorted
+EXPLAIN INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1')
+SELECT key, count(*) FROM test_table1 GROUP BY key;
+
+INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1')
+SELECT key, count(*) FROM test_table1 GROUP BY key;
+
+DESCRIBE FORMATTED test_table_out PARTITION (part = '1');
+
+-- Test map group by doesn't affect inference, should be bucketed and sorted by value
+EXPLAIN INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1')
+SELECT a.key, a.value FROM (
+ SELECT key, count(*) AS value FROM test_table1 GROUP BY key
+) a JOIN (
+ SELECT key, value FROM src
+) b
+ON (a.value = b.value);
+
+INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1')
+SELECT a.key, a.value FROM (
+ SELECT key, cast(count(*) AS STRING) AS value FROM test_table1 GROUP BY key
+) a JOIN (
+ SELECT key, value FROM src
+) b
+ON (a.value = b.value);
+
+DESCRIBE FORMATTED test_table_out PARTITION (part = '1');
+
+set hive.map.groupby.sorted=false;
+set hive.optimize.bucketmapjoin = true;
+set hive.optimize.bucketmapjoin.sortedmerge = true;
+
+-- Test SMB join doesn't affect inference, should not be bucketed or sorted
+EXPLAIN INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1')
+SELECT /*+ MAPJOIN(a) */ a.key, b.value FROM test_table1 a JOIN test_table2 b ON a.key = b.key;
+
+INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1')
+SELECT /*+ MAPJOIN(a) */ a.key, b.value FROM test_table1 a JOIN test_table2 b ON a.key = b.key;
+
+DESCRIBE FORMATTED test_table_out PARTITION (part = '1');
+
+-- Test SMB join doesn't affect inference, should be bucketed and sorted by key
+EXPLAIN INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1')
+SELECT /*+ MAPJOIN(a) */ b.value, count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key
+GROUP BY b.value;
+
+INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1')
+SELECT /*+ MAPJOIN(a) */ b.value, count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key
+GROUP BY b.value;
+
+DESCRIBE FORMATTED test_table_out PARTITION (part = '1');
+
Index: ql/src/test/queries/clientpositive/infer_bucket_sort_merge.q
===================================================================
--- ql/src/test/queries/clientpositive/infer_bucket_sort_merge.q (revision 0)
+++ ql/src/test/queries/clientpositive/infer_bucket_sort_merge.q (working copy)
@@ -0,0 +1,25 @@
+set hive.exec.infer.bucket.sort=true;
+set hive.exec.infer.bucket.sort.num.buckets.power.two=true;
+set hive.merge.mapredfiles=true;
+set mapred.reduce.tasks=2;
+
+-- This tests inferring how data is bucketed/sorted from the operators in the reducer
+-- and populating that information in partitions' metadata. In particular, those cases
+-- where merging may or may not be used.
+
+CREATE TABLE test_table (key STRING, value STRING) PARTITIONED BY (part STRING);
+
+-- Tests a reduce task followed by a merge. The output should be neither bucketed nor sorted.
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, b.value FROM src a JOIN src b ON a.key = b.key;
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
+
+set hive.merge.smallfiles.avgsize=2;
+set hive.exec.compress.output=false;
+
+-- Tests a reduce task followed by a move. The output should be bucketed and sorted.
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, b.value FROM src a JOIN src b ON a.key = b.key;
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
Index: ql/src/test/queries/clientpositive/infer_bucket_sort_multi_insert.q
===================================================================
--- ql/src/test/queries/clientpositive/infer_bucket_sort_multi_insert.q (revision 0)
+++ ql/src/test/queries/clientpositive/infer_bucket_sort_multi_insert.q (working copy)
@@ -0,0 +1,46 @@
+set hive.exec.infer.bucket.sort=true;
+set hive.exec.infer.bucket.sort.num.buckets.power.two=true;
+
+-- This tests inferring how data is bucketed/sorted from the operators in the reducer
+-- and populating that information in partitions' metadata. In particular, those cases
+-- where multi insert is used.
+
+CREATE TABLE test_table (key STRING, value STRING) PARTITIONED BY (part STRING);
+
+-- Simple case, neither partition should be bucketed or sorted
+
+FROM src
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, value
+INSERT OVERWRITE TABLE test_table PARTITION (part = '2') SELECT value, key;
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
+DESCRIBE FORMATTED test_table PARTITION (part = '2');
+
+-- The partitions should be bucketed and sorted by different keys
+
+FROM src
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, COUNT(*) GROUP BY key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '2') SELECT COUNT(*), value GROUP BY value;
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
+DESCRIBE FORMATTED test_table PARTITION (part = '2');
+
+-- The first partition should be bucketed and sorted, the second should not
+
+FROM src
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, COUNT(*) GROUP BY key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '2') SELECT key, value;
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
+DESCRIBE FORMATTED test_table PARTITION (part = '2');
+
+set hive.multigroupby.singlereducer=true;
+
+-- Test the multi group by single reducer optimization
+-- Both partitions should be bucketed by key
+FROM src
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, COUNT(*) GROUP BY key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '2') SELECT key, SUM(SUBSTR(value, 5)) GROUP BY key;
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
+DESCRIBE FORMATTED test_table PARTITION (part = '2');
Index: ql/src/test/queries/clientpositive/infer_bucket_sort_num_buckets.q
===================================================================
--- ql/src/test/queries/clientpositive/infer_bucket_sort_num_buckets.q (revision 0)
+++ ql/src/test/queries/clientpositive/infer_bucket_sort_num_buckets.q (working copy)
@@ -0,0 +1,37 @@
+set hive.exec.infer.bucket.sort=true;
+set hive.merge.mapfiles=false;
+set hive.merge.mapredfiles=false;
+set mapred.reduce.tasks=2;
+
+CREATE TABLE test_table (key INT, value STRING) PARTITIONED BY (ds STRING, hr STRING);
+
+-- Tests dynamic partitions where bucketing/sorting can be inferred, but not all reducers write
+-- all partitions. The subquery produces rows as follows
+-- key = 0:
+-- 0, , 0
+-- key = 1:
+-- 0, , 1
+-- key = 2:
+-- 1, , 0
+-- This means that by distributing by the first column into two reducers, and using the third
+-- columns as a dynamic partition, the dynamic partition for 0 will get written in both reducers
+-- and the partition for 1 will get written in one reducer. So hr=0 should be bucketed by key
+-- and hr=1 should not.
+
+EXPLAIN
+INSERT OVERWRITE TABLE test_table PARTITION (ds = '2008-04-08', hr)
+SELECT key2, value, cast(hr as int) FROM
+(SELECT if ((key % 3) < 2, 0, 1) as key2, value, (key % 2) as hr
+FROM srcpart
+WHERE ds = '2008-04-08') a
+DISTRIBUTE BY key2;
+
+INSERT OVERWRITE TABLE test_table PARTITION (ds = '2008-04-08', hr)
+SELECT key2, value, cast(hr as int) FROM
+(SELECT if ((key % 3) < 2, 0, 1) as key2, value, (key % 3 % 2) as hr
+FROM srcpart
+WHERE ds = '2008-04-08') a
+DISTRIBUTE BY key2;
+
+DESCRIBE FORMATTED test_table PARTITION (ds='2008-04-08', hr='0');
+DESCRIBE FORMATTED test_table PARTITION (ds='2008-04-08', hr='1');
Index: ql/src/test/queries/clientpositive/infer_bucket_sort_reducers_power_two.q
===================================================================
--- ql/src/test/queries/clientpositive/infer_bucket_sort_reducers_power_two.q (revision 0)
+++ ql/src/test/queries/clientpositive/infer_bucket_sort_reducers_power_two.q (working copy)
@@ -0,0 +1,46 @@
+set hive.exec.infer.bucket.sort=true;
+set hive.exec.infer.bucket.sort.num.buckets.power.two=true;
+set hive.exec.reducers.bytes.per.reducer=2500;
+
+-- This tests inferring how data is bucketed/sorted from the operators in the reducer
+-- and populating that information in partitions' metadata, it also verifies that the
+-- number of reducers chosen will be a power of two
+
+CREATE TABLE test_table (key STRING, value STRING) PARTITIONED BY (part STRING);
+
+-- Test group by, should be bucketed and sorted by group by key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, count(*) FROM src GROUP BY key;
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
+
+-- Test join, should be bucketed and sorted by join key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, a.value FROM src a JOIN src b ON a.key = b.key;
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
+
+-- Test join with two keys, should be bucketed and sorted by join keys
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, a.value FROM src a JOIN src b ON a.key = b.key AND a.value = b.value;
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
+
+-- Test join on three tables on same key, should be bucketed and sorted by join key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, c.value FROM src a JOIN src b ON (a.key = b.key) JOIN src c ON (b.key = c.key);
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
+
+-- Test join on three tables on different keys, should be bucketed and sorted by latter key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, c.value FROM src a JOIN src b ON (a.key = b.key) JOIN src c ON (b.value = c.value);
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
+
+-- Test group by in subquery with another group by outside, should be bucketed and sorted by the
+-- key of the outer group by
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT count(1), value FROM (SELECT key, count(1) as value FROM src group by key) a group by value;
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
Index: ql/src/test/queries/clientpositive/reduce_deduplicate_exclude_gby.q
===================================================================
--- ql/src/test/queries/clientpositive/reduce_deduplicate_exclude_gby.q (revision 1438869)
+++ ql/src/test/queries/clientpositive/reduce_deduplicate_exclude_gby.q (working copy)
@@ -1,5 +1,7 @@
create table t1( key_int1 int, key_int2 int, key_string1 string, key_string2 string);
+set hive.optimize.reducededuplication=false;
+
set hive.map.aggr=false;
select Q1.key_int1, sum(Q1.key_int1) from (select * from t1 cluster by key_int1) Q1 group by Q1.key_int1;
Index: ql/src/test/results/clientnegative/merge_negative_3.q.out
===================================================================
--- ql/src/test/results/clientnegative/merge_negative_3.q.out (revision 0)
+++ ql/src/test/results/clientnegative/merge_negative_3.q.out (working copy)
@@ -0,0 +1,16 @@
+PREHOOK: query: create table srcpart2 (key int, value string) partitioned by (ds string) clustered by (key) sorted by (key) into 2 buckets stored as RCFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table srcpart2 (key int, value string) partitioned by (ds string) clustered by (key) sorted by (key) into 2 buckets stored as RCFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@srcpart2
+PREHOOK: query: insert overwrite table srcpart2 partition (ds='2011') select * from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@srcpart2@ds=2011
+POSTHOOK: query: insert overwrite table srcpart2 partition (ds='2011') select * from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@srcpart2@ds=2011
+POSTHOOK: Lineage: srcpart2 PARTITION(ds=2011).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: srcpart2 PARTITION(ds=2011).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+FAILED: SemanticException org.apache.hadoop.hive.ql.parse.SemanticException: Merge can not perform on bucketized partition/table.
Index: ql/src/test/results/clientpositive/ctas.q.out
===================================================================
--- ql/src/test/results/clientpositive/ctas.q.out (revision 1438869)
+++ ql/src/test/results/clientpositive/ctas.q.out (working copy)
@@ -827,6 +827,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/src [src]
@@ -891,6 +893,8 @@
TotalFiles: 1
GatherStats: true
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
#### A masked pattern was here ####
Index: ql/src/test/results/clientpositive/infer_bucket_sort.q.out
===================================================================
--- ql/src/test/results/clientpositive/infer_bucket_sort.q.out (revision 0)
+++ ql/src/test/results/clientpositive/infer_bucket_sort.q.out (working copy)
@@ -0,0 +1,2565 @@
+PREHOOK: query: -- This tests inferring how data is bucketed/sorted from the operators in the reducer
+-- and populating that information in partitions' metadata
+
+CREATE TABLE test_table (key STRING, value STRING) PARTITIONED BY (part STRING)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- This tests inferring how data is bucketed/sorted from the operators in the reducer
+-- and populating that information in partitions' metadata
+
+CREATE TABLE test_table (key STRING, value STRING) PARTITIONED BY (part STRING)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_table
+PREHOOK: query: -- Test group by, should be bucketed and sorted by group by key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, count(*) FROM src GROUP BY key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Test group by, should be bucketed and sorted by group by key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, count(*) FROM src GROUP BY key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 309
+ rawDataSize 1482
+ totalSize 1791
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: 1
+Bucket Columns: [key]
+Sort Columns: [Order(col:key, order:1)]
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Test group by where a key isn't selected, should not be bucketed or sorted
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, count(*) FROM src GROUP BY key, value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Test group by where a key isn't selected, should not be bucketed or sorted
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, count(*) FROM src GROUP BY key, value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 309
+ rawDataSize 1482
+ totalSize 1791
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Test join, should be bucketed and sorted by join key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, a.value FROM src a JOIN src b ON a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Test join, should be bucketed and sorted by join key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, a.value FROM src a JOIN src b ON a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 1028
+ rawDataSize 10968
+ totalSize 11996
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: 1
+Bucket Columns: [key]
+Sort Columns: [Order(col:key, order:1)]
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Test join with two keys, should be bucketed and sorted by join keys
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, a.value FROM src a JOIN src b ON a.key = b.key AND a.value = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Test join with two keys, should be bucketed and sorted by join keys
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, a.value FROM src a JOIN src b ON a.key = b.key AND a.value = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 1028
+ rawDataSize 10968
+ totalSize 11996
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: 1
+Bucket Columns: [key, value]
+Sort Columns: [Order(col:key, order:1), Order(col:value, order:1)]
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Test join with two keys and only one selected, should not be bucketed or sorted
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, '1' FROM src a JOIN src b ON a.key = b.key AND a.value = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Test join with two keys and only one selected, should not be bucketed or sorted
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, '1' FROM src a JOIN src b ON a.key = b.key AND a.value = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 1028
+ rawDataSize 4970
+ totalSize 5998
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Test join on three tables on same key, should be bucketed and sorted by join key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, c.value FROM src a JOIN src b ON (a.key = b.key) JOIN src c ON (b.key = c.key)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Test join on three tables on same key, should be bucketed and sorted by join key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, c.value FROM src a JOIN src b ON (a.key = b.key) JOIN src c ON (b.key = c.key)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 2654
+ rawDataSize 28466
+ totalSize 31120
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: 1
+Bucket Columns: [key]
+Sort Columns: [Order(col:key, order:1)]
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Test join on three tables on different keys, should be bucketed and sorted by latter key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, c.value FROM src a JOIN src b ON (a.key = b.key) JOIN src c ON (b.value = c.value)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Test join on three tables on different keys, should be bucketed and sorted by latter key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, c.value FROM src a JOIN src b ON (a.key = b.key) JOIN src c ON (b.value = c.value)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 2654
+ rawDataSize 28466
+ totalSize 31120
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: 1
+Bucket Columns: [value]
+Sort Columns: [Order(col:value, order:1)]
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Test distribute by, should only be bucketed by key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM src DISTRIBUTE BY key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Test distribute by, should only be bucketed by key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM src DISTRIBUTE BY key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 500
+ rawDataSize 5312
+ totalSize 5812
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: 1
+Bucket Columns: [key]
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Test sort by, should be sorted by key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM src SORT BY key ASC
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Test sort by, should be sorted by key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM src SORT BY key ASC
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 500
+ rawDataSize 5312
+ totalSize 5812
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: [Order(col:key, order:1)]
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Test sort by desc, should be sorted by key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM src SORT BY key DESC
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Test sort by desc, should be sorted by key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM src SORT BY key DESC
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 500
+ rawDataSize 5312
+ totalSize 5812
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: [Order(col:key, order:0)]
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Test cluster by, should be bucketed and sorted by key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM src CLUSTER BY key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Test cluster by, should be bucketed and sorted by key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM src CLUSTER BY key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 500
+ rawDataSize 5312
+ totalSize 5812
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: 1
+Bucket Columns: [key]
+Sort Columns: [Order(col:key, order:1)]
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Test distribute by and sort by different keys, should be bucketed by one key sorted by the other
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM src DISTRIBUTE BY key SORT BY value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Test distribute by and sort by different keys, should be bucketed by one key sorted by the other
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM src DISTRIBUTE BY key SORT BY value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 500
+ rawDataSize 5312
+ totalSize 5812
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: 1
+Bucket Columns: [key]
+Sort Columns: [Order(col:value, order:1)]
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Test join in simple subquery, should be bucketed and sorted on key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value from (SELECT a.key, b.value FROM src a JOIN src b ON (a.key = b.key)) subq
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Test join in simple subquery, should be bucketed and sorted on key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value from (SELECT a.key, b.value FROM src a JOIN src b ON (a.key = b.key)) subq
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 1028
+ rawDataSize 10968
+ totalSize 11996
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: 1
+Bucket Columns: [key]
+Sort Columns: [Order(col:key, order:1)]
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Test join in simple subquery renaming key column, should be bucketed and sorted on key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT k, value FROM (SELECT a.key as k, b.value FROM src a JOIN src b ON (a.key = b.key)) subq
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Test join in simple subquery renaming key column, should be bucketed and sorted on key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT k, value FROM (SELECT a.key as k, b.value FROM src a JOIN src b ON (a.key = b.key)) subq
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 1028
+ rawDataSize 10968
+ totalSize 11996
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: 1
+Bucket Columns: [key]
+Sort Columns: [Order(col:key, order:1)]
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Test group by in simple subquery, should be bucketed and sorted on key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, cnt from (SELECT key, count(*) as cnt FROM src GROUP BY key) subq
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Test group by in simple subquery, should be bucketed and sorted on key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, cnt from (SELECT key, count(*) as cnt FROM src GROUP BY key) subq
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 309
+ rawDataSize 1482
+ totalSize 1791
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: 1
+Bucket Columns: [key]
+Sort Columns: [Order(col:key, order:1)]
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Test group by in simple subquery renaming key column, should be bucketed and sorted on key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT k, cnt FROM (SELECT key as k, count(*) as cnt FROM src GROUP BY key) subq
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Test group by in simple subquery renaming key column, should be bucketed and sorted on key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT k, cnt FROM (SELECT key as k, count(*) as cnt FROM src GROUP BY key) subq
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 309
+ rawDataSize 1482
+ totalSize 1791
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: 1
+Bucket Columns: [key]
+Sort Columns: [Order(col:key, order:1)]
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Test group by in subquery with where outside, should still be bucketed and sorted on key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM (SELECT key, count(1) AS value FROM src group by key) a where key < 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Test group by in subquery with where outside, should still be bucketed and sorted on key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM (SELECT key, count(1) AS value FROM src group by key) a where key < 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 6
+ rawDataSize 18
+ totalSize 24
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: 1
+Bucket Columns: [key]
+Sort Columns: [Order(col:key, order:1)]
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Test group by in subquery with expression on value, should still be bucketed and sorted on key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value + 1 FROM (SELECT key, count(1) AS value FROM src group by key) a where key < 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Test group by in subquery with expression on value, should still be bucketed and sorted on key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value + 1 FROM (SELECT key, count(1) AS value FROM src group by key) a where key < 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 6
+ rawDataSize 18
+ totalSize 24
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: 1
+Bucket Columns: [key]
+Sort Columns: [Order(col:key, order:1)]
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Test group by in subquery with lateral view outside, should still be bucketed and sorted
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM (SELECT key FROM src group by key) a lateral view explode(array(1, 2)) value as value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Test group by in subquery with lateral view outside, should still be bucketed and sorted
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM (SELECT key FROM src group by key) a lateral view explode(array(1, 2)) value as value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SCRIPT []
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SCRIPT []
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 618
+ rawDataSize 2964
+ totalSize 3582
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: 1
+Bucket Columns: [key]
+Sort Columns: [Order(col:key, order:1)]
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Test group by in subquery with another group by outside, should be bucketed and sorted by the
+-- key of the outer group by
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT count(1), value FROM (SELECT key, count(1) as value FROM src group by key) a group by value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Test group by in subquery with another group by outside, should be bucketed and sorted by the
+-- key of the outer group by
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT count(1), value FROM (SELECT key, count(1) as value FROM src group by key) a group by value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SCRIPT []
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SCRIPT []
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 5
+ rawDataSize 19
+ totalSize 24
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: 1
+Bucket Columns: [value]
+Sort Columns: [Order(col:value, order:1)]
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Test group by in subquery with select on outside reordering the columns, should be bucketed and
+-- sorted by the column the group by key ends up in
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT value, key FROM (SELECT key, count(1) as value FROM src group by key) a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Test group by in subquery with select on outside reordering the columns, should be bucketed and
+-- sorted by the column the group by key ends up in
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT value, key FROM (SELECT key, count(1) as value FROM src group by key) a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SCRIPT []
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SCRIPT []
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 309
+ rawDataSize 1482
+ totalSize 1791
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: 1
+Bucket Columns: [value]
+Sort Columns: [Order(col:value, order:1)]
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Test group by in subquery followed by distribute by, should only be bucketed by the distribute key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM (SELECT key, count(1) as value FROM src group by key) a distribute by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Test group by in subquery followed by distribute by, should only be bucketed by the distribute key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM (SELECT key, count(1) as value FROM src group by key) a distribute by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SCRIPT []
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SCRIPT []
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 309
+ rawDataSize 1482
+ totalSize 1791
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: 1
+Bucket Columns: [key]
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Test group by in subquery followed by sort by, should only be sorted by the sort key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM (SELECT key, count(1) as value FROM src group by key) a sort by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Test group by in subquery followed by sort by, should only be sorted by the sort key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM (SELECT key, count(1) as value FROM src group by key) a sort by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SCRIPT []
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SCRIPT []
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 309
+ rawDataSize 1482
+ totalSize 1791
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: [Order(col:key, order:1)]
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Test group by in subquery followed by transform script, should not be bucketed or sorted
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT TRANSFORM (a.key, a.value) USING 'cat' AS (key, value) FROM (SELECT key, count(1) AS value FROM src GROUP BY KEY) a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Test group by in subquery followed by transform script, should not be bucketed or sorted
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT TRANSFORM (a.key, a.value) USING 'cat' AS (key, value) FROM (SELECT key, count(1) AS value FROM src GROUP BY KEY) a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SCRIPT [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SCRIPT [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SCRIPT []
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SCRIPT [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SCRIPT [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SCRIPT []
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 309
+ rawDataSize 1482
+ totalSize 1791
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Test group by on function, should be bucketed and sorted by key and value because the function is applied in the mapper
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM (SELECT concat(key, "a") AS key, value, count(*) FROM src GROUP BY concat(key, "a"), value) a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Test group by on function, should be bucketed and sorted by key and value because the function is applied in the mapper
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM (SELECT concat(key, "a") AS key, value, count(*) FROM src GROUP BY concat(key, "a"), value) a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SCRIPT [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SCRIPT [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SCRIPT []
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SCRIPT [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SCRIPT [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SCRIPT []
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 309
+ rawDataSize 3582
+ totalSize 3891
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: 1
+Bucket Columns: [key, value]
+Sort Columns: [Order(col:key, order:1), Order(col:value, order:1)]
+Storage Desc Params:
+ serialization.format 1
Index: ql/src/test/results/clientpositive/infer_bucket_sort_convert_join.q.out
===================================================================
--- ql/src/test/results/clientpositive/infer_bucket_sort_convert_join.q.out (revision 0)
+++ ql/src/test/results/clientpositive/infer_bucket_sort_convert_join.q.out (working copy)
@@ -0,0 +1,139 @@
+PREHOOK: query: -- This tests inferring how data is bucketed/sorted from the operators in the reducer
+-- and populating that information in partitions' metadata. In particular, those cases
+-- where joins may be auto converted to map joins.
+
+CREATE TABLE test_table (key STRING, value STRING) PARTITIONED BY (part STRING)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- This tests inferring how data is bucketed/sorted from the operators in the reducer
+-- and populating that information in partitions' metadata. In particular, those cases
+-- where joins may be auto converted to map joins.
+
+CREATE TABLE test_table (key STRING, value STRING) PARTITIONED BY (part STRING)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_table
+PREHOOK: query: -- Tests a join which is converted to a map join, the output should be neither bucketed nor sorted
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, b.value FROM src a JOIN src b ON a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Tests a join which is converted to a map join, the output should be neither bucketed nor sorted
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, b.value FROM src a JOIN src b ON a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 1028
+ rawDataSize 10968
+ totalSize 11996
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Tests a join which is not converted to a map join, the output should be bucketed and sorted
+
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, b.value FROM src a JOIN src b ON a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+Execution failed with exit status: 3
+Obtaining error information
+
+Task failed!
+Task ID:
+ Stage-7
+
+Logs:
+
+#### A masked pattern was here ####
+FAILED: Execution Error, return code 3 from org.apache.hadoop.hive.ql.exec.MapredLocalTask
+ATTEMPT: Execute BackupTask: org.apache.hadoop.hive.ql.exec.MapRedTask
+POSTHOOK: query: -- Tests a join which is not converted to a map join, the output should be bucketed and sorted
+
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, b.value FROM src a JOIN src b ON a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 1028
+ rawDataSize 10968
+ totalSize 11996
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: 1
+Bucket Columns: [key]
+Sort Columns: [Order(col:key, order:1)]
+Storage Desc Params:
+ serialization.format 1
Index: ql/src/test/results/clientpositive/infer_bucket_sort_dyn_part.q.out
===================================================================
--- ql/src/test/results/clientpositive/infer_bucket_sort_dyn_part.q.out (revision 0)
+++ ql/src/test/results/clientpositive/infer_bucket_sort_dyn_part.q.out (working copy)
@@ -0,0 +1,902 @@
+PREHOOK: query: -- This tests inferring how data is bucketed/sorted from the operators in the reducer
+-- and populating that information in partitions' metadata. In particular, those cases
+-- where dynamic partitioning is used.
+
+CREATE TABLE test_table LIKE srcpart
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- This tests inferring how data is bucketed/sorted from the operators in the reducer
+-- and populating that information in partitions' metadata. In particular, those cases
+-- where dynamic partitioning is used.
+
+CREATE TABLE test_table LIKE srcpart
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_table
+PREHOOK: query: ALTER TABLE test_table SET FILEFORMAT RCFILE
+PREHOOK: type: ALTERTABLE_FILEFORMAT
+PREHOOK: Input: default@test_table
+PREHOOK: Output: default@test_table
+POSTHOOK: query: ALTER TABLE test_table SET FILEFORMAT RCFILE
+POSTHOOK: type: ALTERTABLE_FILEFORMAT
+POSTHOOK: Input: default@test_table
+POSTHOOK: Output: default@test_table
+PREHOOK: query: -- Simple case, this should not be bucketed or sorted
+
+INSERT OVERWRITE TABLE test_table PARTITION (ds, hr)
+SELECT key, value, ds, hr FROM srcpart
+WHERE ds = '2008-04-08'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Output: default@test_table
+POSTHOOK: query: -- Simple case, this should not be bucketed or sorted
+
+INSERT OVERWRITE TABLE test_table PARTITION (ds, hr)
+SELECT key, value, ds, hr FROM srcpart
+WHERE ds = '2008-04-08'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Output: default@test_table@ds=2008-04-08/hr=11
+POSTHOOK: Output: default@test_table@ds=2008-04-08/hr=12
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (ds='2008-04-08', hr='11')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (ds='2008-04-08', hr='11')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+# col_name data_type comment
+
+key string default
+value string default
+
+# Partition Information
+# col_name data_type comment
+
+ds string None
+hr string None
+
+# Detailed Partition Information
+Partition Value: [2008-04-08, 11]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 0
+ rawDataSize 0
+ totalSize 5293
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (ds='2008-04-08', hr='12')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (ds='2008-04-08', hr='12')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+# col_name data_type comment
+
+key string default
+value string default
+
+# Partition Information
+# col_name data_type comment
+
+ds string None
+hr string None
+
+# Detailed Partition Information
+Partition Value: [2008-04-08, 12]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 0
+ rawDataSize 0
+ totalSize 5293
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- This should not be bucketed or sorted since the partition keys are in the set of bucketed
+-- and sorted columns for the output
+
+INSERT OVERWRITE TABLE test_table PARTITION (ds, hr)
+SELECT key, COUNT(*), ds, hr FROM srcpart
+WHERE ds = '2008-04-08'
+GROUP BY key, ds, hr
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Output: default@test_table
+POSTHOOK: query: -- This should not be bucketed or sorted since the partition keys are in the set of bucketed
+-- and sorted columns for the output
+
+INSERT OVERWRITE TABLE test_table PARTITION (ds, hr)
+SELECT key, COUNT(*), ds, hr FROM srcpart
+WHERE ds = '2008-04-08'
+GROUP BY key, ds, hr
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Output: default@test_table@ds=2008-04-08/hr=11
+POSTHOOK: Output: default@test_table@ds=2008-04-08/hr=12
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (ds='2008-04-08', hr='11')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (ds='2008-04-08', hr='11')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+# col_name data_type comment
+
+key string default
+value string default
+
+# Partition Information
+# col_name data_type comment
+
+ds string None
+hr string None
+
+# Detailed Partition Information
+Partition Value: [2008-04-08, 11]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 0
+ rawDataSize 0
+ totalSize 1342
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (ds='2008-04-08', hr='12')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (ds='2008-04-08', hr='12')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+# col_name data_type comment
+
+key string default
+value string default
+
+# Partition Information
+# col_name data_type comment
+
+ds string None
+hr string None
+
+# Detailed Partition Information
+Partition Value: [2008-04-08, 12]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 0
+ rawDataSize 0
+ totalSize 1342
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Both partitions should be bucketed and sorted by key
+
+INSERT OVERWRITE TABLE test_table PARTITION (ds, hr)
+SELECT key, value, '2008-04-08', IF (key % 2 == 0, '11', '12') FROM
+(SELECT key, COUNT(*) AS value FROM srcpart
+WHERE ds = '2008-04-08'
+GROUP BY key) a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Output: default@test_table
+POSTHOOK: query: -- Both partitions should be bucketed and sorted by key
+
+INSERT OVERWRITE TABLE test_table PARTITION (ds, hr)
+SELECT key, value, '2008-04-08', IF (key % 2 == 0, '11', '12') FROM
+(SELECT key, COUNT(*) AS value FROM srcpart
+WHERE ds = '2008-04-08'
+GROUP BY key) a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Output: default@test_table@ds=2008-04-08/hr=11
+POSTHOOK: Output: default@test_table@ds=2008-04-08/hr=12
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (ds='2008-04-08', hr='11')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (ds='2008-04-08', hr='11')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ]
+# col_name data_type comment
+
+key string default
+value string default
+
+# Partition Information
+# col_name data_type comment
+
+ds string None
+hr string None
+
+# Detailed Partition Information
+Partition Value: [2008-04-08, 11]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 0
+ rawDataSize 0
+ totalSize 719
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+Compressed: No
+Num Buckets: 1
+Bucket Columns: [key]
+Sort Columns: [Order(col:key, order:1)]
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (ds='2008-04-08', hr='12')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (ds='2008-04-08', hr='12')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ]
+# col_name data_type comment
+
+key string default
+value string default
+
+# Partition Information
+# col_name data_type comment
+
+ds string None
+hr string None
+
+# Detailed Partition Information
+Partition Value: [2008-04-08, 12]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 0
+ rawDataSize 0
+ totalSize 722
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+Compressed: No
+Num Buckets: 1
+Bucket Columns: [key]
+Sort Columns: [Order(col:key, order:1)]
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: CREATE TABLE srcpart_merge_dp LIKE srcpart
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE srcpart_merge_dp LIKE srcpart
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@srcpart_merge_dp
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ]
+PREHOOK: query: CREATE TABLE srcpart_merge_dp_rc LIKE srcpart
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE srcpart_merge_dp_rc LIKE srcpart
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@srcpart_merge_dp_rc
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ]
+PREHOOK: query: ALTER TABLE srcpart_merge_dp_rc SET FILEFORMAT RCFILE
+PREHOOK: type: ALTERTABLE_FILEFORMAT
+PREHOOK: Input: default@srcpart_merge_dp_rc
+PREHOOK: Output: default@srcpart_merge_dp_rc
+POSTHOOK: query: ALTER TABLE srcpart_merge_dp_rc SET FILEFORMAT RCFILE
+POSTHOOK: type: ALTERTABLE_FILEFORMAT
+POSTHOOK: Input: default@srcpart_merge_dp_rc
+POSTHOOK: Output: default@srcpart_merge_dp_rc
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ]
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcpart_merge_dp PARTITION(ds='2008-04-08', hr=11)
+PREHOOK: type: LOAD
+PREHOOK: Output: default@srcpart_merge_dp
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcpart_merge_dp PARTITION(ds='2008-04-08', hr=11)
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@srcpart_merge_dp
+POSTHOOK: Output: default@srcpart_merge_dp@ds=2008-04-08/hr=11
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ]
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcpart_merge_dp PARTITION(ds='2008-04-08', hr=11)
+PREHOOK: type: LOAD
+PREHOOK: Output: default@srcpart_merge_dp@ds=2008-04-08/hr=11
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcpart_merge_dp PARTITION(ds='2008-04-08', hr=11)
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@srcpart_merge_dp@ds=2008-04-08/hr=11
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ]
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket22.txt' INTO TABLE srcpart_merge_dp PARTITION(ds='2008-04-08', hr=11)
+PREHOOK: type: LOAD
+PREHOOK: Output: default@srcpart_merge_dp@ds=2008-04-08/hr=11
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket22.txt' INTO TABLE srcpart_merge_dp PARTITION(ds='2008-04-08', hr=11)
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@srcpart_merge_dp@ds=2008-04-08/hr=11
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ]
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket23.txt' INTO TABLE srcpart_merge_dp PARTITION(ds='2008-04-08', hr=11)
+PREHOOK: type: LOAD
+PREHOOK: Output: default@srcpart_merge_dp@ds=2008-04-08/hr=11
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket23.txt' INTO TABLE srcpart_merge_dp PARTITION(ds='2008-04-08', hr=11)
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@srcpart_merge_dp@ds=2008-04-08/hr=11
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ]
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcpart_merge_dp PARTITION(ds='2008-04-08', hr=12)
+PREHOOK: type: LOAD
+PREHOOK: Output: default@srcpart_merge_dp
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcpart_merge_dp PARTITION(ds='2008-04-08', hr=12)
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@srcpart_merge_dp
+POSTHOOK: Output: default@srcpart_merge_dp@ds=2008-04-08/hr=12
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ]
+PREHOOK: query: INSERT OVERWRITE TABLE srcpart_merge_dp_rc PARTITION (ds = '2008-04-08', hr)
+SELECT key, value, hr FROM srcpart_merge_dp WHERE ds = '2008-04-08'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart_merge_dp
+PREHOOK: Input: default@srcpart_merge_dp@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart_merge_dp@ds=2008-04-08/hr=12
+PREHOOK: Output: default@srcpart_merge_dp_rc@ds=2008-04-08
+POSTHOOK: query: INSERT OVERWRITE TABLE srcpart_merge_dp_rc PARTITION (ds = '2008-04-08', hr)
+SELECT key, value, hr FROM srcpart_merge_dp WHERE ds = '2008-04-08'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart_merge_dp
+POSTHOOK: Input: default@srcpart_merge_dp@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart_merge_dp@ds=2008-04-08/hr=12
+POSTHOOK: Output: default@srcpart_merge_dp_rc@ds=2008-04-08/hr=11
+POSTHOOK: Output: default@srcpart_merge_dp_rc@ds=2008-04-08/hr=12
+POSTHOOK: Lineage: srcpart_merge_dp_rc PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart_merge_dp)srcpart_merge_dp.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: srcpart_merge_dp_rc PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart_merge_dp)srcpart_merge_dp.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: srcpart_merge_dp_rc PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart_merge_dp)srcpart_merge_dp.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: srcpart_merge_dp_rc PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart_merge_dp)srcpart_merge_dp.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ]
+PREHOOK: query: -- Tests dynamic partitions where bucketing/sorting can be inferred, but some partitions are
+-- merged and some are moved. Currently neither should be bucketed or sorted, in the future,
+-- (ds='2008-04-08', hr='12') may be bucketed and sorted, (ds='2008-04-08', hr='11') should
+-- definitely not be.
+
+EXPLAIN
+INSERT OVERWRITE TABLE test_table PARTITION (ds = '2008-04-08', hr)
+SELECT key, value, IF (key % 100 == 0, '11', '12') FROM
+(SELECT key, COUNT(*) AS value FROM srcpart
+WHERE ds = '2008-04-08'
+GROUP BY key) a
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Tests dynamic partitions where bucketing/sorting can be inferred, but some partitions are
+-- merged and some are moved. Currently neither should be bucketed or sorted, in the future,
+-- (ds='2008-04-08', hr='12') may be bucketed and sorted, (ds='2008-04-08', hr='11') should
+-- definitely not be.
+
+EXPLAIN
+INSERT OVERWRITE TABLE test_table PARTITION (ds = '2008-04-08', hr)
+SELECT key, value, IF (key % 100 == 0, '11', '12') FROM
+(SELECT key, COUNT(*) AS value FROM srcpart
+WHERE ds = '2008-04-08'
+GROUP BY key) a
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: srcpart_merge_dp_rc PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart_merge_dp)srcpart_merge_dp.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: srcpart_merge_dp_rc PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart_merge_dp)srcpart_merge_dp.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: srcpart_merge_dp_rc PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart_merge_dp)srcpart_merge_dp.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: srcpart_merge_dp_rc PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart_merge_dp)srcpart_merge_dp.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR COUNT) value)) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '2008-04-08')) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table) (TOK_PARTSPEC (TOK_PARTVAL ds '2008-04-08') (TOK_PARTVAL hr)))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_FUNCTION IF (== (% (TOK_TABLE_OR_COL key) 100) 0) '11' '12')))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5
+ Stage-4
+ Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
+ Stage-2 depends on stages: Stage-0
+ Stage-3
+ Stage-5
+ Stage-6 depends on stages: Stage-5
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ a:srcpart
+ TableScan
+ alias: srcpart
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ outputColumnNames: key
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ keys:
+ expr: key
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col1
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ expr: if(((_col0 % 100) = 0), '11', '12')
+ type: string
+ outputColumnNames: _col0, _col1, _col2
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: UDFToString(_col1)
+ type: string
+ expr: _col2
+ type: string
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+ name: default.test_table
+
+ Stage: Stage-7
+ Conditional Operator
+
+ Stage: Stage-4
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ ds 2008-04-08
+ hr
+ replace: true
+ table:
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+ name: default.test_table
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+ Stage: Stage-3
+ Block level merge
+
+ Stage: Stage-5
+ Block level merge
+
+ Stage: Stage-6
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+
+PREHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (ds = '2008-04-08', hr)
+SELECT key, value, IF (key % 100 == 0, '11', '12') FROM
+(SELECT key, COUNT(*) AS value FROM srcpart
+WHERE ds = '2008-04-08'
+GROUP BY key) a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Output: default@test_table@ds=2008-04-08
+POSTHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (ds = '2008-04-08', hr)
+SELECT key, value, IF (key % 100 == 0, '11', '12') FROM
+(SELECT key, COUNT(*) AS value FROM srcpart
+WHERE ds = '2008-04-08'
+GROUP BY key) a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Output: default@test_table@ds=2008-04-08/hr=11
+POSTHOOK: Output: default@test_table@ds=2008-04-08/hr=12
+POSTHOOK: Lineage: srcpart_merge_dp_rc PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart_merge_dp)srcpart_merge_dp.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: srcpart_merge_dp_rc PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart_merge_dp)srcpart_merge_dp.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: srcpart_merge_dp_rc PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart_merge_dp)srcpart_merge_dp.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: srcpart_merge_dp_rc PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart_merge_dp)srcpart_merge_dp.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (ds='2008-04-08', hr='11')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (ds='2008-04-08', hr='11')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: srcpart_merge_dp_rc PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart_merge_dp)srcpart_merge_dp.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: srcpart_merge_dp_rc PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart_merge_dp)srcpart_merge_dp.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: srcpart_merge_dp_rc PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart_merge_dp)srcpart_merge_dp.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: srcpart_merge_dp_rc PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart_merge_dp)srcpart_merge_dp.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ]
+# col_name data_type comment
+
+key string default
+value string default
+
+# Partition Information
+# col_name data_type comment
+
+ds string None
+hr string None
+
+# Detailed Partition Information
+Partition Value: [2008-04-08, 11]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 0
+ rawDataSize 0
+ totalSize 115
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (ds='2008-04-08', hr='12')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (ds='2008-04-08', hr='12')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: srcpart_merge_dp_rc PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart_merge_dp)srcpart_merge_dp.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: srcpart_merge_dp_rc PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart_merge_dp)srcpart_merge_dp.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: srcpart_merge_dp_rc PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart_merge_dp)srcpart_merge_dp.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: srcpart_merge_dp_rc PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart_merge_dp)srcpart_merge_dp.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ]
+# col_name data_type comment
+
+key string default
+value string default
+
+# Partition Information
+# col_name data_type comment
+
+ds string None
+hr string None
+
+# Detailed Partition Information
+Partition Value: [2008-04-08, 12]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 2
+ numRows 0
+ rawDataSize 0
+ totalSize 1427
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
Index: ql/src/test/results/clientpositive/infer_bucket_sort_grouping_operators.q.out
===================================================================
--- ql/src/test/results/clientpositive/infer_bucket_sort_grouping_operators.q.out (revision 0)
+++ ql/src/test/results/clientpositive/infer_bucket_sort_grouping_operators.q.out (working copy)
@@ -0,0 +1,825 @@
+PREHOOK: query: -- This tests inferring how data is bucketed/sorted from the operators in the reducer
+-- and populating that information in partitions' metadata, in particular, this tests
+-- the grouping operators rollup/cube/grouping sets
+
+CREATE TABLE test_table_out (key STRING, value STRING, agg STRING) PARTITIONED BY (part STRING)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- This tests inferring how data is bucketed/sorted from the operators in the reducer
+-- and populating that information in partitions' metadata, in particular, this tests
+-- the grouping operators rollup/cube/grouping sets
+
+CREATE TABLE test_table_out (key STRING, value STRING, agg STRING) PARTITIONED BY (part STRING)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_table_out
+PREHOOK: query: CREATE TABLE test_table_out_2 (key STRING, value STRING, grouping_key STRING, agg STRING) PARTITIONED BY (part STRING)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE test_table_out_2 (key STRING, value STRING, grouping_key STRING, agg STRING) PARTITIONED BY (part STRING)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_table_out_2
+PREHOOK: query: -- Test rollup, should not be bucketed or sorted because its missing the grouping ID
+EXPLAIN INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1')
+SELECT key, value, count(1) FROM src GROUP BY key, value WITH ROLLUP
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Test rollup, should not be bucketed or sorted because its missing the grouping ID
+EXPLAIN INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1')
+SELECT key, value, count(1) FROM src GROUP BY key, value WITH ROLLUP
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table_out) (TOK_PARTSPEC (TOK_PARTVAL part '1')))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_ROLLUP_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ src
+ TableScan
+ alias: src
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ outputColumnNames: key, value
+ Group By Operator
+ aggregations:
+ expr: count(1)
+ bucketGroup: false
+ keys:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ expr: '0'
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: string
+ sort order: +++
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col3
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ expr: KEY._col1
+ type: string
+ expr: KEY._col2
+ type: string
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col3
+ type: bigint
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table_out
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ part 1
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table_out
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+
+PREHOOK: query: INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1')
+SELECT key, value, count(1) FROM src GROUP BY key, value WITH ROLLUP
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table_out@part=1
+POSTHOOK: query: INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1')
+SELECT key, value, count(1) FROM src GROUP BY key, value WITH ROLLUP
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table_out@part=1
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).agg EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: DESCRIBE FORMATTED test_table_out PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table_out PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).agg EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+# col_name data_type comment
+
+key string None
+value string None
+agg string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table_out
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 619
+ rawDataSize 6309
+ totalSize 6928
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Test rollup, should be bucketed and sorted on key, value, grouping_key
+
+INSERT OVERWRITE TABLE test_table_out_2 PARTITION (part = '1')
+SELECT key, value, GROUPING__ID, count(1) FROM src GROUP BY key, value WITH ROLLUP
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table_out_2@part=1
+POSTHOOK: query: -- Test rollup, should be bucketed and sorted on key, value, grouping_key
+
+INSERT OVERWRITE TABLE test_table_out_2 PARTITION (part = '1')
+SELECT key, value, GROUPING__ID, count(1) FROM src GROUP BY key, value WITH ROLLUP
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table_out_2@part=1
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).agg EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).agg EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).grouping_key SIMPLE []
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: DESCRIBE FORMATTED test_table_out_2 PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table_out_2 PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).agg EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).agg EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).grouping_key SIMPLE []
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+# col_name data_type comment
+
+key string None
+value string None
+grouping_key string None
+agg string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table_out_2
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 619
+ rawDataSize 7547
+ totalSize 8166
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: 1
+Bucket Columns: [key, value, grouping_key]
+Sort Columns: [Order(col:key, order:1), Order(col:value, order:1), Order(col:grouping_key, order:1)]
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Test cube, should not be bucketed or sorted because its missing the grouping ID
+EXPLAIN INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1')
+SELECT key, value, count(1) FROM src GROUP BY key, value WITH CUBE
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Test cube, should not be bucketed or sorted because its missing the grouping ID
+EXPLAIN INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1')
+SELECT key, value, count(1) FROM src GROUP BY key, value WITH CUBE
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).agg EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).agg EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).grouping_key SIMPLE []
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table_out) (TOK_PARTSPEC (TOK_PARTVAL part '1')))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_CUBE_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ src
+ TableScan
+ alias: src
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ outputColumnNames: key, value
+ Group By Operator
+ aggregations:
+ expr: count(1)
+ bucketGroup: false
+ keys:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ expr: '0'
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: string
+ sort order: +++
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col3
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ expr: KEY._col1
+ type: string
+ expr: KEY._col2
+ type: string
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col3
+ type: bigint
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table_out
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ part 1
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table_out
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+
+PREHOOK: query: INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1')
+SELECT key, value, count(1) FROM src GROUP BY key, value WITH CUBE
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table_out@part=1
+POSTHOOK: query: INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1')
+SELECT key, value, count(1) FROM src GROUP BY key, value WITH CUBE
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table_out@part=1
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).agg EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).agg EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).agg EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).grouping_key SIMPLE []
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: DESCRIBE FORMATTED test_table_out PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table_out PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).agg EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).agg EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).agg EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).grouping_key SIMPLE []
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+# col_name data_type comment
+
+key string None
+value string None
+agg string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table_out
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 928
+ rawDataSize 9954
+ totalSize 10882
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Test cube, should be bucketed and sorted on key, value, grouping_key
+
+INSERT OVERWRITE TABLE test_table_out_2 PARTITION (part = '1')
+SELECT key, value, GROUPING__ID, count(1) FROM src GROUP BY key, value WITH CUBE
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table_out_2@part=1
+POSTHOOK: query: -- Test cube, should be bucketed and sorted on key, value, grouping_key
+
+INSERT OVERWRITE TABLE test_table_out_2 PARTITION (part = '1')
+SELECT key, value, GROUPING__ID, count(1) FROM src GROUP BY key, value WITH CUBE
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table_out_2@part=1
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).agg EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).agg EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).agg EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).grouping_key SIMPLE []
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).agg EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).grouping_key SIMPLE []
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: DESCRIBE FORMATTED test_table_out_2 PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table_out_2 PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).agg EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).agg EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).agg EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).grouping_key SIMPLE []
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).agg EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).grouping_key SIMPLE []
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+# col_name data_type comment
+
+key string None
+value string None
+grouping_key string None
+agg string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table_out_2
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 928
+ rawDataSize 11810
+ totalSize 12738
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: 1
+Bucket Columns: [key, value, grouping_key]
+Sort Columns: [Order(col:key, order:1), Order(col:value, order:1), Order(col:grouping_key, order:1)]
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Test grouping sets, should not be bucketed or sorted because its missing the grouping ID
+EXPLAIN INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1')
+SELECT key, value, count(1) FROM src GROUP BY key, value GROUPING SETS (key, value)
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Test grouping sets, should not be bucketed or sorted because its missing the grouping ID
+EXPLAIN INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1')
+SELECT key, value, count(1) FROM src GROUP BY key, value GROUPING SETS (key, value)
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).agg EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).agg EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).agg EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).grouping_key SIMPLE []
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).agg EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).grouping_key SIMPLE []
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table_out) (TOK_PARTSPEC (TOK_PARTVAL part '1')))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPING_SETS (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value) (TOK_GROUPING_SETS_EXPRESSION (TOK_TABLE_OR_COL key)) (TOK_GROUPING_SETS_EXPRESSION (TOK_TABLE_OR_COL value)))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ src
+ TableScan
+ alias: src
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ outputColumnNames: key, value
+ Group By Operator
+ aggregations:
+ expr: count(1)
+ bucketGroup: false
+ keys:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ expr: '0'
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: string
+ sort order: +++
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col3
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ expr: KEY._col1
+ type: string
+ expr: KEY._col2
+ type: string
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col3
+ type: bigint
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table_out
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ part 1
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table_out
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+
+PREHOOK: query: INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1')
+SELECT key, value, count(1) FROM src GROUP BY key, value GROUPING SETS (key, value)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table_out@part=1
+POSTHOOK: query: INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1')
+SELECT key, value, count(1) FROM src GROUP BY key, value GROUPING SETS (key, value)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table_out@part=1
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).agg EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).agg EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).agg EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).agg EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).grouping_key SIMPLE []
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).agg EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).grouping_key SIMPLE []
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: DESCRIBE FORMATTED test_table_out PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table_out PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).agg EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).agg EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).agg EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).agg EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).grouping_key SIMPLE []
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).agg EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).grouping_key SIMPLE []
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+# col_name data_type comment
+
+key string None
+value string None
+agg string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table_out
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 618
+ rawDataSize 6054
+ totalSize 6672
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Test grouping sets, should be bucketed and sorted on key, value, grouping_key
+
+INSERT OVERWRITE TABLE test_table_out_2 PARTITION (part = '1')
+SELECT key, value, GROUPING__ID, count(1) FROM src GROUP BY key, value GROUPING SETS (key, value)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table_out_2@part=1
+POSTHOOK: query: -- Test grouping sets, should be bucketed and sorted on key, value, grouping_key
+
+INSERT OVERWRITE TABLE test_table_out_2 PARTITION (part = '1')
+SELECT key, value, GROUPING__ID, count(1) FROM src GROUP BY key, value GROUPING SETS (key, value)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table_out_2@part=1
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).agg EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).agg EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).agg EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).agg EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).grouping_key SIMPLE []
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).agg EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).grouping_key SIMPLE []
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).agg EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).grouping_key SIMPLE []
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: DESCRIBE FORMATTED test_table_out_2 PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table_out_2 PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).agg EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).agg EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).agg EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).agg EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).grouping_key SIMPLE []
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).agg EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).grouping_key SIMPLE []
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).agg EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).grouping_key SIMPLE []
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+# col_name data_type comment
+
+key string None
+value string None
+grouping_key string None
+agg string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table_out_2
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 618
+ rawDataSize 7290
+ totalSize 7908
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: 1
+Bucket Columns: [key, value, grouping_key]
+Sort Columns: [Order(col:key, order:1), Order(col:value, order:1), Order(col:grouping_key, order:1)]
+Storage Desc Params:
+ serialization.format 1
Index: ql/src/test/results/clientpositive/infer_bucket_sort_list_bucket.q.out
===================================================================
--- ql/src/test/results/clientpositive/infer_bucket_sort_list_bucket.q.out (revision 0)
+++ ql/src/test/results/clientpositive/infer_bucket_sort_list_bucket.q.out (working copy)
@@ -0,0 +1,158 @@
+PREHOOK: query: -- This tests that bucketing/sorting metadata is not inferred for tables with list bucketing
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+
+-- create a skewed table
+CREATE TABLE list_bucketing_table (key STRING, value STRING)
+PARTITIONED BY (part STRING)
+SKEWED BY (key) ON ("484")
+STORED AS DIRECTORIES
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- This tests that bucketing/sorting metadata is not inferred for tables with list bucketing
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+
+-- create a skewed table
+CREATE TABLE list_bucketing_table (key STRING, value STRING)
+PARTITIONED BY (part STRING)
+SKEWED BY (key) ON ("484")
+STORED AS DIRECTORIES
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@list_bucketing_table
+PREHOOK: query: -- Tests group by, the output should neither be bucketed nor sorted
+
+INSERT OVERWRITE TABLE list_bucketing_table PARTITION (part = '1')
+SELECT key, count(*) FROM src GROUP BY key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@list_bucketing_table@part=1
+POSTHOOK: query: -- Tests group by, the output should neither be bucketed nor sorted
+
+INSERT OVERWRITE TABLE list_bucketing_table PARTITION (part = '1')
+SELECT key, count(*) FROM src GROUP BY key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@list_bucketing_table@part=1
+POSTHOOK: Lineage: list_bucketing_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: list_bucketing_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+PREHOOK: query: DESC FORMATTED list_bucketing_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESC FORMATTED list_bucketing_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: list_bucketing_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: list_bucketing_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: list_bucketing_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 2
+ numRows 309
+ rawDataSize 1482
+ totalSize 1791
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Stored As SubDirectories: Yes
+Skewed Columns: [key]
+Skewed Values: [[484]]
+#### A masked pattern was here ####
+Skewed Value to Truncated Path: {[484]=/list_bucketing_table/part=1/key=484}
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- create a table skewed on a key which doesnt exist in the data
+CREATE TABLE list_bucketing_table2 (key STRING, value STRING)
+PARTITIONED BY (part STRING)
+SKEWED BY (key) ON ("abc")
+STORED AS DIRECTORIES
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- create a table skewed on a key which doesnt exist in the data
+CREATE TABLE list_bucketing_table2 (key STRING, value STRING)
+PARTITIONED BY (part STRING)
+SKEWED BY (key) ON ("abc")
+STORED AS DIRECTORIES
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@list_bucketing_table2
+POSTHOOK: Lineage: list_bucketing_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: list_bucketing_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+PREHOOK: query: -- should not be bucketed or sorted
+INSERT OVERWRITE TABLE list_bucketing_table2 PARTITION (part = '1')
+SELECT key, count(*) FROM src GROUP BY key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@list_bucketing_table2@part=1
+POSTHOOK: query: -- should not be bucketed or sorted
+INSERT OVERWRITE TABLE list_bucketing_table2 PARTITION (part = '1')
+SELECT key, count(*) FROM src GROUP BY key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@list_bucketing_table2@part=1
+POSTHOOK: Lineage: list_bucketing_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: list_bucketing_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: list_bucketing_table2 PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: list_bucketing_table2 PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+PREHOOK: query: DESC FORMATTED list_bucketing_table2 PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESC FORMATTED list_bucketing_table2 PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: list_bucketing_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: list_bucketing_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: list_bucketing_table2 PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: list_bucketing_table2 PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: list_bucketing_table2
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 309
+ rawDataSize 1482
+ totalSize 136
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Stored As SubDirectories: Yes
+Skewed Columns: [key]
+Skewed Values: [[abc]]
+Storage Desc Params:
+ serialization.format 1
Index: ql/src/test/results/clientpositive/infer_bucket_sort_map_operators.q.out
===================================================================
--- ql/src/test/results/clientpositive/infer_bucket_sort_map_operators.q.out (revision 0)
+++ ql/src/test/results/clientpositive/infer_bucket_sort_map_operators.q.out (working copy)
@@ -0,0 +1,846 @@
+PREHOOK: query: -- This tests inferring how data is bucketed/sorted from the operators in the reducer
+-- and populating that information in partitions' metadata, in particular, this tests
+-- that operators in the mapper have no effect
+
+CREATE TABLE test_table1 (key STRING, value STRING)
+CLUSTERED BY (key) SORTED BY (key DESC) INTO 2 BUCKETS
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- This tests inferring how data is bucketed/sorted from the operators in the reducer
+-- and populating that information in partitions' metadata, in particular, this tests
+-- that operators in the mapper have no effect
+
+CREATE TABLE test_table1 (key STRING, value STRING)
+CLUSTERED BY (key) SORTED BY (key DESC) INTO 2 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_table1
+PREHOOK: query: CREATE TABLE test_table2 (key STRING, value STRING)
+CLUSTERED BY (key) SORTED BY (key DESC) INTO 2 BUCKETS
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE test_table2 (key STRING, value STRING)
+CLUSTERED BY (key) SORTED BY (key DESC) INTO 2 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_table2
+PREHOOK: query: INSERT OVERWRITE TABLE test_table1 SELECT key, value FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table1
+POSTHOOK: query: INSERT OVERWRITE TABLE test_table1 SELECT key, value FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table1
+POSTHOOK: Lineage: test_table1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: INSERT OVERWRITE TABLE test_table2 SELECT key, value FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table2
+POSTHOOK: query: INSERT OVERWRITE TABLE test_table2 SELECT key, value FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table2
+POSTHOOK: Lineage: test_table1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: CREATE TABLE test_table_out (key STRING, value STRING) PARTITIONED BY (part STRING)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE test_table_out (key STRING, value STRING) PARTITIONED BY (part STRING)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_table_out
+POSTHOOK: Lineage: test_table1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: -- Test map group by doesn't affect inference, should not be bucketed or sorted
+EXPLAIN INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1')
+SELECT key, count(*) FROM test_table1 GROUP BY key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Test map group by doesn't affect inference, should not be bucketed or sorted
+EXPLAIN INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1')
+SELECT key, count(*) FROM test_table1 GROUP BY key
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: test_table1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table_out) (TOK_PARTSPEC (TOK_PARTVAL part '1')))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (TOK_TABLE_OR_COL key))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5
+ Stage-4
+ Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
+ Stage-2 depends on stages: Stage-0
+ Stage-3
+ Stage-5
+ Stage-6 depends on stages: Stage-5
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ test_table1
+ TableScan
+ alias: test_table1
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ outputColumnNames: key
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ keys:
+ expr: key
+ type: string
+ mode: final
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table_out
+
+ Stage: Stage-7
+ Conditional Operator
+
+ Stage: Stage-4
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ part 1
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table_out
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+ Stage: Stage-3
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table_out
+
+ Stage: Stage-5
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table_out
+
+ Stage: Stage-6
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+
+PREHOOK: query: INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1')
+SELECT key, count(*) FROM test_table1 GROUP BY key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table1
+PREHOOK: Output: default@test_table_out@part=1
+POSTHOOK: query: INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1')
+SELECT key, count(*) FROM test_table1 GROUP BY key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table1
+POSTHOOK: Output: default@test_table_out@part=1
+POSTHOOK: Lineage: test_table1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).value EXPRESSION [(test_table1)test_table1.null, ]
+PREHOOK: query: DESCRIBE FORMATTED test_table_out PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table_out PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).value EXPRESSION [(test_table1)test_table1.null, ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table_out
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 0
+ rawDataSize 0
+ totalSize 1791
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Test map group by doesn't affect inference, should be bucketed and sorted by value
+EXPLAIN INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1')
+SELECT a.key, a.value FROM (
+ SELECT key, count(*) AS value FROM test_table1 GROUP BY key
+) a JOIN (
+ SELECT key, value FROM src
+) b
+ON (a.value = b.value)
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Test map group by doesn't affect inference, should be bucketed and sorted by value
+EXPLAIN INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1')
+SELECT a.key, a.value FROM (
+ SELECT key, count(*) AS value FROM test_table1 GROUP BY key
+) a JOIN (
+ SELECT key, value FROM src
+) b
+ON (a.value = b.value)
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: test_table1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).value EXPRESSION [(test_table1)test_table1.null, ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count) value)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) a) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))))) b) (= (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table_out) (TOK_PARTSPEC (TOK_PARTVAL part '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ a:test_table1
+ TableScan
+ alias: test_table1
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ outputColumnNames: key
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ keys:
+ expr: key
+ type: string
+ mode: final
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions:
+ expr: UDFToDouble(_col1)
+ type: double
+ sort order: +
+ Map-reduce partition columns:
+ expr: UDFToDouble(_col1)
+ type: double
+ tag: 0
+ value expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ b:src
+ TableScan
+ alias: src
+ Select Operator
+ expressions:
+ expr: value
+ type: string
+ outputColumnNames: _col1
+ Reduce Output Operator
+ key expressions:
+ expr: UDFToDouble(_col1)
+ type: double
+ sort order: +
+ Map-reduce partition columns:
+ expr: UDFToDouble(_col1)
+ type: double
+ tag: 1
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {VALUE._col0} {VALUE._col1}
+ 1
+ handleSkewJoin: false
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table_out
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ part 1
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table_out
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+
+PREHOOK: query: INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1')
+SELECT a.key, a.value FROM (
+ SELECT key, cast(count(*) AS STRING) AS value FROM test_table1 GROUP BY key
+) a JOIN (
+ SELECT key, value FROM src
+) b
+ON (a.value = b.value)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@test_table1
+PREHOOK: Output: default@test_table_out@part=1
+POSTHOOK: query: INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1')
+SELECT a.key, a.value FROM (
+ SELECT key, cast(count(*) AS STRING) AS value FROM test_table1 GROUP BY key
+) a JOIN (
+ SELECT key, value FROM src
+) b
+ON (a.value = b.value)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@test_table1
+POSTHOOK: Output: default@test_table_out@part=1
+POSTHOOK: Lineage: test_table1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).value EXPRESSION [(test_table1)test_table1.null, ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).value EXPRESSION [(test_table1)test_table1.null, ]
+PREHOOK: query: DESCRIBE FORMATTED test_table_out PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table_out PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).value EXPRESSION [(test_table1)test_table1.null, ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).value EXPRESSION [(test_table1)test_table1.null, ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table_out
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 0
+ rawDataSize 0
+ totalSize 0
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: 1
+Bucket Columns: [value]
+Sort Columns: [Order(col:value, order:1)]
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Test SMB join doesn't affect inference, should not be bucketed or sorted
+EXPLAIN INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1')
+SELECT /*+ MAPJOIN(a) */ a.key, b.value FROM test_table1 a JOIN test_table2 b ON a.key = b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Test SMB join doesn't affect inference, should not be bucketed or sorted
+EXPLAIN INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1')
+SELECT /*+ MAPJOIN(a) */ a.key, b.value FROM test_table1 a JOIN test_table2 b ON a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: test_table1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).value EXPRESSION [(test_table1)test_table1.null, ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).value EXPRESSION [(test_table1)test_table1.null, ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test_table1) a) (TOK_TABREF (TOK_TABNAME test_table2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table_out) (TOK_PARTSPEC (TOK_PARTVAL part '1')))) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value)))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5
+ Stage-4
+ Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
+ Stage-2 depends on stages: Stage-0
+ Stage-3
+ Stage-5
+ Stage-6 depends on stages: Stage-5
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ b
+ TableScan
+ alias: b
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key}
+ 1 {value}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key]]
+ 1 [Column[key]]
+ outputColumnNames: _col0, _col5
+ Position of Big Table: 1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col5
+ type: string
+ outputColumnNames: _col0, _col5
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col5
+ type: string
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table_out
+
+ Stage: Stage-7
+ Conditional Operator
+
+ Stage: Stage-4
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ part 1
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table_out
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+ Stage: Stage-3
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table_out
+
+ Stage: Stage-5
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table_out
+
+ Stage: Stage-6
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+
+PREHOOK: query: INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1')
+SELECT /*+ MAPJOIN(a) */ a.key, b.value FROM test_table1 a JOIN test_table2 b ON a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table1
+PREHOOK: Input: default@test_table2
+PREHOOK: Output: default@test_table_out@part=1
+POSTHOOK: query: INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1')
+SELECT /*+ MAPJOIN(a) */ a.key, b.value FROM test_table1 a JOIN test_table2 b ON a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table1
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Output: default@test_table_out@part=1
+POSTHOOK: Lineage: test_table1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).value SIMPLE [(test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).value EXPRESSION [(test_table1)test_table1.null, ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).value EXPRESSION [(test_table1)test_table1.null, ]
+PREHOOK: query: DESCRIBE FORMATTED test_table_out PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table_out PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).value SIMPLE [(test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).value EXPRESSION [(test_table1)test_table1.null, ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).value EXPRESSION [(test_table1)test_table1.null, ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table_out
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 0
+ rawDataSize 0
+ totalSize 11996
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Test SMB join doesn't affect inference, should be bucketed and sorted by key
+EXPLAIN INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1')
+SELECT /*+ MAPJOIN(a) */ b.value, count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key
+GROUP BY b.value
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Test SMB join doesn't affect inference, should be bucketed and sorted by key
+EXPLAIN INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1')
+SELECT /*+ MAPJOIN(a) */ b.value, count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key
+GROUP BY b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: test_table1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).value SIMPLE [(test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).value EXPRESSION [(test_table1)test_table1.null, ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).value EXPRESSION [(test_table1)test_table1.null, ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test_table1) a) (TOK_TABREF (TOK_TABNAME test_table2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table_out) (TOK_PARTSPEC (TOK_PARTVAL part '1')))) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL b) value))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ b
+ TableScan
+ alias: b
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0
+ 1 {value}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key]]
+ 1 [Column[key]]
+ outputColumnNames: _col5
+ Position of Big Table: 1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ Select Operator
+ expressions:
+ expr: _col5
+ type: string
+ outputColumnNames: _col5
+ Select Operator
+ expressions:
+ expr: _col5
+ type: string
+ outputColumnNames: _col5
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ keys:
+ expr: _col5
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col1
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table_out
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ part 1
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table_out
+
+ Stage: Stage-3
+ Stats-Aggr Operator
+
+
+PREHOOK: query: INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1')
+SELECT /*+ MAPJOIN(a) */ b.value, count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key
+GROUP BY b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table1
+PREHOOK: Input: default@test_table2
+PREHOOK: Output: default@test_table_out@part=1
+POSTHOOK: query: INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1')
+SELECT /*+ MAPJOIN(a) */ b.value, count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key
+GROUP BY b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table1
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Output: default@test_table_out@part=1
+POSTHOOK: Lineage: test_table1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).value SIMPLE [(test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).value EXPRESSION [(test_table1)test_table1.null, ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).value EXPRESSION [(test_table1)a.null, (test_table2)b.null, ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).value EXPRESSION [(test_table1)test_table1.null, ]
+PREHOOK: query: DESCRIBE FORMATTED test_table_out PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table_out PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).value SIMPLE [(test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).value EXPRESSION [(test_table1)test_table1.null, ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).value EXPRESSION [(test_table1)a.null, (test_table2)b.null, ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table_out PARTITION(part=1).value EXPRESSION [(test_table1)test_table1.null, ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table_out
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 0
+ rawDataSize 0
+ totalSize 3037
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: 1
+Bucket Columns: [key]
+Sort Columns: [Order(col:key, order:1)]
+Storage Desc Params:
+ serialization.format 1
Index: ql/src/test/results/clientpositive/infer_bucket_sort_merge.q.out
===================================================================
--- ql/src/test/results/clientpositive/infer_bucket_sort_merge.q.out (revision 0)
+++ ql/src/test/results/clientpositive/infer_bucket_sort_merge.q.out (working copy)
@@ -0,0 +1,125 @@
+PREHOOK: query: -- This tests inferring how data is bucketed/sorted from the operators in the reducer
+-- and populating that information in partitions' metadata. In particular, those cases
+-- where merging may or may not be used.
+
+CREATE TABLE test_table (key STRING, value STRING) PARTITIONED BY (part STRING)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- This tests inferring how data is bucketed/sorted from the operators in the reducer
+-- and populating that information in partitions' metadata. In particular, those cases
+-- where merging may or may not be used.
+
+CREATE TABLE test_table (key STRING, value STRING) PARTITIONED BY (part STRING)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_table
+PREHOOK: query: -- Tests a reduce task followed by a merge. The output should be neither bucketed nor sorted.
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, b.value FROM src a JOIN src b ON a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Tests a reduce task followed by a merge. The output should be neither bucketed nor sorted.
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, b.value FROM src a JOIN src b ON a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 0
+ rawDataSize 0
+ totalSize 11996
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Tests a reduce task followed by a move. The output should be bucketed and sorted.
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, b.value FROM src a JOIN src b ON a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Tests a reduce task followed by a move. The output should be bucketed and sorted.
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, b.value FROM src a JOIN src b ON a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 2
+ numRows 0
+ rawDataSize 0
+ totalSize 11996
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: 2
+Bucket Columns: [key]
+Sort Columns: [Order(col:key, order:1)]
+Storage Desc Params:
+ serialization.format 1
Index: ql/src/test/results/clientpositive/infer_bucket_sort_multi_insert.q.out
===================================================================
--- ql/src/test/results/clientpositive/infer_bucket_sort_multi_insert.q.out (revision 0)
+++ ql/src/test/results/clientpositive/infer_bucket_sort_multi_insert.q.out (working copy)
@@ -0,0 +1,509 @@
+PREHOOK: query: -- This tests inferring how data is bucketed/sorted from the operators in the reducer
+-- and populating that information in partitions' metadata. In particular, those cases
+-- where multi insert is used.
+
+CREATE TABLE test_table (key STRING, value STRING) PARTITIONED BY (part STRING)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- This tests inferring how data is bucketed/sorted from the operators in the reducer
+-- and populating that information in partitions' metadata. In particular, those cases
+-- where multi insert is used.
+
+CREATE TABLE test_table (key STRING, value STRING) PARTITIONED BY (part STRING)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_table
+PREHOOK: query: -- Simple case, neither partition should be bucketed or sorted
+
+FROM src
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, value
+INSERT OVERWRITE TABLE test_table PARTITION (part = '2') SELECT value, key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+PREHOOK: Output: default@test_table@part=2
+POSTHOOK: query: -- Simple case, neither partition should be bucketed or sorted
+
+FROM src
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, value
+INSERT OVERWRITE TABLE test_table PARTITION (part = '2') SELECT value, key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Output: default@test_table@part=2
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=2).key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=2).key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 500
+ rawDataSize 5312
+ totalSize 5812
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '2')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '2')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=2).key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [2]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 500
+ rawDataSize 5312
+ totalSize 5812
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- The partitions should be bucketed and sorted by different keys
+
+FROM src
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, COUNT(*) GROUP BY key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '2') SELECT COUNT(*), value GROUP BY value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+PREHOOK: Output: default@test_table@part=2
+POSTHOOK: query: -- The partitions should be bucketed and sorted by different keys
+
+FROM src
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, COUNT(*) GROUP BY key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '2') SELECT COUNT(*), value GROUP BY value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Output: default@test_table@part=2
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=2).key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=2).key EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=2).key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=2).key EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 309
+ rawDataSize 1482
+ totalSize 1791
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: 1
+Bucket Columns: [key]
+Sort Columns: [Order(col:key, order:1)]
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '2')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '2')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=2).key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=2).key EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [2]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 309
+ rawDataSize 2718
+ totalSize 3027
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: 1
+Bucket Columns: [value]
+Sort Columns: [Order(col:value, order:1)]
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- The first partition should be bucketed and sorted, the second should not
+
+FROM src
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, COUNT(*) GROUP BY key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '2') SELECT key, value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+PREHOOK: Output: default@test_table@part=2
+POSTHOOK: query: -- The first partition should be bucketed and sorted, the second should not
+
+FROM src
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, COUNT(*) GROUP BY key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '2') SELECT key, value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Output: default@test_table@part=2
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=2).key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=2).key EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=2).key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=2).key EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 309
+ rawDataSize 1482
+ totalSize 1791
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: 1
+Bucket Columns: [key]
+Sort Columns: [Order(col:key, order:1)]
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '2')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '2')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=2).key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=2).key EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [2]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 500
+ rawDataSize 5312
+ totalSize 5812
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Test the multi group by single reducer optimization
+-- Both partitions should be bucketed by key
+FROM src
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, COUNT(*) GROUP BY key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '2') SELECT key, SUM(SUBSTR(value, 5)) GROUP BY key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+PREHOOK: Output: default@test_table@part=2
+POSTHOOK: query: -- Test the multi group by single reducer optimization
+-- Both partitions should be bucketed by key
+FROM src
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, COUNT(*) GROUP BY key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '2') SELECT key, SUM(SUBSTR(value, 5)) GROUP BY key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Output: default@test_table@part=2
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=2).key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=2).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=2).key EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=2).key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=2).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=2).key EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 309
+ rawDataSize 1482
+ totalSize 1791
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: 1
+Bucket Columns: [key]
+Sort Columns: [Order(col:key, order:1)]
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '2')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '2')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=2).key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=2).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=2).key EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [2]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 309
+ rawDataSize 2690
+ totalSize 2999
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: 1
+Bucket Columns: [key]
+Sort Columns: [Order(col:key, order:1)]
+Storage Desc Params:
+ serialization.format 1
Index: ql/src/test/results/clientpositive/infer_bucket_sort_num_buckets.q.out
===================================================================
--- ql/src/test/results/clientpositive/infer_bucket_sort_num_buckets.q.out (revision 0)
+++ ql/src/test/results/clientpositive/infer_bucket_sort_num_buckets.q.out (working copy)
@@ -0,0 +1,234 @@
+PREHOOK: query: CREATE TABLE test_table (key INT, value STRING) PARTITIONED BY (ds STRING, hr STRING)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE test_table (key INT, value STRING) PARTITIONED BY (ds STRING, hr STRING)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_table
+PREHOOK: query: -- Tests dynamic partitions where bucketing/sorting can be inferred, but not all reducers write
+-- all partitions. The subquery produces rows as follows
+-- key = 0:
+-- 0, , 0
+-- key = 1:
+-- 0, , 1
+-- key = 2:
+-- 1, , 0
+-- This means that by distributing by the first column into two reducers, and using the third
+-- columns as a dynamic partition, the dynamic partition for 0 will get written in both reducers
+-- and the partition for 1 will get written in one reducer. So hr=0 should be bucketed by key
+-- and hr=1 should not.
+
+EXPLAIN
+INSERT OVERWRITE TABLE test_table PARTITION (ds = '2008-04-08', hr)
+SELECT key2, value, cast(hr as int) FROM
+(SELECT if ((key % 3) < 2, 0, 1) as key2, value, (key % 2) as hr
+FROM srcpart
+WHERE ds = '2008-04-08') a
+DISTRIBUTE BY key2
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Tests dynamic partitions where bucketing/sorting can be inferred, but not all reducers write
+-- all partitions. The subquery produces rows as follows
+-- key = 0:
+-- 0, , 0
+-- key = 1:
+-- 0, , 1
+-- key = 2:
+-- 1, , 0
+-- This means that by distributing by the first column into two reducers, and using the third
+-- columns as a dynamic partition, the dynamic partition for 0 will get written in both reducers
+-- and the partition for 1 will get written in one reducer. So hr=0 should be bucketed by key
+-- and hr=1 should not.
+
+EXPLAIN
+INSERT OVERWRITE TABLE test_table PARTITION (ds = '2008-04-08', hr)
+SELECT key2, value, cast(hr as int) FROM
+(SELECT if ((key % 3) < 2, 0, 1) as key2, value, (key % 2) as hr
+FROM srcpart
+WHERE ds = '2008-04-08') a
+DISTRIBUTE BY key2
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION if (< (% (TOK_TABLE_OR_COL key) 3) 2) 0 1) key2) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (% (TOK_TABLE_OR_COL key) 2) hr)) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '2008-04-08')))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table) (TOK_PARTSPEC (TOK_PARTVAL ds '2008-04-08') (TOK_PARTVAL hr)))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key2)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_FUNCTION TOK_INT (TOK_TABLE_OR_COL hr)))) (TOK_DISTRIBUTEBY (TOK_TABLE_OR_COL key2))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ a:srcpart
+ TableScan
+ alias: srcpart
+ Select Operator
+ expressions:
+ expr: if(((key % 3) < 2), 0, 1)
+ type: int
+ expr: value
+ type: string
+ expr: (key % 2)
+ type: double
+ outputColumnNames: _col0, _col1, _col2
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: string
+ expr: UDFToInteger(_col2)
+ type: int
+ outputColumnNames: _col0, _col1, _col2
+ Reduce Output Operator
+ sort order:
+ Map-reduce partition columns:
+ expr: _col0
+ type: int
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: string
+ expr: _col2
+ type: int
+ Reduce Operator Tree:
+ Extract
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ ds 2008-04-08
+ hr
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+
+PREHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (ds = '2008-04-08', hr)
+SELECT key2, value, cast(hr as int) FROM
+(SELECT if ((key % 3) < 2, 0, 1) as key2, value, (key % 3 % 2) as hr
+FROM srcpart
+WHERE ds = '2008-04-08') a
+DISTRIBUTE BY key2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Output: default@test_table@ds=2008-04-08
+POSTHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (ds = '2008-04-08', hr)
+SELECT key2, value, cast(hr as int) FROM
+(SELECT if ((key % 3) < 2, 0, 1) as key2, value, (key % 3 % 2) as hr
+FROM srcpart
+WHERE ds = '2008-04-08') a
+DISTRIBUTE BY key2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Output: default@test_table@ds=2008-04-08/hr=0
+POSTHOOK: Output: default@test_table@ds=2008-04-08/hr=1
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=0).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=0).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=1).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=1).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (ds='2008-04-08', hr='0')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (ds='2008-04-08', hr='0')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=0).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=0).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=1).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=1).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+# col_name data_type comment
+
+key int None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+ds string None
+hr string None
+
+# Detailed Partition Information
+Partition Value: [2008-04-08, 0]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 2
+ numRows 0
+ rawDataSize 0
+ totalSize 6558
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: 2
+Bucket Columns: [key]
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (ds='2008-04-08', hr='1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (ds='2008-04-08', hr='1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=0).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=0).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=1).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=1).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+# col_name data_type comment
+
+key int None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+ds string None
+hr string None
+
+# Detailed Partition Information
+Partition Value: [2008-04-08, 1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 0
+ rawDataSize 0
+ totalSize 3254
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
Index: ql/src/test/results/clientpositive/infer_bucket_sort_reducers_power_two.q.out
===================================================================
--- ql/src/test/results/clientpositive/infer_bucket_sort_reducers_power_two.q.out (revision 0)
+++ ql/src/test/results/clientpositive/infer_bucket_sort_reducers_power_two.q.out (working copy)
@@ -0,0 +1,399 @@
+PREHOOK: query: -- This tests inferring how data is bucketed/sorted from the operators in the reducer
+-- and populating that information in partitions' metadata, it also verifies that the
+-- number of reducers chosen will be a power of two
+
+CREATE TABLE test_table (key STRING, value STRING) PARTITIONED BY (part STRING)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- This tests inferring how data is bucketed/sorted from the operators in the reducer
+-- and populating that information in partitions' metadata, it also verifies that the
+-- number of reducers chosen will be a power of two
+
+CREATE TABLE test_table (key STRING, value STRING) PARTITIONED BY (part STRING)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_table
+PREHOOK: query: -- Test group by, should be bucketed and sorted by group by key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, count(*) FROM src GROUP BY key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Test group by, should be bucketed and sorted by group by key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, count(*) FROM src GROUP BY key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 4
+ numRows 0
+ rawDataSize 0
+ totalSize 1791
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: 4
+Bucket Columns: [key]
+Sort Columns: [Order(col:key, order:1)]
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Test join, should be bucketed and sorted by join key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, a.value FROM src a JOIN src b ON a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Test join, should be bucketed and sorted by join key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, a.value FROM src a JOIN src b ON a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 4
+ numRows 0
+ rawDataSize 0
+ totalSize 11996
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: 4
+Bucket Columns: [key]
+Sort Columns: [Order(col:key, order:1)]
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Test join with two keys, should be bucketed and sorted by join keys
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, a.value FROM src a JOIN src b ON a.key = b.key AND a.value = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Test join with two keys, should be bucketed and sorted by join keys
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, a.value FROM src a JOIN src b ON a.key = b.key AND a.value = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 4
+ numRows 0
+ rawDataSize 0
+ totalSize 11996
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: 4
+Bucket Columns: [key, value]
+Sort Columns: [Order(col:key, order:1), Order(col:value, order:1)]
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Test join on three tables on same key, should be bucketed and sorted by join key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, c.value FROM src a JOIN src b ON (a.key = b.key) JOIN src c ON (b.key = c.key)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Test join on three tables on same key, should be bucketed and sorted by join key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, c.value FROM src a JOIN src b ON (a.key = b.key) JOIN src c ON (b.key = c.key)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 4
+ numRows 0
+ rawDataSize 0
+ totalSize 31120
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: 4
+Bucket Columns: [key]
+Sort Columns: [Order(col:key, order:1)]
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Test join on three tables on different keys, should be bucketed and sorted by latter key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, c.value FROM src a JOIN src b ON (a.key = b.key) JOIN src c ON (b.value = c.value)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Test join on three tables on different keys, should be bucketed and sorted by latter key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, c.value FROM src a JOIN src b ON (a.key = b.key) JOIN src c ON (b.value = c.value)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 16
+ numRows 0
+ rawDataSize 0
+ totalSize 31120
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: 16
+Bucket Columns: [value]
+Sort Columns: [Order(col:value, order:1)]
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Test group by in subquery with another group by outside, should be bucketed and sorted by the
+-- key of the outer group by
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT count(1), value FROM (SELECT key, count(1) as value FROM src group by key) a group by value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Test group by in subquery with another group by outside, should be bucketed and sorted by the
+-- key of the outer group by
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT count(1), value FROM (SELECT key, count(1) as value FROM src group by key) a group by value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 0
+ rawDataSize 0
+ totalSize 24
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: 1
+Bucket Columns: [value]
+Sort Columns: [Order(col:value, order:1)]
+Storage Desc Params:
+ serialization.format 1
Index: ql/src/test/results/compiler/plan/case_sensitivity.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/case_sensitivity.q.xml (revision 1438869)
+++ ql/src/test/results/compiler/plan/case_sensitivity.q.xml (working copy)
@@ -1266,6 +1266,9 @@
+
+ true
+
true
Index: ql/src/test/results/compiler/plan/cast1.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/cast1.q.xml (revision 1438869)
+++ ql/src/test/results/compiler/plan/cast1.q.xml (working copy)
@@ -1076,6 +1076,9 @@
+
+ true
+
Index: ql/src/test/results/compiler/plan/groupby1.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/groupby1.q.xml (revision 1438869)
+++ ql/src/test/results/compiler/plan/groupby1.q.xml (working copy)
@@ -992,6 +992,9 @@
+
+ true
+
true
Index: ql/src/test/results/compiler/plan/groupby2.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/groupby2.q.xml (revision 1438869)
+++ ql/src/test/results/compiler/plan/groupby2.q.xml (working copy)
@@ -1097,6 +1097,9 @@
+
+ true
+
Index: ql/src/test/results/compiler/plan/groupby3.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/groupby3.q.xml (revision 1438869)
+++ ql/src/test/results/compiler/plan/groupby3.q.xml (working copy)
@@ -1296,6 +1296,9 @@
+
+ true
+
Index: ql/src/test/results/compiler/plan/groupby4.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/groupby4.q.xml (revision 1438869)
+++ ql/src/test/results/compiler/plan/groupby4.q.xml (working copy)
@@ -746,6 +746,9 @@
+
+ true
+
Index: ql/src/test/results/compiler/plan/groupby5.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/groupby5.q.xml (revision 1438869)
+++ ql/src/test/results/compiler/plan/groupby5.q.xml (working copy)
@@ -839,6 +839,9 @@
+
+ true
+
Index: ql/src/test/results/compiler/plan/groupby6.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/groupby6.q.xml (revision 1438869)
+++ ql/src/test/results/compiler/plan/groupby6.q.xml (working copy)
@@ -746,6 +746,9 @@
+
+ true
+
Index: ql/src/test/results/compiler/plan/input1.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/input1.q.xml (revision 1438869)
+++ ql/src/test/results/compiler/plan/input1.q.xml (working copy)
@@ -1081,6 +1081,9 @@
+
+ true
+
true
Index: ql/src/test/results/compiler/plan/input2.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/input2.q.xml (revision 1438869)
+++ ql/src/test/results/compiler/plan/input2.q.xml (working copy)
@@ -2741,6 +2741,9 @@
+
+ true
+
true
Index: ql/src/test/results/compiler/plan/input20.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/input20.q.xml (revision 1438869)
+++ ql/src/test/results/compiler/plan/input20.q.xml (working copy)
@@ -854,6 +854,9 @@
+
+ true
+
@@ -1490,21 +1493,21 @@
- OP_4
+ EX_4
+
+ true
+
true
Index: ql/src/test/results/compiler/plan/input4.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/input4.q.xml (revision 1438869)
+++ ql/src/test/results/compiler/plan/input4.q.xml (working copy)
@@ -1004,6 +1004,9 @@
+
+ true
+
true
@@ -1452,21 +1455,21 @@
- OP_4
+ EX_4
+
+ true
+
true
@@ -1490,21 +1493,21 @@
- OP_4
+ EX_4
+
+ true
+
true
Index: ql/src/test/results/compiler/plan/input7.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/input7.q.xml (revision 1438869)
+++ ql/src/test/results/compiler/plan/input7.q.xml (working copy)
@@ -983,6 +983,9 @@
+
+ true
+
true
Index: ql/src/test/results/compiler/plan/input8.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/input8.q.xml (revision 1438869)
+++ ql/src/test/results/compiler/plan/input8.q.xml (working copy)
@@ -651,6 +651,9 @@
+
+ true
+
Index: ql/src/test/results/compiler/plan/input9.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/input9.q.xml (revision 1438869)
+++ ql/src/test/results/compiler/plan/input9.q.xml (working copy)
@@ -1060,6 +1060,9 @@
+
+ true
+
true
Index: ql/src/test/results/compiler/plan/input_part1.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/input_part1.q.xml (revision 1438869)
+++ ql/src/test/results/compiler/plan/input_part1.q.xml (working copy)
@@ -782,6 +782,9 @@
+
+ true
+
Index: ql/src/test/results/compiler/plan/input_testsequencefile.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/input_testsequencefile.q.xml (revision 1438869)
+++ ql/src/test/results/compiler/plan/input_testsequencefile.q.xml (working copy)
@@ -991,6 +991,9 @@
+
+ true
+
true
Index: ql/src/test/results/compiler/plan/input_testxpath.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/input_testxpath.q.xml (revision 1438869)
+++ ql/src/test/results/compiler/plan/input_testxpath.q.xml (working copy)
@@ -763,6 +763,9 @@
+
+ true
+
Index: ql/src/test/results/compiler/plan/input_testxpath2.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/input_testxpath2.q.xml (revision 1438869)
+++ ql/src/test/results/compiler/plan/input_testxpath2.q.xml (working copy)
@@ -851,6 +851,9 @@
+
+ true
+
Index: ql/src/test/results/compiler/plan/join1.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/join1.q.xml (revision 1438869)
+++ ql/src/test/results/compiler/plan/join1.q.xml (working copy)
@@ -1073,6 +1073,9 @@
+
+ true
+
true
Index: ql/src/test/results/compiler/plan/join2.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/join2.q.xml (revision 1438869)
+++ ql/src/test/results/compiler/plan/join2.q.xml (working copy)
@@ -969,6 +969,9 @@
+
+ true
+
true
Index: ql/src/test/results/compiler/plan/join3.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/join3.q.xml (revision 1438869)
+++ ql/src/test/results/compiler/plan/join3.q.xml (working copy)
@@ -1494,6 +1494,9 @@
+
+ true
+
true
Index: ql/src/test/results/compiler/plan/join4.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/join4.q.xml (revision 1438869)
+++ ql/src/test/results/compiler/plan/join4.q.xml (working copy)
@@ -1467,6 +1467,9 @@
+
+ true
+
Index: ql/src/test/results/compiler/plan/join5.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/join5.q.xml (revision 1438869)
+++ ql/src/test/results/compiler/plan/join5.q.xml (working copy)
@@ -1467,6 +1467,9 @@
+
+ true
+
Index: ql/src/test/results/compiler/plan/join6.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/join6.q.xml (revision 1438869)
+++ ql/src/test/results/compiler/plan/join6.q.xml (working copy)
@@ -1467,6 +1467,9 @@
+
+ true
+
Index: ql/src/test/results/compiler/plan/join7.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/join7.q.xml (revision 1438869)
+++ ql/src/test/results/compiler/plan/join7.q.xml (working copy)
@@ -2188,6 +2188,9 @@
+
+ true
+
Index: ql/src/test/results/compiler/plan/join8.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/join8.q.xml (revision 1438869)
+++ ql/src/test/results/compiler/plan/join8.q.xml (working copy)
@@ -1549,6 +1549,9 @@
+
+ true
+
Index: ql/src/test/results/compiler/plan/sample1.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/sample1.q.xml (revision 1438869)
+++ ql/src/test/results/compiler/plan/sample1.q.xml (working copy)
@@ -903,6 +903,9 @@
+
+ true
+
Index: ql/src/test/results/compiler/plan/sample2.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/sample2.q.xml (revision 1438869)
+++ ql/src/test/results/compiler/plan/sample2.q.xml (working copy)
@@ -1193,6 +1193,9 @@
+
+ true
+
true
Index: ql/src/test/results/compiler/plan/sample3.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/sample3.q.xml (revision 1438869)
+++ ql/src/test/results/compiler/plan/sample3.q.xml (working copy)
@@ -1203,6 +1203,9 @@
+
+ true
+
true
Index: ql/src/test/results/compiler/plan/sample4.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/sample4.q.xml (revision 1438869)
+++ ql/src/test/results/compiler/plan/sample4.q.xml (working copy)
@@ -1193,6 +1193,9 @@
+
+ true
+
true
Index: ql/src/test/results/compiler/plan/sample5.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/sample5.q.xml (revision 1438869)
+++ ql/src/test/results/compiler/plan/sample5.q.xml (working copy)
@@ -1190,6 +1190,9 @@
+
+ true
+
true
Index: ql/src/test/results/compiler/plan/sample6.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/sample6.q.xml (revision 1438869)
+++ ql/src/test/results/compiler/plan/sample6.q.xml (working copy)
@@ -1193,6 +1193,9 @@
+
+ true
+
true
Index: ql/src/test/results/compiler/plan/sample7.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/sample7.q.xml (revision 1438869)
+++ ql/src/test/results/compiler/plan/sample7.q.xml (working copy)
@@ -1275,6 +1275,9 @@
+
+ true
+
true
Index: ql/src/test/results/compiler/plan/subq.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/subq.q.xml (revision 1438869)
+++ ql/src/test/results/compiler/plan/subq.q.xml (working copy)
@@ -1081,6 +1081,9 @@
+
+ true
+
Index: ql/src/test/results/compiler/plan/udf1.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/udf1.q.xml (revision 1438869)
+++ ql/src/test/results/compiler/plan/udf1.q.xml (working copy)
@@ -1902,6 +1902,9 @@
+
+ true
+
Index: ql/src/test/results/compiler/plan/udf4.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/udf4.q.xml (revision 1438869)
+++ ql/src/test/results/compiler/plan/udf4.q.xml (working copy)
@@ -1824,6 +1824,9 @@
+
+ true
+
Index: ql/src/test/results/compiler/plan/udf6.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/udf6.q.xml (revision 1438869)
+++ ql/src/test/results/compiler/plan/udf6.q.xml (working copy)
@@ -582,6 +582,9 @@
+
+ true
+
Index: ql/src/test/results/compiler/plan/udf_case.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/udf_case.q.xml (revision 1438869)
+++ ql/src/test/results/compiler/plan/udf_case.q.xml (working copy)
@@ -676,6 +676,9 @@
+
+ true
+
Index: ql/src/test/results/compiler/plan/udf_when.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/udf_when.q.xml (revision 1438869)
+++ ql/src/test/results/compiler/plan/udf_when.q.xml (working copy)
@@ -756,6 +756,9 @@
+
+ true
+
Index: ql/src/test/results/compiler/plan/union.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/union.q.xml (revision 1438869)
+++ ql/src/test/results/compiler/plan/union.q.xml (working copy)
@@ -1626,6 +1626,9 @@
+
+ true
+