Index: build-common.xml =================================================================== --- build-common.xml (revision 1440043) +++ build-common.xml (working copy) @@ -57,7 +57,7 @@ - + Index: common/src/java/org/apache/hadoop/hive/conf/HiveConf.java =================================================================== --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (revision 1440043) +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (working copy) @@ -674,6 +674,17 @@ HIVE_MULTI_INSERT_MOVE_TASKS_SHARE_DEPENDENCIES( "hive.multi.insert.move.tasks.share.dependencies", false), + // If this is set, when writing partitions, the metadata will include the bucketing/sorting + // properties with which the data was written if any (this will not overwrite the metadata + // inherited from the table if the table is bucketed/sorted) + HIVE_INFER_BUCKET_SORT("hive.exec.infer.bucket.sort", false), + // If this is set, when setting the number of reducers for the map reduce task which writes the + // final output files, it will choose a number which is a power of two. The number of reducers + // may be set to a power of two, only to be followed by a merge task meaning preventing + // anything from being inferred. + HIVE_INFER_BUCKET_SORT_NUM_BUCKETS_POWER_TWO( + "hive.exec.infer.bucket.sort.num.buckets.power.two", false), + /* The following section contains all configurations used for list bucketing feature.*/ /* This is not for clients. but only for block merge task. */ /* This is used by BlockMergeTask to send out flag to RCFileMergeMapper */ Index: conf/hive-default.xml.template =================================================================== --- conf/hive-default.xml.template (revision 1440043) +++ conf/hive-default.xml.template (working copy) @@ -1669,6 +1669,39 @@ + hive.exec.infer.bucket.sort + false + + If this is set, when writing partitions, the metadata will include the bucketing/sorting + properties with which the data was written if any (this will not overwrite the metadata + inherited from the table if the table is bucketed/sorted) + + + + + hive.exec.infer.bucket.sort.num.buckets.power.two + false + + If this is set, when setting the number of reducers for the map reduce task which writes the + final output files, it will choose a number which is a power of two, unless the user specifies + the number of reducers to use using mapred.reduce.tasks. The number of reducers + may be set to a power of two, only to be followed by a merge task meaning preventing + anything from being inferred. + With hive.exec.infer.bucket.sort set to true: + Advantages: If this is not set, the number of buckets for partitions will seem arbitrary, + which means that the number of mappers used for optimized joins, for example, will + be very low. With this set, since the number of buckets used for any partition is + a power of two, the number of mappers used for optimized joins will be the least + number of buckets used by any partition being joined. + Disadvantages: This may mean a much larger or much smaller number of reducers being used in the + final map reduce job, e.g. if a job was originally going to take 257 reducers, + it will now take 512 reducers, similarly if the max number of reducers is 511, + and a job was going to use this many, it will now use 256 reducers. + + + + + hive.groupby.orderby.position.alias false Whether to enable using Column Position Alias in Group By or Order By Index: ql/src/java/org/apache/hadoop/hive/ql/exec/ConditionalTask.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/ConditionalTask.java (revision 1440043) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/ConditionalTask.java (working copy) @@ -91,6 +91,12 @@ //recursively remove this task from its children's parent task tsk.removeFromChildrenTasks(); } else { + if (getParentTasks() != null) { + // This makes it so that we can go back up the tree later + for (Task task : getParentTasks()) { + task.addDependentTask(tsk); + } + } // resolved task if (!driverContext.getRunnable().contains(tsk)) { console.printInfo(tsk.getId() + " is selected by condition resolver."); Index: ql/src/java/org/apache/hadoop/hive/ql/exec/ExtractOperator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/ExtractOperator.java (revision 1440043) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/ExtractOperator.java (working copy) @@ -49,4 +49,16 @@ public OperatorType getType() { return OperatorType.EXTRACT; } + + /** + * @return the name of the operator + */ + @Override + public String getName() { + return getOperatorName(); + } + + static public String getOperatorName() { + return "EX"; + } } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/ForwardOperator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/ForwardOperator.java (revision 1440043) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/ForwardOperator.java (working copy) @@ -40,4 +40,16 @@ public OperatorType getType() { return OperatorType.FORWARD; } + + /** + * @return the name of the operator + */ + @Override + public String getName() { + return getOperatorName(); + } + + static public String getOperatorName() { + return "FOR"; + } } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/MapRedTask.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/MapRedTask.java (revision 1440043) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/MapRedTask.java (working copy) @@ -422,6 +422,10 @@ * Estimate the number of reducers needed for this job, based on job input, * and configuration parameters. * + * The output of this method should only be used if the output of this + * MapRedTask is not being used to populate a bucketed table and the user + * has not specified the number of reducers to use. + * * @return the number of reducers. */ private int estimateNumberOfReducers() throws IOException { @@ -447,6 +451,30 @@ int reducers = (int) ((totalInputFileSize + bytesPerReducer - 1) / bytesPerReducer); reducers = Math.max(1, reducers); reducers = Math.min(maxReducers, reducers); + + // If this map reduce job writes final data to a table and bucketing is being inferred, + // and the user has configured Hive to do this, make sure the number of reducers is a + // power of two + if (conf.getBoolVar(HiveConf.ConfVars.HIVE_INFER_BUCKET_SORT_NUM_BUCKETS_POWER_TWO) && + work.isFinalMapRed() && !work.getBucketedColsByDirectory().isEmpty()) { + + int reducersLog = (int)(Math.log(reducers) / Math.log(2)) + 1; + int reducersPowerTwo = (int)Math.pow(2, reducersLog); + + // If the original number of reducers was a power of two, use that + if (reducersPowerTwo / 2 == reducers) { + return reducers; + } else if (reducersPowerTwo > maxReducers) { + // If the next power of two greater than the original number of reducers is greater + // than the max number of reducers, use the preceding power of two, which is strictly + // less than the original number of reducers and hence the max + reducers = reducersPowerTwo / 2; + } else { + // Otherwise use the smallest power of two greater than the original number of reducers + reducers = reducersPowerTwo; + } + } + return reducers; } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java (revision 1440043) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java (working copy) @@ -35,21 +35,28 @@ import org.apache.hadoop.fs.LocalFileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.api.InvalidOperationException; +import org.apache.hadoop.hive.metastore.api.Order; import org.apache.hadoop.hive.ql.Context; import org.apache.hadoop.hive.ql.DriverContext; import org.apache.hadoop.hive.ql.hooks.LineageInfo.DataContainer; import org.apache.hadoop.hive.ql.hooks.WriteEntity; import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils; +import org.apache.hadoop.hive.ql.io.rcfile.merge.BlockMergeTask; import org.apache.hadoop.hive.ql.lockmgr.HiveLock; import org.apache.hadoop.hive.ql.lockmgr.HiveLockManager; import org.apache.hadoop.hive.ql.lockmgr.HiveLockObj; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.Partition; import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.BucketCol; +import org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.SortCol; +import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer; import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx; import org.apache.hadoop.hive.ql.plan.LoadFileDesc; import org.apache.hadoop.hive.ql.plan.LoadMultiFilesDesc; import org.apache.hadoop.hive.ql.plan.LoadTableDesc; +import org.apache.hadoop.hive.ql.plan.MapredWork; import org.apache.hadoop.hive.ql.plan.MoveWork; import org.apache.hadoop.hive.ql.plan.api.StageType; import org.apache.hadoop.hive.ql.session.SessionState; @@ -276,6 +283,48 @@ } } else { LOG.info("Partition is: " + tbd.getPartitionSpec().toString()); + + // Check if the bucketing and/or sorting columns were inferred + List bucketCols = null; + List sortCols = null; + int numBuckets = -1; + Task task = this; + String path = tbd.getSourceDir(); + // Find the first ancestor of this MoveTask which is some form of map reduce task + // (Either standard, local, or a merge) + while (task.getParentTasks() != null && task.getParentTasks().size() == 1) { + task = (Task)task.getParentTasks().get(0); + // If it was a merge task or a local map reduce task, nothing can be inferred + if (task instanceof BlockMergeTask || task instanceof MapredLocalTask) { + break; + } + + // If it's a standard map reduce task, check what, if anything, it inferred about + // the directory this move task is moving + if (task instanceof MapRedTask) { + MapredWork work = (MapredWork)task.getWork(); + bucketCols = work.getBucketedColsByDirectory().get(path); + sortCols = work.getSortedColsByDirectory().get(path); + numBuckets = work.getNumReduceTasks(); + if (bucketCols != null || sortCols != null) { + // This must be a final map reduce task (the task containing the file sink + // operator that writes the final output) + assert work.isFinalMapRed(); + } + break; + } + + // If it's a move task, get the path the files were moved from, this is what any + // preceding map reduce task inferred information about, and moving does not invalidate + // those assumptions + // This can happen when a conditional merge is added before the final MoveTask, but the + // condition for merging is not met, see GenMRFileSink1. + if (task instanceof MoveTask) { + if (((MoveTask)task).getWork().getLoadFileWork() != null) { + path = ((MoveTask)task).getWork().getLoadFileWork().getSourceDir(); + } + } + } // deal with dynamic partitions DynamicPartitionCtx dpCtx = tbd.getDPCtx(); if (dpCtx != null && dpCtx.getNumDPCols() > 0) { // dynamic partitions @@ -314,6 +363,10 @@ for (LinkedHashMap partSpec: dp) { Partition partn = db.getPartition(table, partSpec, false); + if (bucketCols != null || sortCols != null) { + updatePartitionBucketSortColumns(table, partn, bucketCols, numBuckets, sortCols); + } + WriteEntity enty = new WriteEntity(partn, true); if (work.getOutputs() != null) { work.getOutputs().add(enty); @@ -344,6 +397,11 @@ tbd.getPartitionSpec(), tbd.getReplace(), tbd.getHoldDDLTime(), tbd.getInheritTableSpecs(), isSkewedStoredAsDirs(tbd)); Partition partn = db.getPartition(table, tbd.getPartitionSpec(), false); + + if (bucketCols != null || sortCols != null) { + updatePartitionBucketSortColumns(table, partn, bucketCols, numBuckets, sortCols); + } + dc = new DataContainer(table.getTTable(), partn.getTPartition()); // add this partition to post-execution hook if (work.getOutputs() != null) { @@ -371,6 +429,80 @@ .isSkewedStoredAsDir(); } + /** + * Alters the bucketing and/or sorting columns of the partition provided they meet some + * validation criteria, e.g. the number of buckets match the number of files, and the + * columns are not partition columns + * @param table + * @param partn + * @param bucketCols + * @param numBuckets + * @param sortCols + * @throws IOException + * @throws InvalidOperationException + * @throws HiveException + */ + private void updatePartitionBucketSortColumns(Table table, Partition partn, + List bucketCols, int numBuckets, List sortCols) + throws IOException, InvalidOperationException, HiveException { + + boolean updateBucketCols = false; + if (bucketCols != null) { + FileSystem fileSys = partn.getPartitionPath().getFileSystem(conf); + FileStatus[] fileStatus = Utilities.getFileStatusRecurse( + partn.getPartitionPath(), 1, fileSys); + // Verify the number of buckets equals the number of files + // This will not hold for dynamic partitions where not every reducer produced a file for + // those partitions. In this case the table is not bucketed as Hive requires a files for + // each bucket. + if (fileStatus.length == numBuckets) { + List newBucketCols = new ArrayList(); + updateBucketCols = true; + for (BucketCol bucketCol : bucketCols) { + if (bucketCol.getIndexes().get(0) < partn.getCols().size()) { + newBucketCols.add(partn.getCols().get( + bucketCol.getIndexes().get(0)).getName()); + } else { + // If the table is bucketed on a partition column, not valid for bucketing + updateBucketCols = false; + break; + } + } + if (updateBucketCols) { + partn.getBucketCols().clear(); + partn.getBucketCols().addAll(newBucketCols); + partn.getTPartition().getSd().setNumBuckets(numBuckets); + } + } + } + + boolean updateSortCols = false; + if (sortCols != null) { + List newSortCols = new ArrayList(); + updateSortCols = true; + for (SortCol sortCol : sortCols) { + if (sortCol.getIndexes().get(0) < partn.getCols().size()) { + newSortCols.add(new Order( + partn.getCols().get(sortCol.getIndexes().get(0)).getName(), + sortCol.getSortOrder() == '+' ? BaseSemanticAnalyzer.HIVE_COLUMN_ORDER_ASC : + BaseSemanticAnalyzer.HIVE_COLUMN_ORDER_DESC)); + } else { + // If the table is sorted on a partition column, not valid for sorting + updateSortCols = false; + break; + } + } + if (updateSortCols) { + partn.getSortCols().clear(); + partn.getSortCols().addAll(newSortCols); + } + } + + if (updateBucketCols || updateSortCols) { + db.alterPartition(table.getDbName(), table.getTableName(), partn); + } + } + /* * Does the move task involve moving to a local file system */ Index: ql/src/java/org/apache/hadoop/hive/ql/lib/RuleExactMatch.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/lib/RuleExactMatch.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/lib/RuleExactMatch.java (working copy) @@ -0,0 +1,85 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.lib; + +import java.util.Stack; + +import org.apache.hadoop.hive.ql.parse.SemanticException; + +/** + * Implentation of the Rule interface for Nodes Used in Node dispatching to dispatch + * process/visitor functions for Nodes. The cost method returns 1 if there is an exact + * match between the expression and the stack, otherwise -1. + */ +public class RuleExactMatch implements Rule { + + private final String ruleName; + private final String pattern; + + /** + * The rule specified as operator names separated by % symbols, the left side represents the + * bottom of the stack. + * + * E.g. TS%FIL%RS -> means + * TableScan Node followed by Filter followed by ReduceSink in the tree, or, in terms of the + * stack, ReduceSink on top followed by Filter followed by TableScan + * + * @param ruleName + * name of the rule + * @param regExp + * string specification of the rule + **/ + public RuleExactMatch(String ruleName, String pattern) { + this.ruleName = ruleName; + this.pattern = pattern; + } + + /** + * This function returns the cost of the rule for the specified stack. Returns 1 if there is + * an exact match with the entire stack, otherwise -1 + * + * If any proper substack of the stack matches it will return -1. It only returns 1 if the + * entire stack matches the rule exactly. + * + * @param stack + * Node stack encountered so far + * @return cost of the function + * @throws SemanticException + */ + public int cost(Stack stack) throws SemanticException { + int numElems = (stack != null ? stack.size() : 0); + String name = new String(); + for (int pos = numElems - 1; pos >= 0; pos--) { + name = stack.get(pos).getName() + "%" + name; + } + + if (pattern.equals(name)) { + return 1; + } + + return -1; + } + + /** + * @return the name of the Node + **/ + public String getName() { + return ruleName; + } +} Index: ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java (revision 1440043) +++ ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java (working copy) @@ -409,7 +409,7 @@ } /** - * Updates the existing table metadata with the new metadata. + * Updates the existing partition metadata with the new metadata. * * @param tblName * name of the existing table @@ -422,13 +422,30 @@ public void alterPartition(String tblName, Partition newPart) throws InvalidOperationException, HiveException { Table t = newTable(tblName); + alterPartition(t.getDbName(), t.getTableName(), newPart); + } + + /** + * Updates the existing partition metadata with the new metadata. + * + * @param dbName + * name of the exiting table's database + * @param tblName + * name of the existing table + * @param newPart + * new partition + * @throws InvalidOperationException + * if the changes in metadata is not acceptable + * @throws TException + */ + public void alterPartition(String dbName, String tblName, Partition newPart) + throws InvalidOperationException, HiveException { try { // Remove the DDL time so that it gets refreshed if (newPart.getParameters() != null) { newPart.getParameters().remove(hive_metastoreConstants.DDL_TIME); } - getMSC().alter_partition(t.getDbName(), t.getTableName(), - newPart.getTPartition()); + getMSC().alter_partition(dbName, tblName, newPart.getTPartition()); } catch (MetaException e) { throw new HiveException("Unable to alter partition.", e); Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java (revision 1440043) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java (working copy) @@ -548,6 +548,18 @@ .getValueCols(); ArrayList newOutputColNames = new ArrayList(); java.util.ArrayList newValueEval = new ArrayList(); + // ReduceSinkOperators that precede GroupByOperators have the keys in the schema in addition + // to the values. These are not pruned. + List oldSchema = oldRR.getRowSchema().getSignature(); + for (ColumnInfo colInfo : oldSchema) { + if (colInfo.getInternalName().startsWith(Utilities.ReduceField.KEY.toString() + ".")) { + String[] nm = oldRR.reverseLookup(colInfo.getInternalName()); + newRR.put(nm[0], nm[1], colInfo); + sig.add(colInfo); + } else { + break; + } + } for (int i = 0; i < retainFlags.length; i++) { if (retainFlags[i]) { newValueEval.add(originalValueEval.get(i)); Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java (revision 1440043) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java (working copy) @@ -106,6 +106,9 @@ parseCtx.getQB().getParseInfo().isInsertToTable(); HiveConf hconf = parseCtx.getConf(); + // Mark this task as a final map reduce task (ignoring the optional merge task) + ((MapredWork)currTask.getWork()).setFinalMapRed(true); + // If this file sink desc has been processed due to a linked file sink desc, // use that task Map> fileSinkDescs = ctx.getLinkedFileDescTasks(); Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/BucketingSortingCtx.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/BucketingSortingCtx.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/BucketingSortingCtx.java (working copy) @@ -0,0 +1,230 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.optimizer.physical; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; + +/** + * This class contains the bucketing sorting context that is passed + * while walking the operator tree in inferring bucket/sort columns. The context + * contains the mappings from operators and files to the columns their output is + * bucketed/sorted on. + */ +public class BucketingSortingCtx implements NodeProcessorCtx { + + // A mapping from an operator to the columns by which it's output is bucketed + Map, List> bucketedColsByOp; + // A mapping from a directory which a FileSinkOperator writes into to the columns by which that + // output is bucketed + Map> bucketedColsByDirectory; + + // A mapping from an operator to the columns by which it's output is sorted + Map, List> sortedColsByOp; + // A mapping from a directory which a FileSinkOperator writes into to the columns by which that + // output is sorted + Map> sortedColsByDirectory; + + public BucketingSortingCtx() { + this.bucketedColsByOp = new HashMap, List>(); + this.bucketedColsByDirectory = new HashMap>(); + this.sortedColsByOp = new HashMap, List>(); + this.sortedColsByDirectory = new HashMap>(); + } + + + public List getBucketedCols(Operator op) { + return bucketedColsByOp.get(op); + } + + + public void setBucketedCols(Operator op, List bucketCols) { + this.bucketedColsByOp.put(op, bucketCols); + } + + public Map> getBucketedColsByDirectory() { + return bucketedColsByDirectory; + } + + + public void setBucketedColsByDirectory(Map> bucketedColsByDirectory) { + this.bucketedColsByDirectory = bucketedColsByDirectory; + } + + + public List getSortedCols(Operator op) { + return sortedColsByOp.get(op); + } + + + public void setSortedCols(Operator op, List sortedCols) { + this.sortedColsByOp.put(op, sortedCols); + } + + public Map> getSortedColsByDirectory() { + return sortedColsByDirectory; + } + + + public void setSortedColsByDirectory(Map> sortedColsByDirectory) { + this.sortedColsByDirectory = sortedColsByDirectory; + } + + /** + * + * BucketSortCol. + * + * Classes that implement this interface provide a way to store information about equivalent + * columns as their names and indexes in the schema change going into and out of operators. The + * definition of equivalent columns is up to the class which uses these classes, e.g. + * BucketingSortingOpProcFactory. For example, two columns are equivalent if they + * contain exactly the same data. Though, it's possible that two columns contain exactly the + * same data and are not known to be equivalent. + * + * E.g. SELECT key a, key b FROM (SELECT key, count(*) c FROM src GROUP BY key) s; + * In this case, assuming this is done in a single map reduce job with the group by operator + * processed in the reducer, the data coming out of the group by operator will be bucketed + * by key, which would be at index 0 in the schema, after the outer select operator, the output + * can be viewed as bucketed by either the column with alias a or the column with alias b. To + * represent this, there could be a single BucketSortCol implementation instance whose names + * include both a and b, and whose indexes include both 0 and 1. + * + * Implementations of this interface should maintain the restriction that the alias + * getNames().get(i) should have index getIndexes().get(i) in the schema. + */ + public static interface BucketSortCol { + // Get a list of aliases for the same column + public List getNames(); + + // Get a list of indexes for which the columns in the schema are the same + public List getIndexes(); + + // Add an alternative alias for the column this instance represents, and its index in the + // schema. + public void addAlias(String name, Integer index); + } + + /** + * + * BucketCol. + * + * An implementation of BucketSortCol which contains known aliases/indexes of equivalent columns + * which data is determined to be bucketed on. + */ + public static final class BucketCol implements BucketSortCol, Serializable { + private static final long serialVersionUID = 1L; + // Equivalent aliases for the column + private final List names = new ArrayList(); + // Indexes of those equivalent columns + private final List indexes = new ArrayList(); + + public BucketCol(String name, int index) { + addAlias(name, index); + } + + public BucketCol() { + + } + + @Override + public List getNames() { + return names; + } + + @Override + public List getIndexes() { + return indexes; + } + + @Override + public void addAlias(String name, Integer index) { + names.add(name); + indexes.add(index); + } + + @Override + // Chooses a representative alias and index to use as the String, the first is used because + // it is set in the constructor + public String toString() { + return "name: " + names.get(0) + " index: " + indexes.get(0); + } + } + + /** + * + * SortCol. + * + * An implementation of BucketSortCol which contains known aliases/indexes of equivalent columns + * which data is determined to be sorted on. Unlike aliases, and indexes the sort order is known + * to be constant for all equivalent columns. + */ + public static final class SortCol implements BucketSortCol, Serializable { + private static final long serialVersionUID = 1L; + // Equivalent aliases for the column + private final List names = new ArrayList(); + // Indexes of those equivalent columns + private final List indexes = new ArrayList(); + // Sort order (+|-) + private final char sortOrder; + + public SortCol(String name, int index, char sortOrder) { + this(sortOrder); + addAlias(name, index); + } + + public SortCol(char sortOrder) { + this.sortOrder = sortOrder; + } + + + @Override + public List getNames() { + return names; + } + + @Override + public List getIndexes() { + return indexes; + } + + @Override + public void addAlias(String name, Integer index) { + names.add(name); + indexes.add(index); + } + + public char getSortOrder() { + return sortOrder; + } + + @Override + // Chooses a representative alias, index, and order to use as the String, the first is used + // because it is set in the constructor + public String toString() { + return "name: " + names.get(0) + " index: " + indexes.get(0) + " order: " + sortOrder; + } + } +} Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/BucketingSortingInferenceOptimizer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/BucketingSortingInferenceOptimizer.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/BucketingSortingInferenceOptimizer.java (working copy) @@ -0,0 +1,150 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.optimizer.physical; + +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + +import org.apache.hadoop.hive.ql.exec.ExecDriver; +import org.apache.hadoop.hive.ql.exec.ExtractOperator; +import org.apache.hadoop.hive.ql.exec.FileSinkOperator; +import org.apache.hadoop.hive.ql.exec.FilterOperator; +import org.apache.hadoop.hive.ql.exec.ForwardOperator; +import org.apache.hadoop.hive.ql.exec.GroupByOperator; +import org.apache.hadoop.hive.ql.exec.JoinOperator; +import org.apache.hadoop.hive.ql.exec.LateralViewForwardOperator; +import org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator; +import org.apache.hadoop.hive.ql.exec.LimitOperator; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.SelectOperator; +import org.apache.hadoop.hive.ql.exec.Utilities; +import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; +import org.apache.hadoop.hive.ql.lib.Dispatcher; +import org.apache.hadoop.hive.ql.lib.GraphWalker; +import org.apache.hadoop.hive.ql.lib.Node; +import org.apache.hadoop.hive.ql.lib.NodeProcessor; +import org.apache.hadoop.hive.ql.lib.PreOrderWalker; +import org.apache.hadoop.hive.ql.lib.Rule; +import org.apache.hadoop.hive.ql.lib.RuleExactMatch; +import org.apache.hadoop.hive.ql.lib.RuleRegExp; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; + +/** + * + * BucketingSortingInferenceOptimizer. + * + * For each map reduce task, attmepts to infer bucketing and sorting metadata for the outputs. + * + * Currently only map reduce tasks which produce final output have there output metadata inferred, + * but it can be extended to intermediate tasks as well. + * + * This should be run as the last physical optimizer, as other physical optimizers may invalidate + * the inferences made. If a physical optimizer depends on the results and is designed to + * carefully maintain these inferences, it may follow this one. + */ +public class BucketingSortingInferenceOptimizer implements PhysicalPlanResolver { + + @Override + public PhysicalContext resolve(PhysicalContext pctx) throws SemanticException { + inferBucketingSorting(Utilities.getMRTasks(pctx.rootTasks)); + return pctx; + } + + /** + * For each map reduce task, if it has a reducer, infer whether or not the final output of the + * reducer is bucketed and/or sorted + * + * @param mapRedTasks + * @throws SemanticException + */ + private void inferBucketingSorting(List mapRedTasks) throws SemanticException { + for (ExecDriver mapRedTask : mapRedTasks) { + + // For now this only is used to determine the bucketing/sorting of outputs, in the future + // this can be removed to optimize the query plan based on the bucketing/sorting properties + // of the outputs of intermediate map reduce jobs. + if (!mapRedTask.getWork().isFinalMapRed()) { + continue; + } + + Operator reducer = mapRedTask.getWork().getReducer(); + if (reducer == null) { + continue; + } + + BucketingSortingCtx bCtx = new BucketingSortingCtx(); + + // RuleRegExp rules are used to match operators anywhere in the tree + // RuleExactMatch rules are used to specify exactly what the tree should look like + // In particular, this guarantees that the first operator is the reducer + // (and its parent(s) are ReduceSinkOperators) since it begins walking the tree from + // the reducer. + Map opRules = new LinkedHashMap(); + opRules.put(new RuleRegExp("R1", SelectOperator.getOperatorName() + "%"), + BucketingSortingOpProcFactory.getSelProc()); + // Matches only GroupByOpeartors which are reducers, rather than map group by operators, + // or multi group by optimization specific operators + opRules.put(new RuleExactMatch("R2", GroupByOperator.getOperatorName() + "%"), + BucketingSortingOpProcFactory.getGroupByProc()); + // Matches only JoinOperators which are reducers, rather than map joins, SMB map joins, etc. + opRules.put(new RuleExactMatch("R3", JoinOperator.getOperatorName() + "%"), + BucketingSortingOpProcFactory.getJoinProc()); + opRules.put(new RuleRegExp("R5", FileSinkOperator.getOperatorName() + "%"), + BucketingSortingOpProcFactory.getFileSinkProc()); + // Matches only ExtractOperators which are reducers + opRules.put(new RuleExactMatch("R6", ExtractOperator.getOperatorName() + "%"), + BucketingSortingOpProcFactory.getExtractProc()); + opRules.put(new RuleRegExp("R7", FilterOperator.getOperatorName() + "%"), + BucketingSortingOpProcFactory.getFilterProc()); + opRules.put(new RuleRegExp("R8", LimitOperator.getOperatorName() + "%"), + BucketingSortingOpProcFactory.getLimitProc()); + opRules.put(new RuleRegExp("R9", LateralViewForwardOperator.getOperatorName() + "%"), + BucketingSortingOpProcFactory.getLateralViewForwardProc()); + opRules.put(new RuleRegExp("R10", LateralViewJoinOperator.getOperatorName() + "%"), + BucketingSortingOpProcFactory.getLateralViewJoinProc()); + // Matches only ForwardOperators which are preceded by some other operator in the tree, + // in particular it can't be a reducer (and hence cannot be one of the ForwardOperators + // added by the multi group by optimization) + opRules.put(new RuleRegExp("R11", ".+" + ForwardOperator.getOperatorName() + "%"), + BucketingSortingOpProcFactory.getForwardProc()); + // Matches only ForwardOperators which are reducers and are followed by GroupByOperators + // (specific to the multi group by optimization) + opRules.put(new RuleExactMatch("R12", ForwardOperator.getOperatorName() + "%" + + GroupByOperator.getOperatorName() + "%"), + BucketingSortingOpProcFactory.getMultiGroupByProc()); + + // The dispatcher fires the processor corresponding to the closest matching rule and passes + // the context along + Dispatcher disp = new DefaultRuleDispatcher(BucketingSortingOpProcFactory.getDefaultProc(), + opRules, bCtx); + GraphWalker ogw = new PreOrderWalker(disp); + + // Create a list of topop nodes + ArrayList topNodes = new ArrayList(); + topNodes.add(reducer); + ogw.startWalking(topNodes, null); + + mapRedTask.getWork().getBucketedColsByDirectory().putAll(bCtx.getBucketedColsByDirectory()); + mapRedTask.getWork().getSortedColsByDirectory().putAll(bCtx.getSortedColsByDirectory()); + } + } +} Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/BucketingSortingOpProcFactory.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/BucketingSortingOpProcFactory.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/BucketingSortingOpProcFactory.java (working copy) @@ -0,0 +1,785 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.optimizer.physical; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Stack; + +import org.apache.hadoop.hive.ql.exec.ColumnInfo; +import org.apache.hadoop.hive.ql.exec.ExtractOperator; +import org.apache.hadoop.hive.ql.exec.FileSinkOperator; +import org.apache.hadoop.hive.ql.exec.ForwardOperator; +import org.apache.hadoop.hive.ql.exec.GroupByOperator; +import org.apache.hadoop.hive.ql.exec.JoinOperator; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; +import org.apache.hadoop.hive.ql.exec.SelectOperator; +import org.apache.hadoop.hive.ql.lib.Node; +import org.apache.hadoop.hive.ql.lib.NodeProcessor; +import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; +import org.apache.hadoop.hive.ql.lib.Utils; +import org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.BucketCol; +import org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.BucketSortCol; +import org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.SortCol; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc.ExprNodeDescEqualityWrapper; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; + +/** + * Operator factory for the rule processors for inferring bucketing/sorting columns. + */ +public class BucketingSortingOpProcFactory { + + public static class DefaultInferrer implements NodeProcessor { + + @Override + public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, + Object... nodeOutputs) throws SemanticException { + + return null; + } + + } + + /** + * Infers bucket/sort columns for operators which simply forward rows from the parent + * E.g. Forward operators and SELECT * + * @param op + * @param bctx + * @param parent + * @throws SemanticException + */ + private static void processForward(Operator op, BucketingSortingCtx bctx, + Operator parent) throws SemanticException { + + List bucketCols = bctx.getBucketedCols(parent); + List sortCols = bctx.getSortedCols(parent); + List colInfos = op.getSchema().getSignature(); + + if (bucketCols == null && sortCols == null) { + return; + } + + List newBucketCols; + List newSortCols; + + if (bucketCols == null) { + newBucketCols = null; + } else { + newBucketCols = getNewBucketCols(bucketCols, colInfos); + } + + if (sortCols == null) { + newSortCols = null; + } else { + newSortCols = getNewSortCols(sortCols, colInfos); + } + + bctx.setBucketedCols(op, newBucketCols); + bctx.setSortedCols(op, newSortCols); + } + + /** + * Returns the parent operator in the walk path to the current operator. + * + * @param stack The stack encoding the path. + * + * @return Operator The parent operator in the current path. + */ + @SuppressWarnings("unchecked") + protected static Operator getParent(Stack stack) { + return (Operator)Utils.getNthAncestor(stack, 1); + } + + /** + * Processor for Join Operator. + * + * This handles common joins, the tree should look like + * ReducSinkOperator + * \ + * .... --- JoinOperator + * / + * ReduceSink Operator + * + */ + public static class JoinInferrer extends DefaultInferrer implements NodeProcessor { + @Override + public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, + Object... nodeOutputs) throws SemanticException { + + BucketingSortingCtx bctx = (BucketingSortingCtx)procCtx; + JoinOperator jop = (JoinOperator)nd; + List colInfos = jop.getSchema().getSignature(); + Byte[] order = jop.getConf().getTagOrder(); + + BucketCol[] newBucketCols = null; + SortCol[] newSortCols = null; + + for (int i = 0; i < jop.getParentOperators().size(); i++) { + + Operator parent = jop.getParentOperators().get(i); + + // The caller of this method should guarantee this + assert(parent instanceof ReduceSinkOperator); + + ReduceSinkOperator rop = (ReduceSinkOperator)jop.getParentOperators().get(i); + + String sortOrder = rop.getConf().getOrder(); + List bucketCols = new ArrayList(); + List sortCols = new ArrayList(); + // Go through the Reduce keys and find the matching column(s) in the reduce values + for (int keyIndex = 0; keyIndex < rop.getConf().getKeyCols().size(); keyIndex++) { + for (int valueIndex = 0; valueIndex < rop.getConf().getValueCols().size(); + valueIndex++) { + + if (new ExprNodeDescEqualityWrapper(rop.getConf().getValueCols().get(valueIndex)). + equals(new ExprNodeDescEqualityWrapper(rop.getConf().getKeyCols().get( + keyIndex)))) { + + String colName = rop.getSchema().getSignature().get(valueIndex).getInternalName(); + bucketCols.add(new BucketCol(colName, keyIndex)); + sortCols.add(new SortCol(colName, keyIndex, sortOrder.charAt(keyIndex))); + break; + } + } + } + + if (bucketCols.isEmpty()) { + assert(sortCols.isEmpty()); + continue; + } + + if (newBucketCols == null) { + assert(newSortCols == null); + // The number of join keys is equal to the number of keys in every reducer, although + // not every key may map to a value in the reducer + newBucketCols = new BucketCol[rop.getConf().getKeyCols().size()]; + newSortCols = new SortCol[rop.getConf().getKeyCols().size()]; + } else { + assert(newSortCols != null); + } + + byte tag = (byte)rop.getConf().getTag(); + List exprs = jop.getConf().getExprs().get(tag); + + int colInfosOffset = 0; + int orderValue = order[tag]; + // Columns are output from the join from the different reduce sinks in the order of their + // offsets + for (byte orderIndex = 0; orderIndex < order.length; orderIndex++) { + if (order[orderIndex] < orderValue) { + colInfosOffset += jop.getConf().getExprs().get(orderIndex).size(); + } + } + + findBucketingSortingColumns(exprs, colInfos, bucketCols, sortCols, newBucketCols, + newSortCols, colInfosOffset); + + } + + setBucketingColsIfComplete(bctx, jop, newBucketCols); + + setSortingColsIfComplete(bctx, jop, newSortCols); + + return null; + } + + } + + /** + * If the list of output bucket columns has been populated and every column has at least + * one representative in the output they can be inferred + * + * @param bctx - BucketingSortingCtx containing inferred columns + * @param op - The operator we are inferring information about the output of + * @param newBucketCols - An array of columns on which the output is bucketed, e.g. as output by + * the method findBucketingSortingColumns + */ + private static void setBucketingColsIfComplete(BucketingSortingCtx bctx, + Operator op, BucketCol[] newBucketCols) { + + if (newBucketCols != null) { + List newBucketColList = Arrays.asList(newBucketCols); + // If newBucketColList had a null value it means that at least one of the input bucket + // columns did not have a representative found in the output columns, so assume the data + // is no longer bucketed + if (!newBucketColList.contains(null)) { + bctx.setBucketedCols(op, newBucketColList); + } + } + } + + /** + * If the list of output sort columns has been populated and every column has at least + * one representative in the output they can be inferred + * + * @param bctx - BucketingSortingCtx containing inferred columns + * @param op - The operator we are inferring information about the output of + * @param newSortCols - An array of columns on which the output is sorted, e.g. as output by + * the method findBucketingSortingColumns + */ + private static void setSortingColsIfComplete(BucketingSortingCtx bctx, + Operator op, SortCol[] newSortCols) { + + if (newSortCols != null) { + List newSortColList = Arrays.asList(newSortCols); + // If newSortColList had a null value it means that at least one of the input sort + // columns did not have a representative found in the output columns, so assume the data + // is no longer sorted + if (!newSortColList.contains(null)) { + bctx.setSortedCols(op, newSortColList); + } + } + } + + private static void findBucketingSortingColumns(List exprs, + List colInfos, List bucketCols, List sortCols, + BucketCol[] newBucketCols, SortCol[] newSortCols) { + findBucketingSortingColumns(exprs, colInfos, bucketCols, sortCols, newBucketCols, + newSortCols, 0); + } + + /** + * For each expression, check if it represents a column known to be bucketed/sorted. + * + * The methods setBucketingColsIfComplete and setSortingColsIfComplete should be used to assign + * the values of newBucketCols and newSortCols as the bucketing/sorting columns of this operator + * because these arrays may contain nulls indicating that the output of this operator is not + * bucketed/sorted. + * + * @param exprs - list of expression + * @param colInfos - list of column infos + * @param bucketCols - list of bucketed columns from the input + * @param sortCols - list of sorted columns from the input + * @param newBucketCols - an array of bucket columns which should be the same length as + * bucketCols, updated such that the bucketed column(s) at index i in bucketCols became + * the bucketed column(s) at index i of newBucketCols in the output + * @param newSortCols - an array of sort columns which should be the same length as + * sortCols, updated such that the sorted column(s) at index i in sortCols became + * the sorted column(s) at index i of sortCols in the output + * @param colInfosOffset - the expressions are known to be represented by column infos + * beginning at this index + */ + private static void findBucketingSortingColumns(List exprs, + List colInfos, List bucketCols, List sortCols, + BucketCol[] newBucketCols, SortCol[] newSortCols, int colInfosOffset) { + for(int cnt = 0; cnt < exprs.size(); cnt++) { + ExprNodeDesc expr = exprs.get(cnt); + + // Only columns can be sorted/bucketed, in particular applying a function to a column + // voids any assumptions + if (!(expr instanceof ExprNodeColumnDesc)) { + continue; + } + + ExprNodeColumnDesc columnExpr = (ExprNodeColumnDesc)expr; + + int colInfosIndex = cnt + colInfosOffset; + + if (newBucketCols != null) { + int bucketIndex = indexOfColName(bucketCols, columnExpr.getColumn()); + if (bucketIndex != -1) { + if (newBucketCols[bucketIndex] == null) { + newBucketCols[bucketIndex] = new BucketCol(); + } + newBucketCols[bucketIndex].addAlias( + colInfos.get(colInfosIndex).getInternalName(), colInfosIndex); + } + } + + if (newSortCols != null) { + int sortIndex = indexOfColName(sortCols, columnExpr.getColumn()); + if (sortIndex != -1) { + if (newSortCols[sortIndex] == null) { + newSortCols[sortIndex] = new SortCol(sortCols.get(sortIndex).getSortOrder()); + } + newSortCols[sortIndex].addAlias( + colInfos.get(colInfosIndex).getInternalName(), colInfosIndex); + } + } + } + } + + /** + * Processor for Select operator. + */ + public static class SelectInferrer extends DefaultInferrer implements NodeProcessor { + @Override + public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, + Object... nodeOutputs) throws SemanticException { + + BucketingSortingCtx bctx = (BucketingSortingCtx)procCtx; + SelectOperator sop = (SelectOperator)nd; + + Operator parent = getParent(stack); + + // if this is a selStarNoCompute then this select operator + // is treated like a default operator, so just call the super classes + // process method. + if (sop.getConf().isSelStarNoCompute()) { + processForward(sop, bctx, parent); + return null; + } + + List bucketCols = bctx.getBucketedCols(parent); + List sortCols = bctx.getSortedCols(parent); + List colInfos = sop.getSchema().getSignature(); + + if (bucketCols == null && sortCols == null) { + return null; + } + + BucketCol[] newBucketCols = null; + SortCol[] newSortCols = null; + if (bucketCols != null) { + newBucketCols = new BucketCol[bucketCols.size()]; + } + if (sortCols != null) { + newSortCols = new SortCol[sortCols.size()]; + } + + findBucketingSortingColumns(sop.getConf().getColList(), colInfos, bucketCols, sortCols, + newBucketCols, newSortCols); + + setBucketingColsIfComplete(bctx, sop, newBucketCols); + + setSortingColsIfComplete(bctx, sop, newSortCols); + + return null; + } + + } + + /** + * Find the BucketSortCol which has colName as one of its aliases. Returns the index of that + * BucketSortCol, or -1 if none exist + * @param bucketSortCols + * @param colName + * @return + */ + private static int indexOfColName(List bucketSortCols, String colName) { + for (int index = 0; index < bucketSortCols.size(); index++) { + BucketSortCol bucketSortCol = bucketSortCols.get(index); + if (bucketSortCol.getNames().indexOf(colName) != -1) { + return index; + } + } + + return -1; + } + + /** + * This is used to construct new lists of bucketed columns where the order of the columns + * hasn't changed, only possibly the name + * @param bucketCols - input bucketed columns + * @param colInfos - List of column infos + * @return output bucketed columns + */ + private static List getNewBucketCols(List bucketCols, + List colInfos) { + + List newBucketCols = new ArrayList(bucketCols.size()); + for (int i = 0; i < bucketCols.size(); i++) { + BucketCol bucketCol = new BucketCol(); + for (Integer index : bucketCols.get(i).getIndexes()) { + // The only time this condition should be false is in the case of dynamic partitioning + // where the data is bucketed on a dynamic partitioning column and the FileSinkOperator is + // being processed. In this case, the dynamic partition column will not appear in + // colInfos, and due to the limitations of dynamic partitioning, they will appear at the + // end of the input schema. Since the order of the columns hasn't changed, and no new + // columns have been added/removed, it is safe to assume that these will have indexes + // greater than or equal to colInfos.size(). + if (index < colInfos.size()) { + bucketCol.addAlias(colInfos.get(index).getInternalName(), index); + } else { + return null; + } + } + newBucketCols.add(bucketCol); + } + return newBucketCols; + } + + /** + * This is used to construct new lists of sorted columns where the order of the columns + * hasn't changed, only possibly the name + * @param bucketCols - input sorted columns + * @param colInfos - List of column infos + * @return output sorted columns + */ + private static List getNewSortCols(List sortCols, List colInfos) { + List newSortCols = new ArrayList(sortCols.size()); + for (int i = 0; i < sortCols.size(); i++) { + SortCol sortCol = new SortCol(sortCols.get(i).getSortOrder()); + for (Integer index : sortCols.get(i).getIndexes()) { + // The only time this condition should be false is in the case of dynamic partitioning + if (index < colInfos.size()) { + sortCol.addAlias(colInfos.get(index).getInternalName(), index); + } else { + return null; + } + } + newSortCols.add(sortCol); + } + return newSortCols; + } + + /** + * Processor for FileSink operator. + */ + public static class FileSinkInferrer extends DefaultInferrer implements NodeProcessor { + @Override + public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, + Object... nodeOutputs) throws SemanticException { + + BucketingSortingCtx bctx = (BucketingSortingCtx)procCtx; + FileSinkOperator fop = (FileSinkOperator)nd; + + Operator parent = getParent(stack); + List bucketCols = bctx.getBucketedCols(parent); + List colInfos = fop.getSchema().getSignature(); + + // Set the inferred bucket columns for the file this FileSink produces + if (bucketCols != null) { + List newBucketCols = getNewBucketCols(bucketCols, colInfos); + bctx.getBucketedColsByDirectory().put(fop.getConf().getDirName(), newBucketCols); + bctx.setBucketedCols(fop, newBucketCols); + } + + List sortCols = bctx.getSortedCols(parent); + + // Set the inferred sort columns for the file this FileSink produces + if (sortCols != null) { + List newSortCols = getNewSortCols(sortCols, colInfos); + bctx.getSortedColsByDirectory().put(fop.getConf().getDirName(), newSortCols); + bctx.setSortedCols(fop, newSortCols); + } + + return null; + } + + } + + /** + * Processor for Extract operator. + * + * Only handles the case where the tree looks like + * + * ReduceSinkOperator --- ExtractOperator + * + * This is the case for distribute by, sort by, order by, cluster by operators. + */ + public static class ExtractInferrer extends DefaultInferrer implements NodeProcessor { + @Override + public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, + Object... nodeOutputs) throws SemanticException { + + BucketingSortingCtx bctx = (BucketingSortingCtx)procCtx; + ExtractOperator exop = (ExtractOperator)nd; + + // As of writing this, there is no case where this could be false, this is just protection + // from possible future changes + if (exop.getParentOperators().size() != 1) { + return null; + } + + Operator parent = exop.getParentOperators().get(0); + + // The caller of this method should guarantee this + assert(parent instanceof ReduceSinkOperator); + + ReduceSinkOperator rop = (ReduceSinkOperator)parent; + + // Go through the set of partition columns, and find their representatives in the values + // These represent the bucketed columns + List bucketCols = new ArrayList(); + for (int i = 0; i < rop.getConf().getPartitionCols().size(); i++) { + boolean valueColFound = false; + for (int j = 0; j < rop.getConf().getValueCols().size(); j++) { + if (new ExprNodeDescEqualityWrapper(rop.getConf().getValueCols().get(j)).equals( + new ExprNodeDescEqualityWrapper(rop.getConf().getPartitionCols().get(i)))) { + + bucketCols.add(new BucketCol( + rop.getSchema().getSignature().get(j).getInternalName(), j)); + valueColFound = true; + break; + } + } + + // If the partition columns can't all be found in the values then the data is not bucketed + if (!valueColFound) { + bucketCols.clear(); + break; + } + } + + // Go through the set of key columns, and find their representatives in the values + // These represent the sorted columns + String sortOrder = rop.getConf().getOrder(); + List sortCols = new ArrayList(); + for (int i = 0; i < rop.getConf().getKeyCols().size(); i++) { + boolean valueColFound = false; + for (int j = 0; j < rop.getConf().getValueCols().size(); j++) { + if (new ExprNodeDescEqualityWrapper(rop.getConf().getValueCols().get(j)).equals( + new ExprNodeDescEqualityWrapper(rop.getConf().getKeyCols().get(i)))) { + + sortCols.add(new SortCol( + rop.getSchema().getSignature().get(j).getInternalName(), j, sortOrder.charAt(i))); + valueColFound = true; + break; + } + } + + // If the sorted columns can't all be found in the values then the data is only sorted on + // the columns seen up until now + if (!valueColFound) { + break; + } + } + + List colInfos = exop.getSchema().getSignature(); + + if (!bucketCols.isEmpty()) { + List newBucketCols = getNewBucketCols(bucketCols, colInfos); + bctx.setBucketedCols(exop, newBucketCols); + } + + if (!sortCols.isEmpty()) { + List newSortCols = getNewSortCols(sortCols, colInfos); + bctx.setSortedCols(exop, newSortCols); + } + + return null; + } + } + + /** + * Processor for GroupByOperator, the special case where it follows a ForwardOperator + * + * There is a multi group by optimization which puts multiple group by operators in a + * reducer when they share the same keys and are part of a multi insert query. + * + * In this case the tree should look like + * Group By Operator + * / + * ReduceSinkOperator - ForwardOperator --- ... + * \ + * GroupByOperator + * + */ + + public static class MultiGroupByInferrer extends GroupByInferrer implements NodeProcessor { + @Override + public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, + Object... nodeOutputs) throws SemanticException { + BucketingSortingCtx bctx = (BucketingSortingCtx)procCtx; + GroupByOperator gop = (GroupByOperator)nd; + + if (gop.getParentOperators().size() != 1) { + return null; + } + + Operator fop = gop.getParentOperators().get(0); + + // The caller of this method should guarantee this + assert(fop instanceof ForwardOperator); + + if (fop.getParentOperators().size() != 1) { + return null; + } + + Operator rop = fop.getParentOperators().get(0); + + // The caller of this method should guarantee this + assert(rop instanceof ReduceSinkOperator); + + processGroupByReduceSink((ReduceSinkOperator) rop, gop, bctx); + + processForward(fop, bctx, rop); + + return processGroupBy(fop, gop, bctx); + } + } + + /** + * Processor for GroupBy operator. + * + * This handles the standard use of a group by operator, the tree should look like + * + * ReduceSinkOperator --- GroupByOperator + * + * It is up to the caller to guarantee the tree matches this pattern. + */ + public static class GroupByInferrer extends DefaultInferrer implements NodeProcessor { + @Override + public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, + Object... nodeOutputs) throws SemanticException { + + BucketingSortingCtx bctx = (BucketingSortingCtx)procCtx; + GroupByOperator gop = (GroupByOperator)nd; + + // As of writing this, there is no case where this could be false, this is just protection + // from possible future changes + if (gop.getParentOperators().size() != 1) { + return null; + } + + Operator rop = gop.getParentOperators().get(0); + + // The caller of this method should guarantee this + assert(rop instanceof ReduceSinkOperator); + + processGroupByReduceSink((ReduceSinkOperator) rop, gop, bctx); + + return processGroupBy((ReduceSinkOperator)rop , gop, bctx); + } + + /** + * Process the ReduceSinkOperator preceding a GroupByOperator to determine which columns + * are bucketed and sorted. + * + * @param rop + * @param gop + * @param bctx + */ + protected void processGroupByReduceSink(ReduceSinkOperator rop, GroupByOperator gop, + BucketingSortingCtx bctx){ + + String sortOrder = rop.getConf().getOrder(); + List bucketCols = new ArrayList(); + List sortCols = new ArrayList(); + assert rop.getConf().getKeyCols().size() <= rop.getSchema().getSignature().size(); + // Group by operators select the key cols, so no need to find them in the values + for (int i = 0; i < rop.getConf().getKeyCols().size(); i++) { + String colName = rop.getSchema().getSignature().get(i).getInternalName(); + bucketCols.add(new BucketCol(colName, i)); + sortCols.add(new SortCol(colName, i, sortOrder.charAt(i))); + } + bctx.setBucketedCols(rop, bucketCols); + bctx.setSortedCols(rop, sortCols); + } + + /** + * Process a GroupByOperator to determine which if any columns the output is bucketed and + * sorted by, assumes the columns output by the parent which are bucketed and sorted have + * already been determined. + * + * @param parent + * @param gop + * @param bctx + * @return + */ + protected Object processGroupBy(Operator parent, GroupByOperator gop, + BucketingSortingCtx bctx) { + List bucketCols = bctx.getBucketedCols(parent); + List sortCols = bctx.getSortedCols(parent); + List colInfos = gop.getSchema().getSignature(); + + if (bucketCols == null) { + assert sortCols == null; + return null; + } + + if (bucketCols.isEmpty()) { + assert sortCols.isEmpty(); + return null; + } + + BucketCol[] newBucketCols = new BucketCol[bucketCols.size()]; + SortCol[] newSortCols = new SortCol[sortCols.size()]; + + findBucketingSortingColumns(gop.getConf().getKeys(), colInfos, bucketCols, sortCols, + newBucketCols, newSortCols); + + setBucketingColsIfComplete(bctx, gop, newBucketCols); + + setSortingColsIfComplete(bctx, gop, newSortCols); + + return null; + } + } + + /** + * Filter processor + */ + public static class ForwardingInferrer extends DefaultInferrer implements NodeProcessor { + @SuppressWarnings("unchecked") + @Override + public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, + Object... nodeOutputs) throws SemanticException { + + processForward((Operator)nd, (BucketingSortingCtx)procCtx, + getParent(stack)); + + return null; + } + } + + public static NodeProcessor getDefaultProc() { + return new DefaultInferrer(); + } + + public static NodeProcessor getJoinProc() { + return new JoinInferrer(); + } + + public static NodeProcessor getSelProc() { + return new SelectInferrer(); + } + + public static NodeProcessor getGroupByProc() { + return new GroupByInferrer(); + } + + public static NodeProcessor getFileSinkProc() { + return new FileSinkInferrer(); + } + + public static NodeProcessor getExtractProc() { + return new ExtractInferrer(); + } + + public static NodeProcessor getFilterProc() { + return new ForwardingInferrer(); + } + + public static NodeProcessor getLimitProc() { + return new ForwardingInferrer(); + } + + public static NodeProcessor getLateralViewForwardProc() { + return new ForwardingInferrer(); + } + + public static NodeProcessor getLateralViewJoinProc() { + return new ForwardingInferrer(); + } + + public static NodeProcessor getForwardProc() { + return new ForwardingInferrer(); + } + + public static NodeProcessor getMultiGroupByProc() { + return new MultiGroupByInferrer(); + } +} Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java (revision 1440043) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java (working copy) @@ -59,6 +59,13 @@ if (hiveConf.getBoolVar(HiveConf.ConfVars.HIVEMETADATAONLYQUERIES)) { resolvers.add(new MetadataOnlyOptimizer()); } + + // Physical optimizers which follow this need to be careful not to invalidate the inferences + // made by this optimizer. Only optimizers which depend on the results of this one should + // follow it. + if (hiveConf.getBoolVar(HiveConf.ConfVars.HIVE_INFER_BUCKET_SORT)) { + resolvers.add(new BucketingSortingInferenceOptimizer()); + } } /** Index: ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java (revision 1440043) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java (working copy) @@ -31,6 +31,8 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.Utilities; +import org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.BucketCol; +import org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.SortCol; import org.apache.hadoop.hive.ql.parse.OpParseContext; import org.apache.hadoop.hive.ql.parse.QBJoinTree; import org.apache.hadoop.hive.ql.parse.SplitSample; @@ -93,6 +95,18 @@ private transient boolean useBucketizedHiveInputFormat; + // if this is true, this means that this is the map reduce task which writes the final data, + // ignoring the optional merge task + private boolean finalMapRed = false; + + // If this map reduce task has a FileSinkOperator, and bucketing/sorting metadata can be + // inferred about the data being written by that operator, these are mappings from the directory + // that operator writes into to the bucket/sort columns for that data. + private final Map> bucketedColsByDirectory = + new HashMap>(); + private final Map> sortedColsByDirectory = + new HashMap>(); + public MapredWork() { aliasToPartnInfo = new LinkedHashMap(); } @@ -279,6 +293,16 @@ this.numReduceTasks = numReduceTasks; } + @Explain(displayName = "Path -> Bucketed Columns", normalExplain = false) + public Map> getBucketedColsByDirectory() { + return bucketedColsByDirectory; + } + + @Explain(displayName = "Path -> Sorted Columns", normalExplain = false) + public Map> getSortedColsByDirectory() { + return sortedColsByDirectory; + } + @SuppressWarnings("nls") public void addMapWork(String path, String alias, Operator work, PartitionDesc pd) { @@ -525,4 +549,12 @@ public void setUseBucketizedHiveInputFormat(boolean useBucketizedHiveInputFormat) { this.useBucketizedHiveInputFormat = useBucketizedHiveInputFormat; } + + public boolean isFinalMapRed() { + return finalMapRed; + } + + public void setFinalMapRed(boolean finalMapRed) { + this.finalMapRed = finalMapRed; + } } Index: ql/src/test/queries/clientnegative/merge_negative_3.q =================================================================== --- ql/src/test/queries/clientnegative/merge_negative_3.q (revision 0) +++ ql/src/test/queries/clientnegative/merge_negative_3.q (working copy) @@ -0,0 +1,6 @@ +set hive.enforce.bucketing=true; +set hive.enforce.sorting=true; + +create table srcpart2 (key int, value string) partitioned by (ds string) clustered by (key) sorted by (key) into 2 buckets stored as RCFILE; +insert overwrite table srcpart2 partition (ds='2011') select * from src; +alter table srcpart2 partition (ds = '2011') concatenate; Index: ql/src/test/queries/clientpositive/infer_bucket_sort.q =================================================================== --- ql/src/test/queries/clientpositive/infer_bucket_sort.q (revision 0) +++ ql/src/test/queries/clientpositive/infer_bucket_sort.q (working copy) @@ -0,0 +1,158 @@ +set hive.exec.infer.bucket.sort=true; + +-- This tests inferring how data is bucketed/sorted from the operators in the reducer +-- and populating that information in partitions' metadata + +CREATE TABLE test_table (key STRING, value STRING) PARTITIONED BY (part STRING); + +-- Test group by, should be bucketed and sorted by group by key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT key, count(*) FROM src GROUP BY key; + +DESCRIBE FORMATTED test_table PARTITION (part = '1'); + +-- Test group by where a key isn't selected, should not be bucketed or sorted +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT key, count(*) FROM src GROUP BY key, value; + +DESCRIBE FORMATTED test_table PARTITION (part = '1'); + +-- Test join, should be bucketed and sorted by join key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT a.key, a.value FROM src a JOIN src b ON a.key = b.key; + +DESCRIBE FORMATTED test_table PARTITION (part = '1'); + +-- Test join with two keys, should be bucketed and sorted by join keys +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT a.key, a.value FROM src a JOIN src b ON a.key = b.key AND a.value = b.value; + +DESCRIBE FORMATTED test_table PARTITION (part = '1'); + +-- Test join with two keys and only one selected, should not be bucketed or sorted +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT a.key, '1' FROM src a JOIN src b ON a.key = b.key AND a.value = b.value; + +DESCRIBE FORMATTED test_table PARTITION (part = '1'); + +-- Test join on three tables on same key, should be bucketed and sorted by join key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT a.key, c.value FROM src a JOIN src b ON (a.key = b.key) JOIN src c ON (b.key = c.key); + +DESCRIBE FORMATTED test_table PARTITION (part = '1'); + +-- Test join on three tables on different keys, should be bucketed and sorted by latter key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT a.key, c.value FROM src a JOIN src b ON (a.key = b.key) JOIN src c ON (b.value = c.value); + +DESCRIBE FORMATTED test_table PARTITION (part = '1'); + +-- Test distribute by, should only be bucketed by key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT key, value FROM src DISTRIBUTE BY key; + +DESCRIBE FORMATTED test_table PARTITION (part = '1'); + +-- Test sort by, should be sorted by key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT key, value FROM src SORT BY key ASC; + +DESCRIBE FORMATTED test_table PARTITION (part = '1'); + +-- Test sort by desc, should be sorted by key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT key, value FROM src SORT BY key DESC; + +DESCRIBE FORMATTED test_table PARTITION (part = '1'); + +-- Test cluster by, should be bucketed and sorted by key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT key, value FROM src CLUSTER BY key; + +DESCRIBE FORMATTED test_table PARTITION (part = '1'); + +-- Test distribute by and sort by different keys, should be bucketed by one key sorted by the other +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT key, value FROM src DISTRIBUTE BY key SORT BY value; + +DESCRIBE FORMATTED test_table PARTITION (part = '1'); + +-- Test join in simple subquery, should be bucketed and sorted on key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT key, value from (SELECT a.key, b.value FROM src a JOIN src b ON (a.key = b.key)) subq; + +DESCRIBE FORMATTED test_table PARTITION (part = '1'); + +-- Test join in simple subquery renaming key column, should be bucketed and sorted on key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT k, value FROM (SELECT a.key as k, b.value FROM src a JOIN src b ON (a.key = b.key)) subq; + +DESCRIBE FORMATTED test_table PARTITION (part = '1'); + +-- Test group by in simple subquery, should be bucketed and sorted on key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT key, cnt from (SELECT key, count(*) as cnt FROM src GROUP BY key) subq; + +DESCRIBE FORMATTED test_table PARTITION (part = '1'); + +-- Test group by in simple subquery renaming key column, should be bucketed and sorted on key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT k, cnt FROM (SELECT key as k, count(*) as cnt FROM src GROUP BY key) subq; + +DESCRIBE FORMATTED test_table PARTITION (part = '1'); + +-- Test group by in subquery with where outside, should still be bucketed and sorted on key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT key, value FROM (SELECT key, count(1) AS value FROM src group by key) a where key < 10; + +DESCRIBE FORMATTED test_table PARTITION (part = '1'); + +-- Test group by in subquery with expression on value, should still be bucketed and sorted on key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT key, value + 1 FROM (SELECT key, count(1) AS value FROM src group by key) a where key < 10; + +DESCRIBE FORMATTED test_table PARTITION (part = '1'); + +-- Test group by in subquery with lateral view outside, should still be bucketed and sorted +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT key, value FROM (SELECT key FROM src group by key) a lateral view explode(array(1, 2)) value as value; + +DESCRIBE FORMATTED test_table PARTITION (part = '1'); + +-- Test group by in subquery with another group by outside, should be bucketed and sorted by the +-- key of the outer group by +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT count(1), value FROM (SELECT key, count(1) as value FROM src group by key) a group by value; + +DESCRIBE FORMATTED test_table PARTITION (part = '1'); + +-- Test group by in subquery with select on outside reordering the columns, should be bucketed and +-- sorted by the column the group by key ends up in +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT value, key FROM (SELECT key, count(1) as value FROM src group by key) a; + +DESCRIBE FORMATTED test_table PARTITION (part = '1'); + +-- Test group by in subquery followed by distribute by, should only be bucketed by the distribute key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT key, value FROM (SELECT key, count(1) as value FROM src group by key) a distribute by key; + +DESCRIBE FORMATTED test_table PARTITION (part = '1'); + +-- Test group by in subquery followed by sort by, should only be sorted by the sort key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT key, value FROM (SELECT key, count(1) as value FROM src group by key) a sort by key; + +DESCRIBE FORMATTED test_table PARTITION (part = '1'); + +-- Test group by in subquery followed by transform script, should not be bucketed or sorted +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT TRANSFORM (a.key, a.value) USING 'cat' AS (key, value) FROM (SELECT key, count(1) AS value FROM src GROUP BY KEY) a; + +DESCRIBE FORMATTED test_table PARTITION (part = '1'); + +-- Test group by on function, should be bucketed and sorted by key and value because the function is applied in the mapper +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT key, value FROM (SELECT concat(key, "a") AS key, value, count(*) FROM src GROUP BY concat(key, "a"), value) a; + +DESCRIBE FORMATTED test_table PARTITION (part = '1'); Index: ql/src/test/queries/clientpositive/infer_bucket_sort_bucketed_table.q =================================================================== --- ql/src/test/queries/clientpositive/infer_bucket_sort_bucketed_table.q (revision 0) +++ ql/src/test/queries/clientpositive/infer_bucket_sort_bucketed_table.q (working copy) @@ -0,0 +1,26 @@ +set hive.enforce.bucketing=true; +set hive.enforce.sorting=true; +set hive.exec.infer.bucket.sort=true; + +-- Test writing to a bucketed table, the output should be bucketed by the bucketing key into the +-- a number of files equal to the number of buckets +CREATE TABLE test_table_bucketed (key STRING, value STRING) PARTITIONED BY (part STRING) +CLUSTERED BY (value) SORTED BY (value) INTO 3 BUCKETS; + +-- Despite the fact that normally inferring would say this table is bucketed and sorted on key, +-- this should be bucketed and sorted by value into 3 buckets +INSERT OVERWRITE TABLE test_table_bucketed PARTITION (part = '1') +SELECT key, count(1) FROM src GROUP BY KEY; + +DESCRIBE FORMATTED test_table_bucketed PARTITION (part = '1'); + +-- If the count(*) from sampling the buckets matches the count(*) from each file, the table is +-- bucketed +SELECT COUNT(*) FROM test_table_bucketed TABLESAMPLE (BUCKET 1 OUT OF 3) WHERE part = '1'; + +SELECT COUNT(*) FROM test_table_bucketed TABLESAMPLE (BUCKET 2 OUT OF 3) WHERE part = '1'; + +SELECT COUNT(*) FROM test_table_bucketed TABLESAMPLE (BUCKET 3 OUT OF 3) WHERE part = '1'; + +SELECT cnt FROM (SELECT INPUT__FILE__NAME, COUNT(*) cnt FROM test_table_bucketed WHERE part = '1' +GROUP BY INPUT__FILE__NAME ORDER BY INPUT__FILE__NAME ASC LIMIT 3) a; \ No newline at end of file Index: ql/src/test/queries/clientpositive/infer_bucket_sort_convert_join.q =================================================================== --- ql/src/test/queries/clientpositive/infer_bucket_sort_convert_join.q (revision 0) +++ ql/src/test/queries/clientpositive/infer_bucket_sort_convert_join.q (working copy) @@ -0,0 +1,25 @@ +set hive.exec.infer.bucket.sort=true; +set hive.exec.infer.bucket.sort.num.buckets.power.two=true; +set hive.auto.convert.join=true; + +-- This tests inferring how data is bucketed/sorted from the operators in the reducer +-- and populating that information in partitions' metadata. In particular, those cases +-- where joins may be auto converted to map joins. + +CREATE TABLE test_table (key STRING, value STRING) PARTITIONED BY (part STRING); + +-- Tests a join which is converted to a map join, the output should be neither bucketed nor sorted +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT a.key, b.value FROM src a JOIN src b ON a.key = b.key; + +DESCRIBE FORMATTED test_table PARTITION (part = '1'); + +set hive.mapjoin.check.memory.rows=1; +set hive.mapjoin.localtask.max.memory.usage = 0.0001; + +-- Tests a join which is not converted to a map join, the output should be bucketed and sorted + +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT a.key, b.value FROM src a JOIN src b ON a.key = b.key; + +DESCRIBE FORMATTED test_table PARTITION (part = '1'); Index: ql/src/test/queries/clientpositive/infer_bucket_sort_dyn_part.q =================================================================== --- ql/src/test/queries/clientpositive/infer_bucket_sort_dyn_part.q (revision 0) +++ ql/src/test/queries/clientpositive/infer_bucket_sort_dyn_part.q (working copy) @@ -0,0 +1,87 @@ +set hive.exec.infer.bucket.sort=true; +set hive.exec.infer.bucket.sort.num.buckets.power.two=true; +set hive.exec.dynamic.partition.mode=nonstrict; +set hive.merge.mapfiles=false; +set hive.merge.mapredfiles=false; + +-- This tests inferring how data is bucketed/sorted from the operators in the reducer +-- and populating that information in partitions' metadata. In particular, those cases +-- where dynamic partitioning is used. + +CREATE TABLE test_table LIKE srcpart; +ALTER TABLE test_table SET FILEFORMAT RCFILE; + +-- Simple case, this should not be bucketed or sorted + +INSERT OVERWRITE TABLE test_table PARTITION (ds, hr) +SELECT key, value, ds, hr FROM srcpart +WHERE ds = '2008-04-08'; + +DESCRIBE FORMATTED test_table PARTITION (ds='2008-04-08', hr='11'); +DESCRIBE FORMATTED test_table PARTITION (ds='2008-04-08', hr='12'); + +-- This should not be bucketed or sorted since the partition keys are in the set of bucketed +-- and sorted columns for the output + +INSERT OVERWRITE TABLE test_table PARTITION (ds, hr) +SELECT key, COUNT(*), ds, hr FROM srcpart +WHERE ds = '2008-04-08' +GROUP BY key, ds, hr; + +DESCRIBE FORMATTED test_table PARTITION (ds='2008-04-08', hr='11'); +DESCRIBE FORMATTED test_table PARTITION (ds='2008-04-08', hr='12'); + +-- Both partitions should be bucketed and sorted by key + +INSERT OVERWRITE TABLE test_table PARTITION (ds, hr) +SELECT key, value, '2008-04-08', IF (key % 2 == 0, '11', '12') FROM +(SELECT key, COUNT(*) AS value FROM srcpart +WHERE ds = '2008-04-08' +GROUP BY key) a; + +DESCRIBE FORMATTED test_table PARTITION (ds='2008-04-08', hr='11'); +DESCRIBE FORMATTED test_table PARTITION (ds='2008-04-08', hr='12'); + +CREATE TABLE srcpart_merge_dp LIKE srcpart; + +CREATE TABLE srcpart_merge_dp_rc LIKE srcpart; +ALTER TABLE srcpart_merge_dp_rc SET FILEFORMAT RCFILE; + +LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcpart_merge_dp PARTITION(ds='2008-04-08', hr=11); +LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcpart_merge_dp PARTITION(ds='2008-04-08', hr=11); +LOAD DATA LOCAL INPATH '../data/files/srcbucket22.txt' INTO TABLE srcpart_merge_dp PARTITION(ds='2008-04-08', hr=11); +LOAD DATA LOCAL INPATH '../data/files/srcbucket23.txt' INTO TABLE srcpart_merge_dp PARTITION(ds='2008-04-08', hr=11); + +LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcpart_merge_dp PARTITION(ds='2008-04-08', hr=12); + +INSERT OVERWRITE TABLE srcpart_merge_dp_rc PARTITION (ds = '2008-04-08', hr) +SELECT key, value, hr FROM srcpart_merge_dp WHERE ds = '2008-04-08'; + +set hive.input.format=org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat; +set hive.merge.mapfiles=true; +set hive.merge.mapredfiles=true; +set hive.merge.smallfiles.avgsize=200; +set hive.exec.compress.output=false; +set hive.exec.dynamic.partition=true; +set mapred.reduce.tasks=2; + +-- Tests dynamic partitions where bucketing/sorting can be inferred, but some partitions are +-- merged and some are moved. Currently neither should be bucketed or sorted, in the future, +-- (ds='2008-04-08', hr='12') may be bucketed and sorted, (ds='2008-04-08', hr='11') should +-- definitely not be. + +EXPLAIN +INSERT OVERWRITE TABLE test_table PARTITION (ds = '2008-04-08', hr) +SELECT key, value, IF (key % 100 == 0, '11', '12') FROM +(SELECT key, COUNT(*) AS value FROM srcpart +WHERE ds = '2008-04-08' +GROUP BY key) a; + +INSERT OVERWRITE TABLE test_table PARTITION (ds = '2008-04-08', hr) +SELECT key, value, IF (key % 100 == 0, '11', '12') FROM +(SELECT key, COUNT(*) AS value FROM srcpart +WHERE ds = '2008-04-08' +GROUP BY key) a; + +DESCRIBE FORMATTED test_table PARTITION (ds='2008-04-08', hr='11'); +DESCRIBE FORMATTED test_table PARTITION (ds='2008-04-08', hr='12'); Index: ql/src/test/queries/clientpositive/infer_bucket_sort_grouping_operators.q =================================================================== --- ql/src/test/queries/clientpositive/infer_bucket_sort_grouping_operators.q (revision 0) +++ ql/src/test/queries/clientpositive/infer_bucket_sort_grouping_operators.q (working copy) @@ -0,0 +1,57 @@ +set hive.exec.infer.bucket.sort=true; + +-- This tests inferring how data is bucketed/sorted from the operators in the reducer +-- and populating that information in partitions' metadata, in particular, this tests +-- the grouping operators rollup/cube/grouping sets + +CREATE TABLE test_table_out (key STRING, value STRING, agg STRING) PARTITIONED BY (part STRING); + +CREATE TABLE test_table_out_2 (key STRING, value STRING, grouping_key STRING, agg STRING) PARTITIONED BY (part STRING); + +-- Test rollup, should not be bucketed or sorted because its missing the grouping ID +EXPLAIN INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') +SELECT key, value, count(1) FROM src GROUP BY key, value WITH ROLLUP; + +INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') +SELECT key, value, count(1) FROM src GROUP BY key, value WITH ROLLUP; + +DESCRIBE FORMATTED test_table_out PARTITION (part = '1'); + +-- Test rollup, should be bucketed and sorted on key, value, grouping_key + +INSERT OVERWRITE TABLE test_table_out_2 PARTITION (part = '1') +SELECT key, value, GROUPING__ID, count(1) FROM src GROUP BY key, value WITH ROLLUP; + +DESCRIBE FORMATTED test_table_out_2 PARTITION (part = '1'); + +-- Test cube, should not be bucketed or sorted because its missing the grouping ID +EXPLAIN INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') +SELECT key, value, count(1) FROM src GROUP BY key, value WITH CUBE; + +INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') +SELECT key, value, count(1) FROM src GROUP BY key, value WITH CUBE; + +DESCRIBE FORMATTED test_table_out PARTITION (part = '1'); + +-- Test cube, should be bucketed and sorted on key, value, grouping_key + +INSERT OVERWRITE TABLE test_table_out_2 PARTITION (part = '1') +SELECT key, value, GROUPING__ID, count(1) FROM src GROUP BY key, value WITH CUBE; + +DESCRIBE FORMATTED test_table_out_2 PARTITION (part = '1'); + +-- Test grouping sets, should not be bucketed or sorted because its missing the grouping ID +EXPLAIN INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') +SELECT key, value, count(1) FROM src GROUP BY key, value GROUPING SETS (key, value); + +INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') +SELECT key, value, count(1) FROM src GROUP BY key, value GROUPING SETS (key, value); + +DESCRIBE FORMATTED test_table_out PARTITION (part = '1'); + +-- Test grouping sets, should be bucketed and sorted on key, value, grouping_key + +INSERT OVERWRITE TABLE test_table_out_2 PARTITION (part = '1') +SELECT key, value, GROUPING__ID, count(1) FROM src GROUP BY key, value GROUPING SETS (key, value); + +DESCRIBE FORMATTED test_table_out_2 PARTITION (part = '1'); Index: ql/src/test/queries/clientpositive/infer_bucket_sort_list_bucket.q =================================================================== --- ql/src/test/queries/clientpositive/infer_bucket_sort_list_bucket.q (revision 0) +++ ql/src/test/queries/clientpositive/infer_bucket_sort_list_bucket.q (working copy) @@ -0,0 +1,33 @@ +set hive.mapred.supports.subdirectories=true; +set hive.merge.mapfiles=false; +set hive.merge.mapredfiles=false; +set mapred.input.dir.recursive=true; + +-- This tests that bucketing/sorting metadata is not inferred for tables with list bucketing + +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) + +-- create a skewed table +CREATE TABLE list_bucketing_table (key STRING, value STRING) +PARTITIONED BY (part STRING) +SKEWED BY (key) ON ("484") +STORED AS DIRECTORIES; + +-- Tests group by, the output should neither be bucketed nor sorted + +INSERT OVERWRITE TABLE list_bucketing_table PARTITION (part = '1') +SELECT key, count(*) FROM src GROUP BY key; + +DESC FORMATTED list_bucketing_table PARTITION (part = '1'); + +-- create a table skewed on a key which doesnt exist in the data +CREATE TABLE list_bucketing_table2 (key STRING, value STRING) +PARTITIONED BY (part STRING) +SKEWED BY (key) ON ("abc") +STORED AS DIRECTORIES; + +-- should not be bucketed or sorted +INSERT OVERWRITE TABLE list_bucketing_table2 PARTITION (part = '1') +SELECT key, count(*) FROM src GROUP BY key; + +DESC FORMATTED list_bucketing_table2 PARTITION (part = '1'); Index: ql/src/test/queries/clientpositive/infer_bucket_sort_map_operators.q =================================================================== --- ql/src/test/queries/clientpositive/infer_bucket_sort_map_operators.q (revision 0) +++ ql/src/test/queries/clientpositive/infer_bucket_sort_map_operators.q (working copy) @@ -0,0 +1,74 @@ +set hive.exec.infer.bucket.sort=true; +set hive.enforce.bucketing = true; +set hive.enforce.sorting = true; + +-- This tests inferring how data is bucketed/sorted from the operators in the reducer +-- and populating that information in partitions' metadata, in particular, this tests +-- that operators in the mapper have no effect + +CREATE TABLE test_table1 (key STRING, value STRING) +CLUSTERED BY (key) SORTED BY (key DESC) INTO 2 BUCKETS; + +CREATE TABLE test_table2 (key STRING, value STRING) +CLUSTERED BY (key) SORTED BY (key DESC) INTO 2 BUCKETS; + +INSERT OVERWRITE TABLE test_table1 SELECT key, value FROM src; + +INSERT OVERWRITE TABLE test_table2 SELECT key, value FROM src; + +CREATE TABLE test_table_out (key STRING, value STRING) PARTITIONED BY (part STRING); + +set hive.map.groupby.sorted=true; + +-- Test map group by doesn't affect inference, should not be bucketed or sorted +EXPLAIN INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') +SELECT key, count(*) FROM test_table1 GROUP BY key; + +INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') +SELECT key, count(*) FROM test_table1 GROUP BY key; + +DESCRIBE FORMATTED test_table_out PARTITION (part = '1'); + +-- Test map group by doesn't affect inference, should be bucketed and sorted by value +EXPLAIN INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') +SELECT a.key, a.value FROM ( + SELECT key, count(*) AS value FROM test_table1 GROUP BY key +) a JOIN ( + SELECT key, value FROM src +) b +ON (a.value = b.value); + +INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') +SELECT a.key, a.value FROM ( + SELECT key, cast(count(*) AS STRING) AS value FROM test_table1 GROUP BY key +) a JOIN ( + SELECT key, value FROM src +) b +ON (a.value = b.value); + +DESCRIBE FORMATTED test_table_out PARTITION (part = '1'); + +set hive.map.groupby.sorted=false; +set hive.optimize.bucketmapjoin = true; +set hive.optimize.bucketmapjoin.sortedmerge = true; + +-- Test SMB join doesn't affect inference, should not be bucketed or sorted +EXPLAIN INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') +SELECT /*+ MAPJOIN(a) */ a.key, b.value FROM test_table1 a JOIN test_table2 b ON a.key = b.key; + +INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') +SELECT /*+ MAPJOIN(a) */ a.key, b.value FROM test_table1 a JOIN test_table2 b ON a.key = b.key; + +DESCRIBE FORMATTED test_table_out PARTITION (part = '1'); + +-- Test SMB join doesn't affect inference, should be bucketed and sorted by key +EXPLAIN INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') +SELECT /*+ MAPJOIN(a) */ b.value, count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key +GROUP BY b.value; + +INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') +SELECT /*+ MAPJOIN(a) */ b.value, count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key +GROUP BY b.value; + +DESCRIBE FORMATTED test_table_out PARTITION (part = '1'); + Index: ql/src/test/queries/clientpositive/infer_bucket_sort_merge.q =================================================================== --- ql/src/test/queries/clientpositive/infer_bucket_sort_merge.q (revision 0) +++ ql/src/test/queries/clientpositive/infer_bucket_sort_merge.q (working copy) @@ -0,0 +1,25 @@ +set hive.exec.infer.bucket.sort=true; +set hive.exec.infer.bucket.sort.num.buckets.power.two=true; +set hive.merge.mapredfiles=true; +set mapred.reduce.tasks=2; + +-- This tests inferring how data is bucketed/sorted from the operators in the reducer +-- and populating that information in partitions' metadata. In particular, those cases +-- where where merging may or may not be used. + +CREATE TABLE test_table (key STRING, value STRING) PARTITIONED BY (part STRING); + +-- Tests a reduce task followed by a merge. The output should be neither bucketed nor sorted. +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT a.key, b.value FROM src a JOIN src b ON a.key = b.key; + +DESCRIBE FORMATTED test_table PARTITION (part = '1'); + +set hive.merge.smallfiles.avgsize=2; +set hive.exec.compress.output=false; + +-- Tests a reduce task followed by a move. The output should be bucketed and sorted. +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT a.key, b.value FROM src a JOIN src b ON a.key = b.key; + +DESCRIBE FORMATTED test_table PARTITION (part = '1'); Index: ql/src/test/queries/clientpositive/infer_bucket_sort_multi_insert.q =================================================================== --- ql/src/test/queries/clientpositive/infer_bucket_sort_multi_insert.q (revision 0) +++ ql/src/test/queries/clientpositive/infer_bucket_sort_multi_insert.q (working copy) @@ -0,0 +1,46 @@ +set hive.exec.infer.bucket.sort=true; +set hive.exec.infer.bucket.sort.num.buckets.power.two=true; + +-- This tests inferring how data is bucketed/sorted from the operators in the reducer +-- and populating that information in partitions' metadata. In particular, those cases +-- where multi insert is used. + +CREATE TABLE test_table (key STRING, value STRING) PARTITIONED BY (part STRING); + +-- Simple case, neither partition should be bucketed or sorted + +FROM src +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, value +INSERT OVERWRITE TABLE test_table PARTITION (part = '2') SELECT value, key; + +DESCRIBE FORMATTED test_table PARTITION (part = '1'); +DESCRIBE FORMATTED test_table PARTITION (part = '2'); + +-- The partitions should be bucketed and sorted by different keys + +FROM src +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, COUNT(*) GROUP BY key +INSERT OVERWRITE TABLE test_table PARTITION (part = '2') SELECT COUNT(*), value GROUP BY value; + +DESCRIBE FORMATTED test_table PARTITION (part = '1'); +DESCRIBE FORMATTED test_table PARTITION (part = '2'); + +-- The first partition should be bucketed and sorted, the second should not + +FROM src +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, COUNT(*) GROUP BY key +INSERT OVERWRITE TABLE test_table PARTITION (part = '2') SELECT key, value; + +DESCRIBE FORMATTED test_table PARTITION (part = '1'); +DESCRIBE FORMATTED test_table PARTITION (part = '2'); + +set hive.multigroupby.singlereducer=true; + +-- Test the multi group by single reducer optimization +-- Both partitions should be bucketed by key +FROM src +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, COUNT(*) GROUP BY key +INSERT OVERWRITE TABLE test_table PARTITION (part = '2') SELECT key, SUM(SUBSTR(value, 5)) GROUP BY key; + +DESCRIBE FORMATTED test_table PARTITION (part = '1'); +DESCRIBE FORMATTED test_table PARTITION (part = '2'); Index: ql/src/test/queries/clientpositive/infer_bucket_sort_num_buckets.q =================================================================== --- ql/src/test/queries/clientpositive/infer_bucket_sort_num_buckets.q (revision 0) +++ ql/src/test/queries/clientpositive/infer_bucket_sort_num_buckets.q (working copy) @@ -0,0 +1,37 @@ +set hive.exec.infer.bucket.sort=true; +set hive.merge.mapfiles=false; +set hive.merge.mapredfiles=false; +set mapred.reduce.tasks=2; + +CREATE TABLE test_table (key INT, value STRING) PARTITIONED BY (ds STRING, hr STRING); + +-- Tests dynamic partitions where bucketing/sorting can be inferred, but not all reducers write +-- all partitions. The subquery produces rows as follows +-- key = 0: +-- 0, , 0 +-- key = 1: +-- 0, , 1 +-- key = 2: +-- 1, , 0 +-- This means that by distributing by the first column into two reducers, and using the third +-- columns as a dynamic partition, the dynamic partition for 0 will get written in both reducers +-- and the partition for 1 will get written in one reducer. So hr=0 should be bucketed by key +-- and hr=1 should not. + +EXPLAIN +INSERT OVERWRITE TABLE test_table PARTITION (ds = '2008-04-08', hr) +SELECT key2, value, cast(hr as int) FROM +(SELECT if ((key % 3) < 2, 0, 1) as key2, value, (key % 2) as hr +FROM srcpart +WHERE ds = '2008-04-08') a +DISTRIBUTE BY key2; + +INSERT OVERWRITE TABLE test_table PARTITION (ds = '2008-04-08', hr) +SELECT key2, value, cast(hr as int) FROM +(SELECT if ((key % 3) < 2, 0, 1) as key2, value, (key % 3 % 2) as hr +FROM srcpart +WHERE ds = '2008-04-08') a +DISTRIBUTE BY key2; + +DESCRIBE FORMATTED test_table PARTITION (ds='2008-04-08', hr='0'); +DESCRIBE FORMATTED test_table PARTITION (ds='2008-04-08', hr='1'); Index: ql/src/test/queries/clientpositive/infer_bucket_sort_reducers_power_two.q =================================================================== --- ql/src/test/queries/clientpositive/infer_bucket_sort_reducers_power_two.q (revision 0) +++ ql/src/test/queries/clientpositive/infer_bucket_sort_reducers_power_two.q (working copy) @@ -0,0 +1,46 @@ +set hive.exec.infer.bucket.sort=true; +set hive.exec.infer.bucket.sort.num.buckets.power.two=true; +set hive.exec.reducers.bytes.per.reducer=2500; + +-- This tests inferring how data is bucketed/sorted from the operators in the reducer +-- and populating that information in partitions' metadata, it also verifies that the +-- number of reducers chosen will be a power of two + +CREATE TABLE test_table (key STRING, value STRING) PARTITIONED BY (part STRING); + +-- Test group by, should be bucketed and sorted by group by key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT key, count(*) FROM src GROUP BY key; + +DESCRIBE FORMATTED test_table PARTITION (part = '1'); + +-- Test join, should be bucketed and sorted by join key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT a.key, a.value FROM src a JOIN src b ON a.key = b.key; + +DESCRIBE FORMATTED test_table PARTITION (part = '1'); + +-- Test join with two keys, should be bucketed and sorted by join keys +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT a.key, a.value FROM src a JOIN src b ON a.key = b.key AND a.value = b.value; + +DESCRIBE FORMATTED test_table PARTITION (part = '1'); + +-- Test join on three tables on same key, should be bucketed and sorted by join key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT a.key, c.value FROM src a JOIN src b ON (a.key = b.key) JOIN src c ON (b.key = c.key); + +DESCRIBE FORMATTED test_table PARTITION (part = '1'); + +-- Test join on three tables on different keys, should be bucketed and sorted by latter key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT a.key, c.value FROM src a JOIN src b ON (a.key = b.key) JOIN src c ON (b.value = c.value); + +DESCRIBE FORMATTED test_table PARTITION (part = '1'); + +-- Test group by in subquery with another group by outside, should be bucketed and sorted by the +-- key of the outer group by +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT count(1), value FROM (SELECT key, count(1) as value FROM src group by key) a group by value; + +DESCRIBE FORMATTED test_table PARTITION (part = '1'); Index: ql/src/test/queries/clientpositive/reduce_deduplicate_exclude_gby.q =================================================================== --- ql/src/test/queries/clientpositive/reduce_deduplicate_exclude_gby.q (revision 1440043) +++ ql/src/test/queries/clientpositive/reduce_deduplicate_exclude_gby.q (working copy) @@ -1,5 +1,7 @@ create table t1( key_int1 int, key_int2 int, key_string1 string, key_string2 string); +set hive.optimize.reducededuplication=false; + set hive.map.aggr=false; select Q1.key_int1, sum(Q1.key_int1) from (select * from t1 cluster by key_int1) Q1 group by Q1.key_int1; Index: ql/src/test/results/clientnegative/merge_negative_3.q.out =================================================================== --- ql/src/test/results/clientnegative/merge_negative_3.q.out (revision 0) +++ ql/src/test/results/clientnegative/merge_negative_3.q.out (working copy) @@ -0,0 +1,16 @@ +PREHOOK: query: create table srcpart2 (key int, value string) partitioned by (ds string) clustered by (key) sorted by (key) into 2 buckets stored as RCFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table srcpart2 (key int, value string) partitioned by (ds string) clustered by (key) sorted by (key) into 2 buckets stored as RCFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@srcpart2 +PREHOOK: query: insert overwrite table srcpart2 partition (ds='2011') select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@srcpart2@ds=2011 +POSTHOOK: query: insert overwrite table srcpart2 partition (ds='2011') select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@srcpart2@ds=2011 +POSTHOOK: Lineage: srcpart2 PARTITION(ds=2011).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart2 PARTITION(ds=2011).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +FAILED: SemanticException org.apache.hadoop.hive.ql.parse.SemanticException: Merge can not perform on bucketized partition/table. Index: ql/src/test/results/clientpositive/ctas.q.out =================================================================== --- ql/src/test/results/clientpositive/ctas.q.out (revision 1440043) +++ ql/src/test/results/clientpositive/ctas.q.out (working copy) @@ -827,6 +827,8 @@ TotalFiles: 1 GatherStats: false MultiFileSpray: false + Path -> Sorted Columns: +#### A masked pattern was here #### Truncated Path -> Alias: /src [src] @@ -891,6 +893,8 @@ TotalFiles: 1 GatherStats: true MultiFileSpray: false + Path -> Sorted Columns: +#### A masked pattern was here #### Truncated Path -> Alias: #### A masked pattern was here #### Index: ql/src/test/results/clientpositive/infer_bucket_sort.q.out =================================================================== --- ql/src/test/results/clientpositive/infer_bucket_sort.q.out (revision 0) +++ ql/src/test/results/clientpositive/infer_bucket_sort.q.out (working copy) @@ -0,0 +1,2565 @@ +PREHOOK: query: -- This tests inferring how data is bucketed/sorted from the operators in the reducer +-- and populating that information in partitions' metadata + +CREATE TABLE test_table (key STRING, value STRING) PARTITIONED BY (part STRING) +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- This tests inferring how data is bucketed/sorted from the operators in the reducer +-- and populating that information in partitions' metadata + +CREATE TABLE test_table (key STRING, value STRING) PARTITIONED BY (part STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@test_table +PREHOOK: query: -- Test group by, should be bucketed and sorted by group by key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT key, count(*) FROM src GROUP BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table@part=1 +POSTHOOK: query: -- Test group by, should be bucketed and sorted by group by key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT key, count(*) FROM src GROUP BY key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table@part=1 +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +# col_name data_type comment + +key string None +value string None + +# Partition Information +# col_name data_type comment + +part string None + +# Detailed Partition Information +Partition Value: [1] +Database: default +Table: test_table +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + numFiles 1 + numRows 309 + rawDataSize 1482 + totalSize 1791 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: 1 +Bucket Columns: [key] +Sort Columns: [Order(col:key, order:1)] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: -- Test group by where a key isn't selected, should not be bucketed or sorted +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT key, count(*) FROM src GROUP BY key, value +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table@part=1 +POSTHOOK: query: -- Test group by where a key isn't selected, should not be bucketed or sorted +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT key, count(*) FROM src GROUP BY key, value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table@part=1 +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +# col_name data_type comment + +key string None +value string None + +# Partition Information +# col_name data_type comment + +part string None + +# Detailed Partition Information +Partition Value: [1] +Database: default +Table: test_table +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + numFiles 1 + numRows 309 + rawDataSize 1482 + totalSize 1791 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: -- Test join, should be bucketed and sorted by join key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT a.key, a.value FROM src a JOIN src b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table@part=1 +POSTHOOK: query: -- Test join, should be bucketed and sorted by join key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT a.key, a.value FROM src a JOIN src b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table@part=1 +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +# col_name data_type comment + +key string None +value string None + +# Partition Information +# col_name data_type comment + +part string None + +# Detailed Partition Information +Partition Value: [1] +Database: default +Table: test_table +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + numFiles 1 + numRows 1028 + rawDataSize 10968 + totalSize 11996 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: 1 +Bucket Columns: [key] +Sort Columns: [Order(col:key, order:1)] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: -- Test join with two keys, should be bucketed and sorted by join keys +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT a.key, a.value FROM src a JOIN src b ON a.key = b.key AND a.value = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table@part=1 +POSTHOOK: query: -- Test join with two keys, should be bucketed and sorted by join keys +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT a.key, a.value FROM src a JOIN src b ON a.key = b.key AND a.value = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table@part=1 +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +# col_name data_type comment + +key string None +value string None + +# Partition Information +# col_name data_type comment + +part string None + +# Detailed Partition Information +Partition Value: [1] +Database: default +Table: test_table +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + numFiles 1 + numRows 1028 + rawDataSize 10968 + totalSize 11996 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: 1 +Bucket Columns: [key, value] +Sort Columns: [Order(col:key, order:1), Order(col:value, order:1)] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: -- Test join with two keys and only one selected, should not be bucketed or sorted +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT a.key, '1' FROM src a JOIN src b ON a.key = b.key AND a.value = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table@part=1 +POSTHOOK: query: -- Test join with two keys and only one selected, should not be bucketed or sorted +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT a.key, '1' FROM src a JOIN src b ON a.key = b.key AND a.value = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table@part=1 +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +# col_name data_type comment + +key string None +value string None + +# Partition Information +# col_name data_type comment + +part string None + +# Detailed Partition Information +Partition Value: [1] +Database: default +Table: test_table +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + numFiles 1 + numRows 1028 + rawDataSize 4970 + totalSize 5998 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: -- Test join on three tables on same key, should be bucketed and sorted by join key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT a.key, c.value FROM src a JOIN src b ON (a.key = b.key) JOIN src c ON (b.key = c.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table@part=1 +POSTHOOK: query: -- Test join on three tables on same key, should be bucketed and sorted by join key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT a.key, c.value FROM src a JOIN src b ON (a.key = b.key) JOIN src c ON (b.key = c.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table@part=1 +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +# col_name data_type comment + +key string None +value string None + +# Partition Information +# col_name data_type comment + +part string None + +# Detailed Partition Information +Partition Value: [1] +Database: default +Table: test_table +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + numFiles 1 + numRows 2654 + rawDataSize 28466 + totalSize 31120 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: 1 +Bucket Columns: [key] +Sort Columns: [Order(col:key, order:1)] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: -- Test join on three tables on different keys, should be bucketed and sorted by latter key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT a.key, c.value FROM src a JOIN src b ON (a.key = b.key) JOIN src c ON (b.value = c.value) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table@part=1 +POSTHOOK: query: -- Test join on three tables on different keys, should be bucketed and sorted by latter key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT a.key, c.value FROM src a JOIN src b ON (a.key = b.key) JOIN src c ON (b.value = c.value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table@part=1 +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +# col_name data_type comment + +key string None +value string None + +# Partition Information +# col_name data_type comment + +part string None + +# Detailed Partition Information +Partition Value: [1] +Database: default +Table: test_table +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + numFiles 1 + numRows 2654 + rawDataSize 28466 + totalSize 31120 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: 1 +Bucket Columns: [value] +Sort Columns: [Order(col:value, order:1)] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: -- Test distribute by, should only be bucketed by key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT key, value FROM src DISTRIBUTE BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table@part=1 +POSTHOOK: query: -- Test distribute by, should only be bucketed by key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT key, value FROM src DISTRIBUTE BY key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table@part=1 +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +# col_name data_type comment + +key string None +value string None + +# Partition Information +# col_name data_type comment + +part string None + +# Detailed Partition Information +Partition Value: [1] +Database: default +Table: test_table +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + numFiles 1 + numRows 500 + rawDataSize 5312 + totalSize 5812 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: 1 +Bucket Columns: [key] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: -- Test sort by, should be sorted by key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT key, value FROM src SORT BY key ASC +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table@part=1 +POSTHOOK: query: -- Test sort by, should be sorted by key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT key, value FROM src SORT BY key ASC +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table@part=1 +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +# col_name data_type comment + +key string None +value string None + +# Partition Information +# col_name data_type comment + +part string None + +# Detailed Partition Information +Partition Value: [1] +Database: default +Table: test_table +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + numFiles 1 + numRows 500 + rawDataSize 5312 + totalSize 5812 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [Order(col:key, order:1)] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: -- Test sort by desc, should be sorted by key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT key, value FROM src SORT BY key DESC +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table@part=1 +POSTHOOK: query: -- Test sort by desc, should be sorted by key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT key, value FROM src SORT BY key DESC +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table@part=1 +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +# col_name data_type comment + +key string None +value string None + +# Partition Information +# col_name data_type comment + +part string None + +# Detailed Partition Information +Partition Value: [1] +Database: default +Table: test_table +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + numFiles 1 + numRows 500 + rawDataSize 5312 + totalSize 5812 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [Order(col:key, order:0)] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: -- Test cluster by, should be bucketed and sorted by key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT key, value FROM src CLUSTER BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table@part=1 +POSTHOOK: query: -- Test cluster by, should be bucketed and sorted by key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT key, value FROM src CLUSTER BY key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table@part=1 +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +# col_name data_type comment + +key string None +value string None + +# Partition Information +# col_name data_type comment + +part string None + +# Detailed Partition Information +Partition Value: [1] +Database: default +Table: test_table +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + numFiles 1 + numRows 500 + rawDataSize 5312 + totalSize 5812 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: 1 +Bucket Columns: [key] +Sort Columns: [Order(col:key, order:1)] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: -- Test distribute by and sort by different keys, should be bucketed by one key sorted by the other +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT key, value FROM src DISTRIBUTE BY key SORT BY value +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table@part=1 +POSTHOOK: query: -- Test distribute by and sort by different keys, should be bucketed by one key sorted by the other +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT key, value FROM src DISTRIBUTE BY key SORT BY value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table@part=1 +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +# col_name data_type comment + +key string None +value string None + +# Partition Information +# col_name data_type comment + +part string None + +# Detailed Partition Information +Partition Value: [1] +Database: default +Table: test_table +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + numFiles 1 + numRows 500 + rawDataSize 5312 + totalSize 5812 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: 1 +Bucket Columns: [key] +Sort Columns: [Order(col:value, order:1)] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: -- Test join in simple subquery, should be bucketed and sorted on key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT key, value from (SELECT a.key, b.value FROM src a JOIN src b ON (a.key = b.key)) subq +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table@part=1 +POSTHOOK: query: -- Test join in simple subquery, should be bucketed and sorted on key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT key, value from (SELECT a.key, b.value FROM src a JOIN src b ON (a.key = b.key)) subq +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table@part=1 +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +# col_name data_type comment + +key string None +value string None + +# Partition Information +# col_name data_type comment + +part string None + +# Detailed Partition Information +Partition Value: [1] +Database: default +Table: test_table +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + numFiles 1 + numRows 1028 + rawDataSize 10968 + totalSize 11996 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: 1 +Bucket Columns: [key] +Sort Columns: [Order(col:key, order:1)] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: -- Test join in simple subquery renaming key column, should be bucketed and sorted on key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT k, value FROM (SELECT a.key as k, b.value FROM src a JOIN src b ON (a.key = b.key)) subq +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table@part=1 +POSTHOOK: query: -- Test join in simple subquery renaming key column, should be bucketed and sorted on key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT k, value FROM (SELECT a.key as k, b.value FROM src a JOIN src b ON (a.key = b.key)) subq +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table@part=1 +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +# col_name data_type comment + +key string None +value string None + +# Partition Information +# col_name data_type comment + +part string None + +# Detailed Partition Information +Partition Value: [1] +Database: default +Table: test_table +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + numFiles 1 + numRows 1028 + rawDataSize 10968 + totalSize 11996 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: 1 +Bucket Columns: [key] +Sort Columns: [Order(col:key, order:1)] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: -- Test group by in simple subquery, should be bucketed and sorted on key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT key, cnt from (SELECT key, count(*) as cnt FROM src GROUP BY key) subq +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table@part=1 +POSTHOOK: query: -- Test group by in simple subquery, should be bucketed and sorted on key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT key, cnt from (SELECT key, count(*) as cnt FROM src GROUP BY key) subq +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table@part=1 +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +# col_name data_type comment + +key string None +value string None + +# Partition Information +# col_name data_type comment + +part string None + +# Detailed Partition Information +Partition Value: [1] +Database: default +Table: test_table +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + numFiles 1 + numRows 309 + rawDataSize 1482 + totalSize 1791 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: 1 +Bucket Columns: [key] +Sort Columns: [Order(col:key, order:1)] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: -- Test group by in simple subquery renaming key column, should be bucketed and sorted on key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT k, cnt FROM (SELECT key as k, count(*) as cnt FROM src GROUP BY key) subq +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table@part=1 +POSTHOOK: query: -- Test group by in simple subquery renaming key column, should be bucketed and sorted on key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT k, cnt FROM (SELECT key as k, count(*) as cnt FROM src GROUP BY key) subq +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table@part=1 +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +# col_name data_type comment + +key string None +value string None + +# Partition Information +# col_name data_type comment + +part string None + +# Detailed Partition Information +Partition Value: [1] +Database: default +Table: test_table +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + numFiles 1 + numRows 309 + rawDataSize 1482 + totalSize 1791 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: 1 +Bucket Columns: [key] +Sort Columns: [Order(col:key, order:1)] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: -- Test group by in subquery with where outside, should still be bucketed and sorted on key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT key, value FROM (SELECT key, count(1) AS value FROM src group by key) a where key < 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table@part=1 +POSTHOOK: query: -- Test group by in subquery with where outside, should still be bucketed and sorted on key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT key, value FROM (SELECT key, count(1) AS value FROM src group by key) a where key < 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table@part=1 +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +# col_name data_type comment + +key string None +value string None + +# Partition Information +# col_name data_type comment + +part string None + +# Detailed Partition Information +Partition Value: [1] +Database: default +Table: test_table +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + numFiles 1 + numRows 6 + rawDataSize 18 + totalSize 24 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: 1 +Bucket Columns: [key] +Sort Columns: [Order(col:key, order:1)] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: -- Test group by in subquery with expression on value, should still be bucketed and sorted on key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT key, value + 1 FROM (SELECT key, count(1) AS value FROM src group by key) a where key < 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table@part=1 +POSTHOOK: query: -- Test group by in subquery with expression on value, should still be bucketed and sorted on key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT key, value + 1 FROM (SELECT key, count(1) AS value FROM src group by key) a where key < 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table@part=1 +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +# col_name data_type comment + +key string None +value string None + +# Partition Information +# col_name data_type comment + +part string None + +# Detailed Partition Information +Partition Value: [1] +Database: default +Table: test_table +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + numFiles 1 + numRows 6 + rawDataSize 18 + totalSize 24 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: 1 +Bucket Columns: [key] +Sort Columns: [Order(col:key, order:1)] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: -- Test group by in subquery with lateral view outside, should still be bucketed and sorted +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT key, value FROM (SELECT key FROM src group by key) a lateral view explode(array(1, 2)) value as value +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table@part=1 +POSTHOOK: query: -- Test group by in subquery with lateral view outside, should still be bucketed and sorted +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT key, value FROM (SELECT key FROM src group by key) a lateral view explode(array(1, 2)) value as value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table@part=1 +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SCRIPT [] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SCRIPT [] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +# col_name data_type comment + +key string None +value string None + +# Partition Information +# col_name data_type comment + +part string None + +# Detailed Partition Information +Partition Value: [1] +Database: default +Table: test_table +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + numFiles 1 + numRows 618 + rawDataSize 2964 + totalSize 3582 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: 1 +Bucket Columns: [key] +Sort Columns: [Order(col:key, order:1)] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: -- Test group by in subquery with another group by outside, should be bucketed and sorted by the +-- key of the outer group by +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT count(1), value FROM (SELECT key, count(1) as value FROM src group by key) a group by value +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table@part=1 +POSTHOOK: query: -- Test group by in subquery with another group by outside, should be bucketed and sorted by the +-- key of the outer group by +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT count(1), value FROM (SELECT key, count(1) as value FROM src group by key) a group by value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table@part=1 +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SCRIPT [] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SCRIPT [] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +# col_name data_type comment + +key string None +value string None + +# Partition Information +# col_name data_type comment + +part string None + +# Detailed Partition Information +Partition Value: [1] +Database: default +Table: test_table +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + numFiles 1 + numRows 5 + rawDataSize 19 + totalSize 24 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: 1 +Bucket Columns: [value] +Sort Columns: [Order(col:value, order:1)] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: -- Test group by in subquery with select on outside reordering the columns, should be bucketed and +-- sorted by the column the group by key ends up in +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT value, key FROM (SELECT key, count(1) as value FROM src group by key) a +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table@part=1 +POSTHOOK: query: -- Test group by in subquery with select on outside reordering the columns, should be bucketed and +-- sorted by the column the group by key ends up in +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT value, key FROM (SELECT key, count(1) as value FROM src group by key) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table@part=1 +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SCRIPT [] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SCRIPT [] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +# col_name data_type comment + +key string None +value string None + +# Partition Information +# col_name data_type comment + +part string None + +# Detailed Partition Information +Partition Value: [1] +Database: default +Table: test_table +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + numFiles 1 + numRows 309 + rawDataSize 1482 + totalSize 1791 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: 1 +Bucket Columns: [value] +Sort Columns: [Order(col:value, order:1)] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: -- Test group by in subquery followed by distribute by, should only be bucketed by the distribute key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT key, value FROM (SELECT key, count(1) as value FROM src group by key) a distribute by key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table@part=1 +POSTHOOK: query: -- Test group by in subquery followed by distribute by, should only be bucketed by the distribute key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT key, value FROM (SELECT key, count(1) as value FROM src group by key) a distribute by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table@part=1 +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SCRIPT [] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SCRIPT [] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +# col_name data_type comment + +key string None +value string None + +# Partition Information +# col_name data_type comment + +part string None + +# Detailed Partition Information +Partition Value: [1] +Database: default +Table: test_table +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + numFiles 1 + numRows 309 + rawDataSize 1482 + totalSize 1791 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: 1 +Bucket Columns: [key] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: -- Test group by in subquery followed by sort by, should only be sorted by the sort key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT key, value FROM (SELECT key, count(1) as value FROM src group by key) a sort by key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table@part=1 +POSTHOOK: query: -- Test group by in subquery followed by sort by, should only be sorted by the sort key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT key, value FROM (SELECT key, count(1) as value FROM src group by key) a sort by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table@part=1 +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SCRIPT [] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SCRIPT [] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +# col_name data_type comment + +key string None +value string None + +# Partition Information +# col_name data_type comment + +part string None + +# Detailed Partition Information +Partition Value: [1] +Database: default +Table: test_table +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + numFiles 1 + numRows 309 + rawDataSize 1482 + totalSize 1791 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [Order(col:key, order:1)] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: -- Test group by in subquery followed by transform script, should not be bucketed or sorted +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT TRANSFORM (a.key, a.value) USING 'cat' AS (key, value) FROM (SELECT key, count(1) AS value FROM src GROUP BY KEY) a +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table@part=1 +POSTHOOK: query: -- Test group by in subquery followed by transform script, should not be bucketed or sorted +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT TRANSFORM (a.key, a.value) USING 'cat' AS (key, value) FROM (SELECT key, count(1) AS value FROM src GROUP BY KEY) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table@part=1 +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SCRIPT [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SCRIPT [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SCRIPT [] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SCRIPT [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SCRIPT [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SCRIPT [] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +# col_name data_type comment + +key string None +value string None + +# Partition Information +# col_name data_type comment + +part string None + +# Detailed Partition Information +Partition Value: [1] +Database: default +Table: test_table +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + numFiles 1 + numRows 309 + rawDataSize 1482 + totalSize 1791 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: -- Test group by on function, should be bucketed and sorted by key and value because the function is applied in the mapper +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT key, value FROM (SELECT concat(key, "a") AS key, value, count(*) FROM src GROUP BY concat(key, "a"), value) a +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table@part=1 +POSTHOOK: query: -- Test group by on function, should be bucketed and sorted by key and value because the function is applied in the mapper +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT key, value FROM (SELECT concat(key, "a") AS key, value, count(*) FROM src GROUP BY concat(key, "a"), value) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table@part=1 +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SCRIPT [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SCRIPT [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SCRIPT [] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SCRIPT [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SCRIPT [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SCRIPT [] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +# col_name data_type comment + +key string None +value string None + +# Partition Information +# col_name data_type comment + +part string None + +# Detailed Partition Information +Partition Value: [1] +Database: default +Table: test_table +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + numFiles 1 + numRows 309 + rawDataSize 3582 + totalSize 3891 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: 1 +Bucket Columns: [key, value] +Sort Columns: [Order(col:key, order:1), Order(col:value, order:1)] +Storage Desc Params: + serialization.format 1 Index: ql/src/test/results/clientpositive/infer_bucket_sort_bucketed_table.q.out =================================================================== --- ql/src/test/results/clientpositive/infer_bucket_sort_bucketed_table.q.out (revision 0) +++ ql/src/test/results/clientpositive/infer_bucket_sort_bucketed_table.q.out (working copy) @@ -0,0 +1,127 @@ +PREHOOK: query: -- Test writing to a bucketed table, the output should be bucketed by the bucketing key into the +-- a number of files equal to the number of buckets +CREATE TABLE test_table_bucketed (key STRING, value STRING) PARTITIONED BY (part STRING) +CLUSTERED BY (value) SORTED BY (value) INTO 3 BUCKETS +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- Test writing to a bucketed table, the output should be bucketed by the bucketing key into the +-- a number of files equal to the number of buckets +CREATE TABLE test_table_bucketed (key STRING, value STRING) PARTITIONED BY (part STRING) +CLUSTERED BY (value) SORTED BY (value) INTO 3 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@test_table_bucketed +PREHOOK: query: -- Despite the fact that normally inferring would say this table is bucketed and sorted on key, +-- this should be bucketed and sorted by value into 3 buckets +INSERT OVERWRITE TABLE test_table_bucketed PARTITION (part = '1') +SELECT key, count(1) FROM src GROUP BY KEY +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table_bucketed@part=1 +POSTHOOK: query: -- Despite the fact that normally inferring would say this table is bucketed and sorted on key, +-- this should be bucketed and sorted by value into 3 buckets +INSERT OVERWRITE TABLE test_table_bucketed PARTITION (part = '1') +SELECT key, count(1) FROM src GROUP BY KEY +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table_bucketed@part=1 +POSTHOOK: Lineage: test_table_bucketed PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_bucketed PARTITION(part=1).value EXPRESSION [(src)src.null, ] +PREHOOK: query: DESCRIBE FORMATTED test_table_bucketed PARTITION (part = '1') +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESCRIBE FORMATTED test_table_bucketed PARTITION (part = '1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: test_table_bucketed PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_bucketed PARTITION(part=1).value EXPRESSION [(src)src.null, ] +# col_name data_type comment + +key string None +value string None + +# Partition Information +# col_name data_type comment + +part string None + +# Detailed Partition Information +Partition Value: [1] +Database: default +Table: test_table_bucketed +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + numFiles 3 + numRows 0 + rawDataSize 0 + totalSize 1791 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: 3 +Bucket Columns: [value] +Sort Columns: [Order(col:value, order:1)] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: -- If the count(*) from sampling the buckets matches the count(*) from each file, the table is +-- bucketed +SELECT COUNT(*) FROM test_table_bucketed TABLESAMPLE (BUCKET 1 OUT OF 3) WHERE part = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table_bucketed +PREHOOK: Input: default@test_table_bucketed@part=1 +#### A masked pattern was here #### +POSTHOOK: query: -- If the count(*) from sampling the buckets matches the count(*) from each file, the table is +-- bucketed +SELECT COUNT(*) FROM test_table_bucketed TABLESAMPLE (BUCKET 1 OUT OF 3) WHERE part = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table_bucketed +POSTHOOK: Input: default@test_table_bucketed@part=1 +#### A masked pattern was here #### +POSTHOOK: Lineage: test_table_bucketed PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_bucketed PARTITION(part=1).value EXPRESSION [(src)src.null, ] +31 +PREHOOK: query: SELECT COUNT(*) FROM test_table_bucketed TABLESAMPLE (BUCKET 2 OUT OF 3) WHERE part = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table_bucketed +PREHOOK: Input: default@test_table_bucketed@part=1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT COUNT(*) FROM test_table_bucketed TABLESAMPLE (BUCKET 2 OUT OF 3) WHERE part = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table_bucketed +POSTHOOK: Input: default@test_table_bucketed@part=1 +#### A masked pattern was here #### +POSTHOOK: Lineage: test_table_bucketed PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_bucketed PARTITION(part=1).value EXPRESSION [(src)src.null, ] +179 +PREHOOK: query: SELECT COUNT(*) FROM test_table_bucketed TABLESAMPLE (BUCKET 3 OUT OF 3) WHERE part = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table_bucketed +PREHOOK: Input: default@test_table_bucketed@part=1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT COUNT(*) FROM test_table_bucketed TABLESAMPLE (BUCKET 3 OUT OF 3) WHERE part = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table_bucketed +POSTHOOK: Input: default@test_table_bucketed@part=1 +#### A masked pattern was here #### +POSTHOOK: Lineage: test_table_bucketed PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_bucketed PARTITION(part=1).value EXPRESSION [(src)src.null, ] +99 +PREHOOK: query: SELECT cnt FROM (SELECT INPUT__FILE__NAME, COUNT(*) cnt FROM test_table_bucketed WHERE part = '1' +GROUP BY INPUT__FILE__NAME ORDER BY INPUT__FILE__NAME ASC LIMIT 3) a +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table_bucketed +PREHOOK: Input: default@test_table_bucketed@part=1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT cnt FROM (SELECT INPUT__FILE__NAME, COUNT(*) cnt FROM test_table_bucketed WHERE part = '1' +GROUP BY INPUT__FILE__NAME ORDER BY INPUT__FILE__NAME ASC LIMIT 3) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table_bucketed +POSTHOOK: Input: default@test_table_bucketed@part=1 +#### A masked pattern was here #### +POSTHOOK: Lineage: test_table_bucketed PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_bucketed PARTITION(part=1).value EXPRESSION [(src)src.null, ] +31 +179 +99 Index: ql/src/test/results/clientpositive/infer_bucket_sort_convert_join.q.out =================================================================== --- ql/src/test/results/clientpositive/infer_bucket_sort_convert_join.q.out (revision 0) +++ ql/src/test/results/clientpositive/infer_bucket_sort_convert_join.q.out (working copy) @@ -0,0 +1,139 @@ +PREHOOK: query: -- This tests inferring how data is bucketed/sorted from the operators in the reducer +-- and populating that information in partitions' metadata. In particular, those cases +-- where joins may be auto converted to map joins. + +CREATE TABLE test_table (key STRING, value STRING) PARTITIONED BY (part STRING) +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- This tests inferring how data is bucketed/sorted from the operators in the reducer +-- and populating that information in partitions' metadata. In particular, those cases +-- where joins may be auto converted to map joins. + +CREATE TABLE test_table (key STRING, value STRING) PARTITIONED BY (part STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@test_table +PREHOOK: query: -- Tests a join which is converted to a map join, the output should be neither bucketed nor sorted +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT a.key, b.value FROM src a JOIN src b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table@part=1 +POSTHOOK: query: -- Tests a join which is converted to a map join, the output should be neither bucketed nor sorted +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT a.key, b.value FROM src a JOIN src b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table@part=1 +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +# col_name data_type comment + +key string None +value string None + +# Partition Information +# col_name data_type comment + +part string None + +# Detailed Partition Information +Partition Value: [1] +Database: default +Table: test_table +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + numFiles 1 + numRows 1028 + rawDataSize 10968 + totalSize 11996 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: -- Tests a join which is not converted to a map join, the output should be bucketed and sorted + +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT a.key, b.value FROM src a JOIN src b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table@part=1 +Execution failed with exit status: 3 +Obtaining error information + +Task failed! +Task ID: + Stage-7 + +Logs: + +#### A masked pattern was here #### +FAILED: Execution Error, return code 3 from org.apache.hadoop.hive.ql.exec.MapredLocalTask +ATTEMPT: Execute BackupTask: org.apache.hadoop.hive.ql.exec.MapRedTask +POSTHOOK: query: -- Tests a join which is not converted to a map join, the output should be bucketed and sorted + +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT a.key, b.value FROM src a JOIN src b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table@part=1 +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +# col_name data_type comment + +key string None +value string None + +# Partition Information +# col_name data_type comment + +part string None + +# Detailed Partition Information +Partition Value: [1] +Database: default +Table: test_table +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + numFiles 1 + numRows 1028 + rawDataSize 10968 + totalSize 11996 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: 1 +Bucket Columns: [key] +Sort Columns: [Order(col:key, order:1)] +Storage Desc Params: + serialization.format 1 Index: ql/src/test/results/clientpositive/infer_bucket_sort_dyn_part.q.out =================================================================== --- ql/src/test/results/clientpositive/infer_bucket_sort_dyn_part.q.out (revision 0) +++ ql/src/test/results/clientpositive/infer_bucket_sort_dyn_part.q.out (working copy) @@ -0,0 +1,902 @@ +PREHOOK: query: -- This tests inferring how data is bucketed/sorted from the operators in the reducer +-- and populating that information in partitions' metadata. In particular, those cases +-- where dynamic partitioning is used. + +CREATE TABLE test_table LIKE srcpart +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- This tests inferring how data is bucketed/sorted from the operators in the reducer +-- and populating that information in partitions' metadata. In particular, those cases +-- where dynamic partitioning is used. + +CREATE TABLE test_table LIKE srcpart +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@test_table +PREHOOK: query: ALTER TABLE test_table SET FILEFORMAT RCFILE +PREHOOK: type: ALTERTABLE_FILEFORMAT +PREHOOK: Input: default@test_table +PREHOOK: Output: default@test_table +POSTHOOK: query: ALTER TABLE test_table SET FILEFORMAT RCFILE +POSTHOOK: type: ALTERTABLE_FILEFORMAT +POSTHOOK: Input: default@test_table +POSTHOOK: Output: default@test_table +PREHOOK: query: -- Simple case, this should not be bucketed or sorted + +INSERT OVERWRITE TABLE test_table PARTITION (ds, hr) +SELECT key, value, ds, hr FROM srcpart +WHERE ds = '2008-04-08' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Output: default@test_table +POSTHOOK: query: -- Simple case, this should not be bucketed or sorted + +INSERT OVERWRITE TABLE test_table PARTITION (ds, hr) +SELECT key, value, ds, hr FROM srcpart +WHERE ds = '2008-04-08' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@test_table@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@test_table@ds=2008-04-08/hr=12 +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (ds='2008-04-08', hr='11') +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (ds='2008-04-08', hr='11') +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +# col_name data_type comment + +key string default +value string default + +# Partition Information +# col_name data_type comment + +ds string None +hr string None + +# Detailed Partition Information +Partition Value: [2008-04-08, 11] +Database: default +Table: test_table +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + numFiles 1 + numRows 0 + rawDataSize 0 + totalSize 5293 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (ds='2008-04-08', hr='12') +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (ds='2008-04-08', hr='12') +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +# col_name data_type comment + +key string default +value string default + +# Partition Information +# col_name data_type comment + +ds string None +hr string None + +# Detailed Partition Information +Partition Value: [2008-04-08, 12] +Database: default +Table: test_table +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + numFiles 1 + numRows 0 + rawDataSize 0 + totalSize 5293 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: -- This should not be bucketed or sorted since the partition keys are in the set of bucketed +-- and sorted columns for the output + +INSERT OVERWRITE TABLE test_table PARTITION (ds, hr) +SELECT key, COUNT(*), ds, hr FROM srcpart +WHERE ds = '2008-04-08' +GROUP BY key, ds, hr +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Output: default@test_table +POSTHOOK: query: -- This should not be bucketed or sorted since the partition keys are in the set of bucketed +-- and sorted columns for the output + +INSERT OVERWRITE TABLE test_table PARTITION (ds, hr) +SELECT key, COUNT(*), ds, hr FROM srcpart +WHERE ds = '2008-04-08' +GROUP BY key, ds, hr +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@test_table@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@test_table@ds=2008-04-08/hr=12 +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (ds='2008-04-08', hr='11') +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (ds='2008-04-08', hr='11') +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +# col_name data_type comment + +key string default +value string default + +# Partition Information +# col_name data_type comment + +ds string None +hr string None + +# Detailed Partition Information +Partition Value: [2008-04-08, 11] +Database: default +Table: test_table +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + numFiles 1 + numRows 0 + rawDataSize 0 + totalSize 1342 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (ds='2008-04-08', hr='12') +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (ds='2008-04-08', hr='12') +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +# col_name data_type comment + +key string default +value string default + +# Partition Information +# col_name data_type comment + +ds string None +hr string None + +# Detailed Partition Information +Partition Value: [2008-04-08, 12] +Database: default +Table: test_table +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + numFiles 1 + numRows 0 + rawDataSize 0 + totalSize 1342 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: -- Both partitions should be bucketed and sorted by key + +INSERT OVERWRITE TABLE test_table PARTITION (ds, hr) +SELECT key, value, '2008-04-08', IF (key % 2 == 0, '11', '12') FROM +(SELECT key, COUNT(*) AS value FROM srcpart +WHERE ds = '2008-04-08' +GROUP BY key) a +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Output: default@test_table +POSTHOOK: query: -- Both partitions should be bucketed and sorted by key + +INSERT OVERWRITE TABLE test_table PARTITION (ds, hr) +SELECT key, value, '2008-04-08', IF (key % 2 == 0, '11', '12') FROM +(SELECT key, COUNT(*) AS value FROM srcpart +WHERE ds = '2008-04-08' +GROUP BY key) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@test_table@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@test_table@ds=2008-04-08/hr=12 +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ] +PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (ds='2008-04-08', hr='11') +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (ds='2008-04-08', hr='11') +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ] +# col_name data_type comment + +key string default +value string default + +# Partition Information +# col_name data_type comment + +ds string None +hr string None + +# Detailed Partition Information +Partition Value: [2008-04-08, 11] +Database: default +Table: test_table +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + numFiles 1 + numRows 0 + rawDataSize 0 + totalSize 719 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat +Compressed: No +Num Buckets: 1 +Bucket Columns: [key] +Sort Columns: [Order(col:key, order:1)] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (ds='2008-04-08', hr='12') +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (ds='2008-04-08', hr='12') +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ] +# col_name data_type comment + +key string default +value string default + +# Partition Information +# col_name data_type comment + +ds string None +hr string None + +# Detailed Partition Information +Partition Value: [2008-04-08, 12] +Database: default +Table: test_table +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + numFiles 1 + numRows 0 + rawDataSize 0 + totalSize 722 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat +Compressed: No +Num Buckets: 1 +Bucket Columns: [key] +Sort Columns: [Order(col:key, order:1)] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: CREATE TABLE srcpart_merge_dp LIKE srcpart +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE srcpart_merge_dp LIKE srcpart +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@srcpart_merge_dp +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ] +PREHOOK: query: CREATE TABLE srcpart_merge_dp_rc LIKE srcpart +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE srcpart_merge_dp_rc LIKE srcpart +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@srcpart_merge_dp_rc +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ] +PREHOOK: query: ALTER TABLE srcpart_merge_dp_rc SET FILEFORMAT RCFILE +PREHOOK: type: ALTERTABLE_FILEFORMAT +PREHOOK: Input: default@srcpart_merge_dp_rc +PREHOOK: Output: default@srcpart_merge_dp_rc +POSTHOOK: query: ALTER TABLE srcpart_merge_dp_rc SET FILEFORMAT RCFILE +POSTHOOK: type: ALTERTABLE_FILEFORMAT +POSTHOOK: Input: default@srcpart_merge_dp_rc +POSTHOOK: Output: default@srcpart_merge_dp_rc +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ] +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcpart_merge_dp PARTITION(ds='2008-04-08', hr=11) +PREHOOK: type: LOAD +PREHOOK: Output: default@srcpart_merge_dp +POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcpart_merge_dp PARTITION(ds='2008-04-08', hr=11) +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcpart_merge_dp +POSTHOOK: Output: default@srcpart_merge_dp@ds=2008-04-08/hr=11 +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ] +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcpart_merge_dp PARTITION(ds='2008-04-08', hr=11) +PREHOOK: type: LOAD +PREHOOK: Output: default@srcpart_merge_dp@ds=2008-04-08/hr=11 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcpart_merge_dp PARTITION(ds='2008-04-08', hr=11) +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcpart_merge_dp@ds=2008-04-08/hr=11 +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ] +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket22.txt' INTO TABLE srcpart_merge_dp PARTITION(ds='2008-04-08', hr=11) +PREHOOK: type: LOAD +PREHOOK: Output: default@srcpart_merge_dp@ds=2008-04-08/hr=11 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket22.txt' INTO TABLE srcpart_merge_dp PARTITION(ds='2008-04-08', hr=11) +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcpart_merge_dp@ds=2008-04-08/hr=11 +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ] +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket23.txt' INTO TABLE srcpart_merge_dp PARTITION(ds='2008-04-08', hr=11) +PREHOOK: type: LOAD +PREHOOK: Output: default@srcpart_merge_dp@ds=2008-04-08/hr=11 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket23.txt' INTO TABLE srcpart_merge_dp PARTITION(ds='2008-04-08', hr=11) +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcpart_merge_dp@ds=2008-04-08/hr=11 +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ] +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcpart_merge_dp PARTITION(ds='2008-04-08', hr=12) +PREHOOK: type: LOAD +PREHOOK: Output: default@srcpart_merge_dp +POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcpart_merge_dp PARTITION(ds='2008-04-08', hr=12) +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcpart_merge_dp +POSTHOOK: Output: default@srcpart_merge_dp@ds=2008-04-08/hr=12 +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ] +PREHOOK: query: INSERT OVERWRITE TABLE srcpart_merge_dp_rc PARTITION (ds = '2008-04-08', hr) +SELECT key, value, hr FROM srcpart_merge_dp WHERE ds = '2008-04-08' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_merge_dp +PREHOOK: Input: default@srcpart_merge_dp@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_merge_dp@ds=2008-04-08/hr=12 +PREHOOK: Output: default@srcpart_merge_dp_rc@ds=2008-04-08 +POSTHOOK: query: INSERT OVERWRITE TABLE srcpart_merge_dp_rc PARTITION (ds = '2008-04-08', hr) +SELECT key, value, hr FROM srcpart_merge_dp WHERE ds = '2008-04-08' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_merge_dp +POSTHOOK: Input: default@srcpart_merge_dp@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_merge_dp@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_merge_dp_rc@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_merge_dp_rc@ds=2008-04-08/hr=12 +POSTHOOK: Lineage: srcpart_merge_dp_rc PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart_merge_dp)srcpart_merge_dp.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_merge_dp_rc PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart_merge_dp)srcpart_merge_dp.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_merge_dp_rc PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart_merge_dp)srcpart_merge_dp.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_merge_dp_rc PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart_merge_dp)srcpart_merge_dp.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ] +PREHOOK: query: -- Tests dynamic partitions where bucketing/sorting can be inferred, but some partitions are +-- merged and some are moved. Currently neither should be bucketed or sorted, in the future, +-- (ds='2008-04-08', hr='12') may be bucketed and sorted, (ds='2008-04-08', hr='11') should +-- definitely not be. + +EXPLAIN +INSERT OVERWRITE TABLE test_table PARTITION (ds = '2008-04-08', hr) +SELECT key, value, IF (key % 100 == 0, '11', '12') FROM +(SELECT key, COUNT(*) AS value FROM srcpart +WHERE ds = '2008-04-08' +GROUP BY key) a +PREHOOK: type: QUERY +POSTHOOK: query: -- Tests dynamic partitions where bucketing/sorting can be inferred, but some partitions are +-- merged and some are moved. Currently neither should be bucketed or sorted, in the future, +-- (ds='2008-04-08', hr='12') may be bucketed and sorted, (ds='2008-04-08', hr='11') should +-- definitely not be. + +EXPLAIN +INSERT OVERWRITE TABLE test_table PARTITION (ds = '2008-04-08', hr) +SELECT key, value, IF (key % 100 == 0, '11', '12') FROM +(SELECT key, COUNT(*) AS value FROM srcpart +WHERE ds = '2008-04-08' +GROUP BY key) a +POSTHOOK: type: QUERY +POSTHOOK: Lineage: srcpart_merge_dp_rc PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart_merge_dp)srcpart_merge_dp.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_merge_dp_rc PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart_merge_dp)srcpart_merge_dp.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_merge_dp_rc PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart_merge_dp)srcpart_merge_dp.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_merge_dp_rc PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart_merge_dp)srcpart_merge_dp.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR COUNT) value)) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '2008-04-08')) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table) (TOK_PARTSPEC (TOK_PARTVAL ds '2008-04-08') (TOK_PARTVAL hr)))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_FUNCTION IF (== (% (TOK_TABLE_OR_COL key) 100) 0) '11' '12'))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + a:srcpart + TableScan + alias: srcpart + Select Operator + expressions: + expr: key + type: string + outputColumnNames: key + Group By Operator + aggregations: + expr: count() + bucketGroup: false + keys: + expr: key + type: string + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + expr: if(((_col0 % 100) = 0), '11', '12') + type: string + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: + expr: _col0 + type: string + expr: UDFToString(_col1) + type: string + expr: _col2 + type: string + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.test_table + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-0 + Move Operator + tables: + partition: + ds 2008-04-08 + hr + replace: true + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.test_table + + Stage: Stage-2 + Stats-Aggr Operator + + Stage: Stage-3 + Block level merge + + Stage: Stage-5 + Block level merge + + Stage: Stage-6 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + +PREHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (ds = '2008-04-08', hr) +SELECT key, value, IF (key % 100 == 0, '11', '12') FROM +(SELECT key, COUNT(*) AS value FROM srcpart +WHERE ds = '2008-04-08' +GROUP BY key) a +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Output: default@test_table@ds=2008-04-08 +POSTHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (ds = '2008-04-08', hr) +SELECT key, value, IF (key % 100 == 0, '11', '12') FROM +(SELECT key, COUNT(*) AS value FROM srcpart +WHERE ds = '2008-04-08' +GROUP BY key) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@test_table@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@test_table@ds=2008-04-08/hr=12 +POSTHOOK: Lineage: srcpart_merge_dp_rc PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart_merge_dp)srcpart_merge_dp.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_merge_dp_rc PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart_merge_dp)srcpart_merge_dp.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_merge_dp_rc PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart_merge_dp)srcpart_merge_dp.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_merge_dp_rc PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart_merge_dp)srcpart_merge_dp.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ] +PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (ds='2008-04-08', hr='11') +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (ds='2008-04-08', hr='11') +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: srcpart_merge_dp_rc PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart_merge_dp)srcpart_merge_dp.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_merge_dp_rc PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart_merge_dp)srcpart_merge_dp.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_merge_dp_rc PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart_merge_dp)srcpart_merge_dp.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_merge_dp_rc PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart_merge_dp)srcpart_merge_dp.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ] +# col_name data_type comment + +key string default +value string default + +# Partition Information +# col_name data_type comment + +ds string None +hr string None + +# Detailed Partition Information +Partition Value: [2008-04-08, 11] +Database: default +Table: test_table +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + numFiles 1 + numRows 0 + rawDataSize 0 + totalSize 115 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (ds='2008-04-08', hr='12') +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (ds='2008-04-08', hr='12') +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: srcpart_merge_dp_rc PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart_merge_dp)srcpart_merge_dp.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_merge_dp_rc PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart_merge_dp)srcpart_merge_dp.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_merge_dp_rc PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart_merge_dp)srcpart_merge_dp.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_merge_dp_rc PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart_merge_dp)srcpart_merge_dp.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=11).value EXPRESSION [(srcpart)srcpart.null, ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=12).value EXPRESSION [(srcpart)srcpart.null, ] +# col_name data_type comment + +key string default +value string default + +# Partition Information +# col_name data_type comment + +ds string None +hr string None + +# Detailed Partition Information +Partition Value: [2008-04-08, 12] +Database: default +Table: test_table +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + numFiles 2 + numRows 0 + rawDataSize 0 + totalSize 1427 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 Index: ql/src/test/results/clientpositive/infer_bucket_sort_grouping_operators.q.out =================================================================== --- ql/src/test/results/clientpositive/infer_bucket_sort_grouping_operators.q.out (revision 0) +++ ql/src/test/results/clientpositive/infer_bucket_sort_grouping_operators.q.out (working copy) @@ -0,0 +1,825 @@ +PREHOOK: query: -- This tests inferring how data is bucketed/sorted from the operators in the reducer +-- and populating that information in partitions' metadata, in particular, this tests +-- the grouping operators rollup/cube/grouping sets + +CREATE TABLE test_table_out (key STRING, value STRING, agg STRING) PARTITIONED BY (part STRING) +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- This tests inferring how data is bucketed/sorted from the operators in the reducer +-- and populating that information in partitions' metadata, in particular, this tests +-- the grouping operators rollup/cube/grouping sets + +CREATE TABLE test_table_out (key STRING, value STRING, agg STRING) PARTITIONED BY (part STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@test_table_out +PREHOOK: query: CREATE TABLE test_table_out_2 (key STRING, value STRING, grouping_key STRING, agg STRING) PARTITIONED BY (part STRING) +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE test_table_out_2 (key STRING, value STRING, grouping_key STRING, agg STRING) PARTITIONED BY (part STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@test_table_out_2 +PREHOOK: query: -- Test rollup, should not be bucketed or sorted because its missing the grouping ID +EXPLAIN INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') +SELECT key, value, count(1) FROM src GROUP BY key, value WITH ROLLUP +PREHOOK: type: QUERY +POSTHOOK: query: -- Test rollup, should not be bucketed or sorted because its missing the grouping ID +EXPLAIN INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') +SELECT key, value, count(1) FROM src GROUP BY key, value WITH ROLLUP +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table_out) (TOK_PARTSPEC (TOK_PARTVAL part '1')))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_ROLLUP_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: key, value + Group By Operator + aggregations: + expr: count(1) + bucketGroup: false + keys: + expr: key + type: string + expr: value + type: string + expr: '0' + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: string + sort order: +++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: string + tag: -1 + value expressions: + expr: _col3 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: string + expr: KEY._col2 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col3 + type: bigint + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table_out + + Stage: Stage-0 + Move Operator + tables: + partition: + part 1 + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table_out + + Stage: Stage-2 + Stats-Aggr Operator + + +PREHOOK: query: INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') +SELECT key, value, count(1) FROM src GROUP BY key, value WITH ROLLUP +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table_out@part=1 +POSTHOOK: query: INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') +SELECT key, value, count(1) FROM src GROUP BY key, value WITH ROLLUP +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table_out@part=1 +POSTHOOK: Lineage: test_table_out PARTITION(part=1).agg EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: DESCRIBE FORMATTED test_table_out PARTITION (part = '1') +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESCRIBE FORMATTED test_table_out PARTITION (part = '1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: test_table_out PARTITION(part=1).agg EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +# col_name data_type comment + +key string None +value string None +agg string None + +# Partition Information +# col_name data_type comment + +part string None + +# Detailed Partition Information +Partition Value: [1] +Database: default +Table: test_table_out +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + numFiles 1 + numRows 619 + rawDataSize 6309 + totalSize 6928 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: -- Test rollup, should be bucketed and sorted on key, value, grouping_key + +INSERT OVERWRITE TABLE test_table_out_2 PARTITION (part = '1') +SELECT key, value, GROUPING__ID, count(1) FROM src GROUP BY key, value WITH ROLLUP +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table_out_2@part=1 +POSTHOOK: query: -- Test rollup, should be bucketed and sorted on key, value, grouping_key + +INSERT OVERWRITE TABLE test_table_out_2 PARTITION (part = '1') +SELECT key, value, GROUPING__ID, count(1) FROM src GROUP BY key, value WITH ROLLUP +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table_out_2@part=1 +POSTHOOK: Lineage: test_table_out PARTITION(part=1).agg EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).agg EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).grouping_key SIMPLE [] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: DESCRIBE FORMATTED test_table_out_2 PARTITION (part = '1') +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESCRIBE FORMATTED test_table_out_2 PARTITION (part = '1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: test_table_out PARTITION(part=1).agg EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).agg EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).grouping_key SIMPLE [] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +# col_name data_type comment + +key string None +value string None +grouping_key string None +agg string None + +# Partition Information +# col_name data_type comment + +part string None + +# Detailed Partition Information +Partition Value: [1] +Database: default +Table: test_table_out_2 +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + numFiles 1 + numRows 619 + rawDataSize 7547 + totalSize 8166 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: 1 +Bucket Columns: [key, value, grouping_key] +Sort Columns: [Order(col:key, order:1), Order(col:value, order:1), Order(col:grouping_key, order:1)] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: -- Test cube, should not be bucketed or sorted because its missing the grouping ID +EXPLAIN INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') +SELECT key, value, count(1) FROM src GROUP BY key, value WITH CUBE +PREHOOK: type: QUERY +POSTHOOK: query: -- Test cube, should not be bucketed or sorted because its missing the grouping ID +EXPLAIN INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') +SELECT key, value, count(1) FROM src GROUP BY key, value WITH CUBE +POSTHOOK: type: QUERY +POSTHOOK: Lineage: test_table_out PARTITION(part=1).agg EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).agg EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).grouping_key SIMPLE [] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table_out) (TOK_PARTSPEC (TOK_PARTVAL part '1')))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_CUBE_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: key, value + Group By Operator + aggregations: + expr: count(1) + bucketGroup: false + keys: + expr: key + type: string + expr: value + type: string + expr: '0' + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: string + sort order: +++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: string + tag: -1 + value expressions: + expr: _col3 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: string + expr: KEY._col2 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col3 + type: bigint + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table_out + + Stage: Stage-0 + Move Operator + tables: + partition: + part 1 + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table_out + + Stage: Stage-2 + Stats-Aggr Operator + + +PREHOOK: query: INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') +SELECT key, value, count(1) FROM src GROUP BY key, value WITH CUBE +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table_out@part=1 +POSTHOOK: query: INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') +SELECT key, value, count(1) FROM src GROUP BY key, value WITH CUBE +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table_out@part=1 +POSTHOOK: Lineage: test_table_out PARTITION(part=1).agg EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).agg EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).agg EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).grouping_key SIMPLE [] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: DESCRIBE FORMATTED test_table_out PARTITION (part = '1') +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESCRIBE FORMATTED test_table_out PARTITION (part = '1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: test_table_out PARTITION(part=1).agg EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).agg EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).agg EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).grouping_key SIMPLE [] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +# col_name data_type comment + +key string None +value string None +agg string None + +# Partition Information +# col_name data_type comment + +part string None + +# Detailed Partition Information +Partition Value: [1] +Database: default +Table: test_table_out +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + numFiles 1 + numRows 928 + rawDataSize 9954 + totalSize 10882 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: -- Test cube, should be bucketed and sorted on key, value, grouping_key + +INSERT OVERWRITE TABLE test_table_out_2 PARTITION (part = '1') +SELECT key, value, GROUPING__ID, count(1) FROM src GROUP BY key, value WITH CUBE +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table_out_2@part=1 +POSTHOOK: query: -- Test cube, should be bucketed and sorted on key, value, grouping_key + +INSERT OVERWRITE TABLE test_table_out_2 PARTITION (part = '1') +SELECT key, value, GROUPING__ID, count(1) FROM src GROUP BY key, value WITH CUBE +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table_out_2@part=1 +POSTHOOK: Lineage: test_table_out PARTITION(part=1).agg EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).agg EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).agg EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).grouping_key SIMPLE [] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).agg EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).grouping_key SIMPLE [] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: DESCRIBE FORMATTED test_table_out_2 PARTITION (part = '1') +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESCRIBE FORMATTED test_table_out_2 PARTITION (part = '1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: test_table_out PARTITION(part=1).agg EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).agg EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).agg EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).grouping_key SIMPLE [] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).agg EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).grouping_key SIMPLE [] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +# col_name data_type comment + +key string None +value string None +grouping_key string None +agg string None + +# Partition Information +# col_name data_type comment + +part string None + +# Detailed Partition Information +Partition Value: [1] +Database: default +Table: test_table_out_2 +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + numFiles 1 + numRows 928 + rawDataSize 11810 + totalSize 12738 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: 1 +Bucket Columns: [key, value, grouping_key] +Sort Columns: [Order(col:key, order:1), Order(col:value, order:1), Order(col:grouping_key, order:1)] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: -- Test grouping sets, should not be bucketed or sorted because its missing the grouping ID +EXPLAIN INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') +SELECT key, value, count(1) FROM src GROUP BY key, value GROUPING SETS (key, value) +PREHOOK: type: QUERY +POSTHOOK: query: -- Test grouping sets, should not be bucketed or sorted because its missing the grouping ID +EXPLAIN INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') +SELECT key, value, count(1) FROM src GROUP BY key, value GROUPING SETS (key, value) +POSTHOOK: type: QUERY +POSTHOOK: Lineage: test_table_out PARTITION(part=1).agg EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).agg EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).agg EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).grouping_key SIMPLE [] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).agg EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).grouping_key SIMPLE [] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table_out) (TOK_PARTSPEC (TOK_PARTVAL part '1')))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPING_SETS (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value) (TOK_GROUPING_SETS_EXPRESSION (TOK_TABLE_OR_COL key)) (TOK_GROUPING_SETS_EXPRESSION (TOK_TABLE_OR_COL value))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: key, value + Group By Operator + aggregations: + expr: count(1) + bucketGroup: false + keys: + expr: key + type: string + expr: value + type: string + expr: '0' + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: string + sort order: +++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: string + tag: -1 + value expressions: + expr: _col3 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: string + expr: KEY._col2 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col3 + type: bigint + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table_out + + Stage: Stage-0 + Move Operator + tables: + partition: + part 1 + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table_out + + Stage: Stage-2 + Stats-Aggr Operator + + +PREHOOK: query: INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') +SELECT key, value, count(1) FROM src GROUP BY key, value GROUPING SETS (key, value) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table_out@part=1 +POSTHOOK: query: INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') +SELECT key, value, count(1) FROM src GROUP BY key, value GROUPING SETS (key, value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table_out@part=1 +POSTHOOK: Lineage: test_table_out PARTITION(part=1).agg EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).agg EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).agg EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).agg EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).grouping_key SIMPLE [] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).agg EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).grouping_key SIMPLE [] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: DESCRIBE FORMATTED test_table_out PARTITION (part = '1') +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESCRIBE FORMATTED test_table_out PARTITION (part = '1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: test_table_out PARTITION(part=1).agg EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).agg EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).agg EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).agg EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).grouping_key SIMPLE [] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).agg EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).grouping_key SIMPLE [] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +# col_name data_type comment + +key string None +value string None +agg string None + +# Partition Information +# col_name data_type comment + +part string None + +# Detailed Partition Information +Partition Value: [1] +Database: default +Table: test_table_out +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + numFiles 1 + numRows 618 + rawDataSize 6054 + totalSize 6672 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: -- Test grouping sets, should be bucketed and sorted on key, value, grouping_key + +INSERT OVERWRITE TABLE test_table_out_2 PARTITION (part = '1') +SELECT key, value, GROUPING__ID, count(1) FROM src GROUP BY key, value GROUPING SETS (key, value) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table_out_2@part=1 +POSTHOOK: query: -- Test grouping sets, should be bucketed and sorted on key, value, grouping_key + +INSERT OVERWRITE TABLE test_table_out_2 PARTITION (part = '1') +SELECT key, value, GROUPING__ID, count(1) FROM src GROUP BY key, value GROUPING SETS (key, value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table_out_2@part=1 +POSTHOOK: Lineage: test_table_out PARTITION(part=1).agg EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).agg EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).agg EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).agg EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).grouping_key SIMPLE [] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).agg EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).grouping_key SIMPLE [] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).agg EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).grouping_key SIMPLE [] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: DESCRIBE FORMATTED test_table_out_2 PARTITION (part = '1') +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESCRIBE FORMATTED test_table_out_2 PARTITION (part = '1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: test_table_out PARTITION(part=1).agg EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).agg EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).agg EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).agg EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).grouping_key SIMPLE [] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).agg EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).grouping_key SIMPLE [] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).agg EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).grouping_key SIMPLE [] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +# col_name data_type comment + +key string None +value string None +grouping_key string None +agg string None + +# Partition Information +# col_name data_type comment + +part string None + +# Detailed Partition Information +Partition Value: [1] +Database: default +Table: test_table_out_2 +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + numFiles 1 + numRows 618 + rawDataSize 7290 + totalSize 7908 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: 1 +Bucket Columns: [key, value, grouping_key] +Sort Columns: [Order(col:key, order:1), Order(col:value, order:1), Order(col:grouping_key, order:1)] +Storage Desc Params: + serialization.format 1 Index: ql/src/test/results/clientpositive/infer_bucket_sort_list_bucket.q.out =================================================================== --- ql/src/test/results/clientpositive/infer_bucket_sort_list_bucket.q.out (revision 0) +++ ql/src/test/results/clientpositive/infer_bucket_sort_list_bucket.q.out (working copy) @@ -0,0 +1,158 @@ +PREHOOK: query: -- This tests that bucketing/sorting metadata is not inferred for tables with list bucketing + +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) + +-- create a skewed table +CREATE TABLE list_bucketing_table (key STRING, value STRING) +PARTITIONED BY (part STRING) +SKEWED BY (key) ON ("484") +STORED AS DIRECTORIES +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- This tests that bucketing/sorting metadata is not inferred for tables with list bucketing + +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) + +-- create a skewed table +CREATE TABLE list_bucketing_table (key STRING, value STRING) +PARTITIONED BY (part STRING) +SKEWED BY (key) ON ("484") +STORED AS DIRECTORIES +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@list_bucketing_table +PREHOOK: query: -- Tests group by, the output should neither be bucketed nor sorted + +INSERT OVERWRITE TABLE list_bucketing_table PARTITION (part = '1') +SELECT key, count(*) FROM src GROUP BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@list_bucketing_table@part=1 +POSTHOOK: query: -- Tests group by, the output should neither be bucketed nor sorted + +INSERT OVERWRITE TABLE list_bucketing_table PARTITION (part = '1') +SELECT key, count(*) FROM src GROUP BY key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@list_bucketing_table@part=1 +POSTHOOK: Lineage: list_bucketing_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: list_bucketing_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +PREHOOK: query: DESC FORMATTED list_bucketing_table PARTITION (part = '1') +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESC FORMATTED list_bucketing_table PARTITION (part = '1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: list_bucketing_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: list_bucketing_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +# col_name data_type comment + +key string None +value string None + +# Partition Information +# col_name data_type comment + +part string None + +# Detailed Partition Information +Partition Value: [1] +Database: default +Table: list_bucketing_table +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + numFiles 2 + numRows 309 + rawDataSize 1482 + totalSize 1791 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Stored As SubDirectories: Yes +Skewed Columns: [key] +Skewed Values: [[484]] +#### A masked pattern was here #### +Skewed Value to Truncated Path: {[484]=/list_bucketing_table/part=1/key=484} +Storage Desc Params: + serialization.format 1 +PREHOOK: query: -- create a table skewed on a key which doesnt exist in the data +CREATE TABLE list_bucketing_table2 (key STRING, value STRING) +PARTITIONED BY (part STRING) +SKEWED BY (key) ON ("abc") +STORED AS DIRECTORIES +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- create a table skewed on a key which doesnt exist in the data +CREATE TABLE list_bucketing_table2 (key STRING, value STRING) +PARTITIONED BY (part STRING) +SKEWED BY (key) ON ("abc") +STORED AS DIRECTORIES +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@list_bucketing_table2 +POSTHOOK: Lineage: list_bucketing_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: list_bucketing_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +PREHOOK: query: -- should not be bucketed or sorted +INSERT OVERWRITE TABLE list_bucketing_table2 PARTITION (part = '1') +SELECT key, count(*) FROM src GROUP BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@list_bucketing_table2@part=1 +POSTHOOK: query: -- should not be bucketed or sorted +INSERT OVERWRITE TABLE list_bucketing_table2 PARTITION (part = '1') +SELECT key, count(*) FROM src GROUP BY key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@list_bucketing_table2@part=1 +POSTHOOK: Lineage: list_bucketing_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: list_bucketing_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: list_bucketing_table2 PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: list_bucketing_table2 PARTITION(part=1).value EXPRESSION [(src)src.null, ] +PREHOOK: query: DESC FORMATTED list_bucketing_table2 PARTITION (part = '1') +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESC FORMATTED list_bucketing_table2 PARTITION (part = '1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: list_bucketing_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: list_bucketing_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: list_bucketing_table2 PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: list_bucketing_table2 PARTITION(part=1).value EXPRESSION [(src)src.null, ] +# col_name data_type comment + +key string None +value string None + +# Partition Information +# col_name data_type comment + +part string None + +# Detailed Partition Information +Partition Value: [1] +Database: default +Table: list_bucketing_table2 +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + numFiles 1 + numRows 309 + rawDataSize 1482 + totalSize 136 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Stored As SubDirectories: Yes +Skewed Columns: [key] +Skewed Values: [[abc]] +Storage Desc Params: + serialization.format 1 Index: ql/src/test/results/clientpositive/infer_bucket_sort_map_operators.q.out =================================================================== --- ql/src/test/results/clientpositive/infer_bucket_sort_map_operators.q.out (revision 0) +++ ql/src/test/results/clientpositive/infer_bucket_sort_map_operators.q.out (working copy) @@ -0,0 +1,846 @@ +PREHOOK: query: -- This tests inferring how data is bucketed/sorted from the operators in the reducer +-- and populating that information in partitions' metadata, in particular, this tests +-- that operators in the mapper have no effect + +CREATE TABLE test_table1 (key STRING, value STRING) +CLUSTERED BY (key) SORTED BY (key DESC) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- This tests inferring how data is bucketed/sorted from the operators in the reducer +-- and populating that information in partitions' metadata, in particular, this tests +-- that operators in the mapper have no effect + +CREATE TABLE test_table1 (key STRING, value STRING) +CLUSTERED BY (key) SORTED BY (key DESC) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@test_table1 +PREHOOK: query: CREATE TABLE test_table2 (key STRING, value STRING) +CLUSTERED BY (key) SORTED BY (key DESC) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE test_table2 (key STRING, value STRING) +CLUSTERED BY (key) SORTED BY (key DESC) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@test_table2 +PREHOOK: query: INSERT OVERWRITE TABLE test_table1 SELECT key, value FROM src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table1 +POSTHOOK: query: INSERT OVERWRITE TABLE test_table1 SELECT key, value FROM src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table1 +POSTHOOK: Lineage: test_table1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: INSERT OVERWRITE TABLE test_table2 SELECT key, value FROM src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table2 +POSTHOOK: query: INSERT OVERWRITE TABLE test_table2 SELECT key, value FROM src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table2 +POSTHOOK: Lineage: test_table1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: CREATE TABLE test_table_out (key STRING, value STRING) PARTITIONED BY (part STRING) +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE test_table_out (key STRING, value STRING) PARTITIONED BY (part STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@test_table_out +POSTHOOK: Lineage: test_table1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- Test map group by doesn't affect inference, should not be bucketed or sorted +EXPLAIN INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') +SELECT key, count(*) FROM test_table1 GROUP BY key +PREHOOK: type: QUERY +POSTHOOK: query: -- Test map group by doesn't affect inference, should not be bucketed or sorted +EXPLAIN INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') +SELECT key, count(*) FROM test_table1 GROUP BY key +POSTHOOK: type: QUERY +POSTHOOK: Lineage: test_table1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table_out) (TOK_PARTSPEC (TOK_PARTVAL part '1')))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + test_table1 + TableScan + alias: test_table1 + Select Operator + expressions: + expr: key + type: string + outputColumnNames: key + Group By Operator + aggregations: + expr: count() + bucketGroup: false + keys: + expr: key + type: string + mode: final + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table_out + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-0 + Move Operator + tables: + partition: + part 1 + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table_out + + Stage: Stage-2 + Stats-Aggr Operator + + Stage: Stage-3 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table_out + + Stage: Stage-5 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table_out + + Stage: Stage-6 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + +PREHOOK: query: INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') +SELECT key, count(*) FROM test_table1 GROUP BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1 +PREHOOK: Output: default@test_table_out@part=1 +POSTHOOK: query: INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') +SELECT key, count(*) FROM test_table1 GROUP BY key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1 +POSTHOOK: Output: default@test_table_out@part=1 +POSTHOOK: Lineage: test_table1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).value EXPRESSION [(test_table1)test_table1.null, ] +PREHOOK: query: DESCRIBE FORMATTED test_table_out PARTITION (part = '1') +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESCRIBE FORMATTED test_table_out PARTITION (part = '1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: test_table1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).value EXPRESSION [(test_table1)test_table1.null, ] +# col_name data_type comment + +key string None +value string None + +# Partition Information +# col_name data_type comment + +part string None + +# Detailed Partition Information +Partition Value: [1] +Database: default +Table: test_table_out +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + numFiles 1 + numRows 0 + rawDataSize 0 + totalSize 1791 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: -- Test map group by doesn't affect inference, should be bucketed and sorted by value +EXPLAIN INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') +SELECT a.key, a.value FROM ( + SELECT key, count(*) AS value FROM test_table1 GROUP BY key +) a JOIN ( + SELECT key, value FROM src +) b +ON (a.value = b.value) +PREHOOK: type: QUERY +POSTHOOK: query: -- Test map group by doesn't affect inference, should be bucketed and sorted by value +EXPLAIN INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') +SELECT a.key, a.value FROM ( + SELECT key, count(*) AS value FROM test_table1 GROUP BY key +) a JOIN ( + SELECT key, value FROM src +) b +ON (a.value = b.value) +POSTHOOK: type: QUERY +POSTHOOK: Lineage: test_table1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).value EXPRESSION [(test_table1)test_table1.null, ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count) value)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) a) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))))) b) (= (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table_out) (TOK_PARTSPEC (TOK_PARTVAL part '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + a:test_table1 + TableScan + alias: test_table1 + Select Operator + expressions: + expr: key + type: string + outputColumnNames: key + Group By Operator + aggregations: + expr: count() + bucketGroup: false + keys: + expr: key + type: string + mode: final + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: UDFToDouble(_col1) + type: double + sort order: + + Map-reduce partition columns: + expr: UDFToDouble(_col1) + type: double + tag: 0 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + b:src + TableScan + alias: src + Select Operator + expressions: + expr: value + type: string + outputColumnNames: _col1 + Reduce Output Operator + key expressions: + expr: UDFToDouble(_col1) + type: double + sort order: + + Map-reduce partition columns: + expr: UDFToDouble(_col1) + type: double + tag: 1 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 + handleSkewJoin: false + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table_out + + Stage: Stage-0 + Move Operator + tables: + partition: + part 1 + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table_out + + Stage: Stage-2 + Stats-Aggr Operator + + +PREHOOK: query: INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') +SELECT a.key, a.value FROM ( + SELECT key, cast(count(*) AS STRING) AS value FROM test_table1 GROUP BY key +) a JOIN ( + SELECT key, value FROM src +) b +ON (a.value = b.value) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@test_table1 +PREHOOK: Output: default@test_table_out@part=1 +POSTHOOK: query: INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') +SELECT a.key, a.value FROM ( + SELECT key, cast(count(*) AS STRING) AS value FROM test_table1 GROUP BY key +) a JOIN ( + SELECT key, value FROM src +) b +ON (a.value = b.value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@test_table1 +POSTHOOK: Output: default@test_table_out@part=1 +POSTHOOK: Lineage: test_table1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).value EXPRESSION [(test_table1)test_table1.null, ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).value EXPRESSION [(test_table1)test_table1.null, ] +PREHOOK: query: DESCRIBE FORMATTED test_table_out PARTITION (part = '1') +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESCRIBE FORMATTED test_table_out PARTITION (part = '1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: test_table1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).value EXPRESSION [(test_table1)test_table1.null, ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).value EXPRESSION [(test_table1)test_table1.null, ] +# col_name data_type comment + +key string None +value string None + +# Partition Information +# col_name data_type comment + +part string None + +# Detailed Partition Information +Partition Value: [1] +Database: default +Table: test_table_out +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + numFiles 1 + numRows 0 + rawDataSize 0 + totalSize 0 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: 1 +Bucket Columns: [value] +Sort Columns: [Order(col:value, order:1)] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: -- Test SMB join doesn't affect inference, should not be bucketed or sorted +EXPLAIN INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') +SELECT /*+ MAPJOIN(a) */ a.key, b.value FROM test_table1 a JOIN test_table2 b ON a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: -- Test SMB join doesn't affect inference, should not be bucketed or sorted +EXPLAIN INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') +SELECT /*+ MAPJOIN(a) */ a.key, b.value FROM test_table1 a JOIN test_table2 b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Lineage: test_table1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).value EXPRESSION [(test_table1)test_table1.null, ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).value EXPRESSION [(test_table1)test_table1.null, ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test_table1) a) (TOK_TABREF (TOK_TABNAME test_table2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table_out) (TOK_PARTSPEC (TOK_PARTVAL part '1')))) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + b + TableScan + alias: b + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 {value} + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0, _col5 + Position of Big Table: 1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col5 + type: string + outputColumnNames: _col0, _col5 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col5 + type: string + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table_out + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-0 + Move Operator + tables: + partition: + part 1 + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table_out + + Stage: Stage-2 + Stats-Aggr Operator + + Stage: Stage-3 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table_out + + Stage: Stage-5 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table_out + + Stage: Stage-6 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + +PREHOOK: query: INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') +SELECT /*+ MAPJOIN(a) */ a.key, b.value FROM test_table1 a JOIN test_table2 b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1 +PREHOOK: Input: default@test_table2 +PREHOOK: Output: default@test_table_out@part=1 +POSTHOOK: query: INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') +SELECT /*+ MAPJOIN(a) */ a.key, b.value FROM test_table1 a JOIN test_table2 b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1 +POSTHOOK: Input: default@test_table2 +POSTHOOK: Output: default@test_table_out@part=1 +POSTHOOK: Lineage: test_table1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).value SIMPLE [(test_table2)b.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).value EXPRESSION [(test_table1)test_table1.null, ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).value EXPRESSION [(test_table1)test_table1.null, ] +PREHOOK: query: DESCRIBE FORMATTED test_table_out PARTITION (part = '1') +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESCRIBE FORMATTED test_table_out PARTITION (part = '1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: test_table1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).value SIMPLE [(test_table2)b.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).value EXPRESSION [(test_table1)test_table1.null, ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).value EXPRESSION [(test_table1)test_table1.null, ] +# col_name data_type comment + +key string None +value string None + +# Partition Information +# col_name data_type comment + +part string None + +# Detailed Partition Information +Partition Value: [1] +Database: default +Table: test_table_out +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + numFiles 1 + numRows 0 + rawDataSize 0 + totalSize 11996 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: -- Test SMB join doesn't affect inference, should be bucketed and sorted by key +EXPLAIN INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') +SELECT /*+ MAPJOIN(a) */ b.value, count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key +GROUP BY b.value +PREHOOK: type: QUERY +POSTHOOK: query: -- Test SMB join doesn't affect inference, should be bucketed and sorted by key +EXPLAIN INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') +SELECT /*+ MAPJOIN(a) */ b.value, count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key +GROUP BY b.value +POSTHOOK: type: QUERY +POSTHOOK: Lineage: test_table1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).value SIMPLE [(test_table2)b.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).value EXPRESSION [(test_table1)test_table1.null, ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).value EXPRESSION [(test_table1)test_table1.null, ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test_table1) a) (TOK_TABREF (TOK_TABNAME test_table2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table_out) (TOK_PARTSPEC (TOK_PARTVAL part '1')))) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL b) value)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + b + TableScan + alias: b + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 {value} + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col5 + Position of Big Table: 1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + Select Operator + expressions: + expr: _col5 + type: string + outputColumnNames: _col5 + Select Operator + expressions: + expr: _col5 + type: string + outputColumnNames: _col5 + Group By Operator + aggregations: + expr: count() + bucketGroup: false + keys: + expr: _col5 + type: string + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table_out + + Stage: Stage-0 + Move Operator + tables: + partition: + part 1 + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table_out + + Stage: Stage-3 + Stats-Aggr Operator + + +PREHOOK: query: INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') +SELECT /*+ MAPJOIN(a) */ b.value, count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key +GROUP BY b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1 +PREHOOK: Input: default@test_table2 +PREHOOK: Output: default@test_table_out@part=1 +POSTHOOK: query: INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') +SELECT /*+ MAPJOIN(a) */ b.value, count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key +GROUP BY b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1 +POSTHOOK: Input: default@test_table2 +POSTHOOK: Output: default@test_table_out@part=1 +POSTHOOK: Lineage: test_table1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).value SIMPLE [(test_table2)b.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).value EXPRESSION [(test_table1)test_table1.null, ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(test_table2)b.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).value EXPRESSION [(test_table1)a.null, (test_table2)b.null, ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).value EXPRESSION [(test_table1)test_table1.null, ] +PREHOOK: query: DESCRIBE FORMATTED test_table_out PARTITION (part = '1') +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESCRIBE FORMATTED test_table_out PARTITION (part = '1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: test_table1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).value SIMPLE [(test_table2)b.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).value EXPRESSION [(test_table1)test_table1.null, ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(test_table2)b.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).value EXPRESSION [(test_table1)a.null, (test_table2)b.null, ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).value EXPRESSION [(test_table1)test_table1.null, ] +# col_name data_type comment + +key string None +value string None + +# Partition Information +# col_name data_type comment + +part string None + +# Detailed Partition Information +Partition Value: [1] +Database: default +Table: test_table_out +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + numFiles 1 + numRows 0 + rawDataSize 0 + totalSize 3037 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: 1 +Bucket Columns: [key] +Sort Columns: [Order(col:key, order:1)] +Storage Desc Params: + serialization.format 1 Index: ql/src/test/results/clientpositive/infer_bucket_sort_merge.q.out =================================================================== --- ql/src/test/results/clientpositive/infer_bucket_sort_merge.q.out (revision 0) +++ ql/src/test/results/clientpositive/infer_bucket_sort_merge.q.out (working copy) @@ -0,0 +1,125 @@ +PREHOOK: query: -- This tests inferring how data is bucketed/sorted from the operators in the reducer +-- and populating that information in partitions' metadata. In particular, those cases +-- where where merging may or may not be used. + +CREATE TABLE test_table (key STRING, value STRING) PARTITIONED BY (part STRING) +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- This tests inferring how data is bucketed/sorted from the operators in the reducer +-- and populating that information in partitions' metadata. In particular, those cases +-- where where merging may or may not be used. + +CREATE TABLE test_table (key STRING, value STRING) PARTITIONED BY (part STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@test_table +PREHOOK: query: -- Tests a reduce task followed by a merge. The output should be neither bucketed nor sorted. +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT a.key, b.value FROM src a JOIN src b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table@part=1 +POSTHOOK: query: -- Tests a reduce task followed by a merge. The output should be neither bucketed nor sorted. +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT a.key, b.value FROM src a JOIN src b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table@part=1 +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +# col_name data_type comment + +key string None +value string None + +# Partition Information +# col_name data_type comment + +part string None + +# Detailed Partition Information +Partition Value: [1] +Database: default +Table: test_table +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + numFiles 1 + numRows 0 + rawDataSize 0 + totalSize 11996 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: -- Tests a reduce task followed by a move. The output should be bucketed and sorted. +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT a.key, b.value FROM src a JOIN src b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table@part=1 +POSTHOOK: query: -- Tests a reduce task followed by a move. The output should be bucketed and sorted. +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT a.key, b.value FROM src a JOIN src b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table@part=1 +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +# col_name data_type comment + +key string None +value string None + +# Partition Information +# col_name data_type comment + +part string None + +# Detailed Partition Information +Partition Value: [1] +Database: default +Table: test_table +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + numFiles 2 + numRows 0 + rawDataSize 0 + totalSize 11996 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: 2 +Bucket Columns: [key] +Sort Columns: [Order(col:key, order:1)] +Storage Desc Params: + serialization.format 1 Index: ql/src/test/results/clientpositive/infer_bucket_sort_multi_insert.q.out =================================================================== --- ql/src/test/results/clientpositive/infer_bucket_sort_multi_insert.q.out (revision 0) +++ ql/src/test/results/clientpositive/infer_bucket_sort_multi_insert.q.out (working copy) @@ -0,0 +1,509 @@ +PREHOOK: query: -- This tests inferring how data is bucketed/sorted from the operators in the reducer +-- and populating that information in partitions' metadata. In particular, those cases +-- where multi insert is used. + +CREATE TABLE test_table (key STRING, value STRING) PARTITIONED BY (part STRING) +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- This tests inferring how data is bucketed/sorted from the operators in the reducer +-- and populating that information in partitions' metadata. In particular, those cases +-- where multi insert is used. + +CREATE TABLE test_table (key STRING, value STRING) PARTITIONED BY (part STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@test_table +PREHOOK: query: -- Simple case, neither partition should be bucketed or sorted + +FROM src +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, value +INSERT OVERWRITE TABLE test_table PARTITION (part = '2') SELECT value, key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table@part=1 +PREHOOK: Output: default@test_table@part=2 +POSTHOOK: query: -- Simple case, neither partition should be bucketed or sorted + +FROM src +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, value +INSERT OVERWRITE TABLE test_table PARTITION (part = '2') SELECT value, key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table@part=1 +POSTHOOK: Output: default@test_table@part=2 +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=2).key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=2).key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +# col_name data_type comment + +key string None +value string None + +# Partition Information +# col_name data_type comment + +part string None + +# Detailed Partition Information +Partition Value: [1] +Database: default +Table: test_table +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + numFiles 1 + numRows 500 + rawDataSize 5312 + totalSize 5812 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '2') +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '2') +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=2).key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +# col_name data_type comment + +key string None +value string None + +# Partition Information +# col_name data_type comment + +part string None + +# Detailed Partition Information +Partition Value: [2] +Database: default +Table: test_table +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + numFiles 1 + numRows 500 + rawDataSize 5312 + totalSize 5812 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: -- The partitions should be bucketed and sorted by different keys + +FROM src +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, COUNT(*) GROUP BY key +INSERT OVERWRITE TABLE test_table PARTITION (part = '2') SELECT COUNT(*), value GROUP BY value +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table@part=1 +PREHOOK: Output: default@test_table@part=2 +POSTHOOK: query: -- The partitions should be bucketed and sorted by different keys + +FROM src +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, COUNT(*) GROUP BY key +INSERT OVERWRITE TABLE test_table PARTITION (part = '2') SELECT COUNT(*), value GROUP BY value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table@part=1 +POSTHOOK: Output: default@test_table@part=2 +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=2).key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=2).key EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=2).key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=2).key EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +# col_name data_type comment + +key string None +value string None + +# Partition Information +# col_name data_type comment + +part string None + +# Detailed Partition Information +Partition Value: [1] +Database: default +Table: test_table +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + numFiles 1 + numRows 309 + rawDataSize 1482 + totalSize 1791 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: 1 +Bucket Columns: [key] +Sort Columns: [Order(col:key, order:1)] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '2') +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '2') +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=2).key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=2).key EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +# col_name data_type comment + +key string None +value string None + +# Partition Information +# col_name data_type comment + +part string None + +# Detailed Partition Information +Partition Value: [2] +Database: default +Table: test_table +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + numFiles 1 + numRows 309 + rawDataSize 2718 + totalSize 3027 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: 1 +Bucket Columns: [value] +Sort Columns: [Order(col:value, order:1)] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: -- The first partition should be bucketed and sorted, the second should not + +FROM src +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, COUNT(*) GROUP BY key +INSERT OVERWRITE TABLE test_table PARTITION (part = '2') SELECT key, value +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table@part=1 +PREHOOK: Output: default@test_table@part=2 +POSTHOOK: query: -- The first partition should be bucketed and sorted, the second should not + +FROM src +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, COUNT(*) GROUP BY key +INSERT OVERWRITE TABLE test_table PARTITION (part = '2') SELECT key, value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table@part=1 +POSTHOOK: Output: default@test_table@part=2 +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=2).key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=2).key EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=2).key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=2).key EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +# col_name data_type comment + +key string None +value string None + +# Partition Information +# col_name data_type comment + +part string None + +# Detailed Partition Information +Partition Value: [1] +Database: default +Table: test_table +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + numFiles 1 + numRows 309 + rawDataSize 1482 + totalSize 1791 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: 1 +Bucket Columns: [key] +Sort Columns: [Order(col:key, order:1)] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '2') +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '2') +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=2).key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=2).key EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +# col_name data_type comment + +key string None +value string None + +# Partition Information +# col_name data_type comment + +part string None + +# Detailed Partition Information +Partition Value: [2] +Database: default +Table: test_table +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + numFiles 1 + numRows 500 + rawDataSize 5312 + totalSize 5812 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: -- Test the multi group by single reducer optimization +-- Both partitions should be bucketed by key +FROM src +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, COUNT(*) GROUP BY key +INSERT OVERWRITE TABLE test_table PARTITION (part = '2') SELECT key, SUM(SUBSTR(value, 5)) GROUP BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table@part=1 +PREHOOK: Output: default@test_table@part=2 +POSTHOOK: query: -- Test the multi group by single reducer optimization +-- Both partitions should be bucketed by key +FROM src +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, COUNT(*) GROUP BY key +INSERT OVERWRITE TABLE test_table PARTITION (part = '2') SELECT key, SUM(SUBSTR(value, 5)) GROUP BY key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table@part=1 +POSTHOOK: Output: default@test_table@part=2 +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=2).key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=2).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=2).key EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=2).key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=2).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=2).key EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +# col_name data_type comment + +key string None +value string None + +# Partition Information +# col_name data_type comment + +part string None + +# Detailed Partition Information +Partition Value: [1] +Database: default +Table: test_table +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + numFiles 1 + numRows 309 + rawDataSize 1482 + totalSize 1791 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: 1 +Bucket Columns: [key] +Sort Columns: [Order(col:key, order:1)] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '2') +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '2') +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=2).key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=2).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=2).key EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +# col_name data_type comment + +key string None +value string None + +# Partition Information +# col_name data_type comment + +part string None + +# Detailed Partition Information +Partition Value: [2] +Database: default +Table: test_table +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + numFiles 1 + numRows 309 + rawDataSize 2690 + totalSize 2999 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: 1 +Bucket Columns: [key] +Sort Columns: [Order(col:key, order:1)] +Storage Desc Params: + serialization.format 1 Index: ql/src/test/results/clientpositive/infer_bucket_sort_num_buckets.q.out =================================================================== --- ql/src/test/results/clientpositive/infer_bucket_sort_num_buckets.q.out (revision 0) +++ ql/src/test/results/clientpositive/infer_bucket_sort_num_buckets.q.out (working copy) @@ -0,0 +1,234 @@ +PREHOOK: query: CREATE TABLE test_table (key INT, value STRING) PARTITIONED BY (ds STRING, hr STRING) +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE test_table (key INT, value STRING) PARTITIONED BY (ds STRING, hr STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@test_table +PREHOOK: query: -- Tests dynamic partitions where bucketing/sorting can be inferred, but not all reducers write +-- all partitions. The subquery produces rows as follows +-- key = 0: +-- 0, , 0 +-- key = 1: +-- 0, , 1 +-- key = 2: +-- 1, , 0 +-- This means that by distributing by the first column into two reducers, and using the third +-- columns as a dynamic partition, the dynamic partition for 0 will get written in both reducers +-- and the partition for 1 will get written in one reducer. So hr=0 should be bucketed by key +-- and hr=1 should not. + +EXPLAIN +INSERT OVERWRITE TABLE test_table PARTITION (ds = '2008-04-08', hr) +SELECT key2, value, cast(hr as int) FROM +(SELECT if ((key % 3) < 2, 0, 1) as key2, value, (key % 2) as hr +FROM srcpart +WHERE ds = '2008-04-08') a +DISTRIBUTE BY key2 +PREHOOK: type: QUERY +POSTHOOK: query: -- Tests dynamic partitions where bucketing/sorting can be inferred, but not all reducers write +-- all partitions. The subquery produces rows as follows +-- key = 0: +-- 0, , 0 +-- key = 1: +-- 0, , 1 +-- key = 2: +-- 1, , 0 +-- This means that by distributing by the first column into two reducers, and using the third +-- columns as a dynamic partition, the dynamic partition for 0 will get written in both reducers +-- and the partition for 1 will get written in one reducer. So hr=0 should be bucketed by key +-- and hr=1 should not. + +EXPLAIN +INSERT OVERWRITE TABLE test_table PARTITION (ds = '2008-04-08', hr) +SELECT key2, value, cast(hr as int) FROM +(SELECT if ((key % 3) < 2, 0, 1) as key2, value, (key % 2) as hr +FROM srcpart +WHERE ds = '2008-04-08') a +DISTRIBUTE BY key2 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION if (< (% (TOK_TABLE_OR_COL key) 3) 2) 0 1) key2) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (% (TOK_TABLE_OR_COL key) 2) hr)) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '2008-04-08')))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table) (TOK_PARTSPEC (TOK_PARTVAL ds '2008-04-08') (TOK_PARTVAL hr)))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key2)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_FUNCTION TOK_INT (TOK_TABLE_OR_COL hr)))) (TOK_DISTRIBUTEBY (TOK_TABLE_OR_COL key2)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + a:srcpart + TableScan + alias: srcpart + Select Operator + expressions: + expr: if(((key % 3) < 2), 0, 1) + type: int + expr: value + type: string + expr: (key % 2) + type: double + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: UDFToInteger(_col2) + type: int + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + sort order: + Map-reduce partition columns: + expr: _col0 + type: int + tag: -1 + value expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col2 + type: int + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table + + Stage: Stage-0 + Move Operator + tables: + partition: + ds 2008-04-08 + hr + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table + + Stage: Stage-2 + Stats-Aggr Operator + + +PREHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (ds = '2008-04-08', hr) +SELECT key2, value, cast(hr as int) FROM +(SELECT if ((key % 3) < 2, 0, 1) as key2, value, (key % 3 % 2) as hr +FROM srcpart +WHERE ds = '2008-04-08') a +DISTRIBUTE BY key2 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Output: default@test_table@ds=2008-04-08 +POSTHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (ds = '2008-04-08', hr) +SELECT key2, value, cast(hr as int) FROM +(SELECT if ((key % 3) < 2, 0, 1) as key2, value, (key % 3 % 2) as hr +FROM srcpart +WHERE ds = '2008-04-08') a +DISTRIBUTE BY key2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@test_table@ds=2008-04-08/hr=0 +POSTHOOK: Output: default@test_table@ds=2008-04-08/hr=1 +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=0).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=0).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=1).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=1).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (ds='2008-04-08', hr='0') +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (ds='2008-04-08', hr='0') +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=0).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=0).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=1).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=1).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +# col_name data_type comment + +key int None +value string None + +# Partition Information +# col_name data_type comment + +ds string None +hr string None + +# Detailed Partition Information +Partition Value: [2008-04-08, 0] +Database: default +Table: test_table +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + numFiles 2 + numRows 0 + rawDataSize 0 + totalSize 6558 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: 2 +Bucket Columns: [key] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (ds='2008-04-08', hr='1') +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (ds='2008-04-08', hr='1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=0).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=0).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=1).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=1).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +# col_name data_type comment + +key int None +value string None + +# Partition Information +# col_name data_type comment + +ds string None +hr string None + +# Detailed Partition Information +Partition Value: [2008-04-08, 1] +Database: default +Table: test_table +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + numFiles 1 + numRows 0 + rawDataSize 0 + totalSize 3254 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 Index: ql/src/test/results/clientpositive/infer_bucket_sort_reducers_power_two.q.out =================================================================== --- ql/src/test/results/clientpositive/infer_bucket_sort_reducers_power_two.q.out (revision 0) +++ ql/src/test/results/clientpositive/infer_bucket_sort_reducers_power_two.q.out (working copy) @@ -0,0 +1,399 @@ +PREHOOK: query: -- This tests inferring how data is bucketed/sorted from the operators in the reducer +-- and populating that information in partitions' metadata, it also verifies that the +-- number of reducers chosen will be a power of two + +CREATE TABLE test_table (key STRING, value STRING) PARTITIONED BY (part STRING) +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- This tests inferring how data is bucketed/sorted from the operators in the reducer +-- and populating that information in partitions' metadata, it also verifies that the +-- number of reducers chosen will be a power of two + +CREATE TABLE test_table (key STRING, value STRING) PARTITIONED BY (part STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@test_table +PREHOOK: query: -- Test group by, should be bucketed and sorted by group by key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT key, count(*) FROM src GROUP BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table@part=1 +POSTHOOK: query: -- Test group by, should be bucketed and sorted by group by key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT key, count(*) FROM src GROUP BY key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table@part=1 +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +# col_name data_type comment + +key string None +value string None + +# Partition Information +# col_name data_type comment + +part string None + +# Detailed Partition Information +Partition Value: [1] +Database: default +Table: test_table +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + numFiles 4 + numRows 0 + rawDataSize 0 + totalSize 1791 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: 4 +Bucket Columns: [key] +Sort Columns: [Order(col:key, order:1)] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: -- Test join, should be bucketed and sorted by join key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT a.key, a.value FROM src a JOIN src b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table@part=1 +POSTHOOK: query: -- Test join, should be bucketed and sorted by join key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT a.key, a.value FROM src a JOIN src b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table@part=1 +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +# col_name data_type comment + +key string None +value string None + +# Partition Information +# col_name data_type comment + +part string None + +# Detailed Partition Information +Partition Value: [1] +Database: default +Table: test_table +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + numFiles 4 + numRows 0 + rawDataSize 0 + totalSize 11996 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: 4 +Bucket Columns: [key] +Sort Columns: [Order(col:key, order:1)] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: -- Test join with two keys, should be bucketed and sorted by join keys +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT a.key, a.value FROM src a JOIN src b ON a.key = b.key AND a.value = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table@part=1 +POSTHOOK: query: -- Test join with two keys, should be bucketed and sorted by join keys +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT a.key, a.value FROM src a JOIN src b ON a.key = b.key AND a.value = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table@part=1 +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +# col_name data_type comment + +key string None +value string None + +# Partition Information +# col_name data_type comment + +part string None + +# Detailed Partition Information +Partition Value: [1] +Database: default +Table: test_table +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + numFiles 4 + numRows 0 + rawDataSize 0 + totalSize 11996 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: 4 +Bucket Columns: [key, value] +Sort Columns: [Order(col:key, order:1), Order(col:value, order:1)] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: -- Test join on three tables on same key, should be bucketed and sorted by join key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT a.key, c.value FROM src a JOIN src b ON (a.key = b.key) JOIN src c ON (b.key = c.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table@part=1 +POSTHOOK: query: -- Test join on three tables on same key, should be bucketed and sorted by join key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT a.key, c.value FROM src a JOIN src b ON (a.key = b.key) JOIN src c ON (b.key = c.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table@part=1 +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +# col_name data_type comment + +key string None +value string None + +# Partition Information +# col_name data_type comment + +part string None + +# Detailed Partition Information +Partition Value: [1] +Database: default +Table: test_table +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + numFiles 4 + numRows 0 + rawDataSize 0 + totalSize 31120 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: 4 +Bucket Columns: [key] +Sort Columns: [Order(col:key, order:1)] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: -- Test join on three tables on different keys, should be bucketed and sorted by latter key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT a.key, c.value FROM src a JOIN src b ON (a.key = b.key) JOIN src c ON (b.value = c.value) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table@part=1 +POSTHOOK: query: -- Test join on three tables on different keys, should be bucketed and sorted by latter key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT a.key, c.value FROM src a JOIN src b ON (a.key = b.key) JOIN src c ON (b.value = c.value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table@part=1 +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +# col_name data_type comment + +key string None +value string None + +# Partition Information +# col_name data_type comment + +part string None + +# Detailed Partition Information +Partition Value: [1] +Database: default +Table: test_table +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + numFiles 16 + numRows 0 + rawDataSize 0 + totalSize 31120 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: 16 +Bucket Columns: [value] +Sort Columns: [Order(col:value, order:1)] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: -- Test group by in subquery with another group by outside, should be bucketed and sorted by the +-- key of the outer group by +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT count(1), value FROM (SELECT key, count(1) as value FROM src group by key) a group by value +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table@part=1 +POSTHOOK: query: -- Test group by in subquery with another group by outside, should be bucketed and sorted by the +-- key of the outer group by +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT count(1), value FROM (SELECT key, count(1) as value FROM src group by key) a group by value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table@part=1 +POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] +# col_name data_type comment + +key string None +value string None + +# Partition Information +# col_name data_type comment + +part string None + +# Detailed Partition Information +Partition Value: [1] +Database: default +Table: test_table +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + numFiles 1 + numRows 0 + rawDataSize 0 + totalSize 24 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: 1 +Bucket Columns: [value] +Sort Columns: [Order(col:value, order:1)] +Storage Desc Params: + serialization.format 1 Index: ql/src/test/results/compiler/plan/case_sensitivity.q.xml =================================================================== --- ql/src/test/results/compiler/plan/case_sensitivity.q.xml (revision 1440043) +++ ql/src/test/results/compiler/plan/case_sensitivity.q.xml (working copy) @@ -1266,6 +1266,9 @@ + + true + true Index: ql/src/test/results/compiler/plan/cast1.q.xml =================================================================== --- ql/src/test/results/compiler/plan/cast1.q.xml (revision 1440043) +++ ql/src/test/results/compiler/plan/cast1.q.xml (working copy) @@ -1076,6 +1076,9 @@ + + true + Index: ql/src/test/results/compiler/plan/groupby1.q.xml =================================================================== --- ql/src/test/results/compiler/plan/groupby1.q.xml (revision 1440043) +++ ql/src/test/results/compiler/plan/groupby1.q.xml (working copy) @@ -992,6 +992,9 @@ + + true + true Index: ql/src/test/results/compiler/plan/groupby2.q.xml =================================================================== --- ql/src/test/results/compiler/plan/groupby2.q.xml (revision 1440043) +++ ql/src/test/results/compiler/plan/groupby2.q.xml (working copy) @@ -1097,6 +1097,9 @@ + + true + Index: ql/src/test/results/compiler/plan/groupby3.q.xml =================================================================== --- ql/src/test/results/compiler/plan/groupby3.q.xml (revision 1440043) +++ ql/src/test/results/compiler/plan/groupby3.q.xml (working copy) @@ -1296,6 +1296,9 @@ + + true + Index: ql/src/test/results/compiler/plan/groupby4.q.xml =================================================================== --- ql/src/test/results/compiler/plan/groupby4.q.xml (revision 1440043) +++ ql/src/test/results/compiler/plan/groupby4.q.xml (working copy) @@ -746,6 +746,9 @@ + + true + Index: ql/src/test/results/compiler/plan/groupby5.q.xml =================================================================== --- ql/src/test/results/compiler/plan/groupby5.q.xml (revision 1440043) +++ ql/src/test/results/compiler/plan/groupby5.q.xml (working copy) @@ -839,6 +839,9 @@ + + true + Index: ql/src/test/results/compiler/plan/groupby6.q.xml =================================================================== --- ql/src/test/results/compiler/plan/groupby6.q.xml (revision 1440043) +++ ql/src/test/results/compiler/plan/groupby6.q.xml (working copy) @@ -746,6 +746,9 @@ + + true + Index: ql/src/test/results/compiler/plan/input1.q.xml =================================================================== --- ql/src/test/results/compiler/plan/input1.q.xml (revision 1440043) +++ ql/src/test/results/compiler/plan/input1.q.xml (working copy) @@ -1081,6 +1081,9 @@ + + true + true Index: ql/src/test/results/compiler/plan/input2.q.xml =================================================================== --- ql/src/test/results/compiler/plan/input2.q.xml (revision 1440043) +++ ql/src/test/results/compiler/plan/input2.q.xml (working copy) @@ -2741,6 +2741,9 @@ + + true + true Index: ql/src/test/results/compiler/plan/input20.q.xml =================================================================== --- ql/src/test/results/compiler/plan/input20.q.xml (revision 1440043) +++ ql/src/test/results/compiler/plan/input20.q.xml (working copy) @@ -854,6 +854,9 @@ + + true + @@ -1490,21 +1493,21 @@ - CNTR_NAME_OP_4_NUM_INPUT_ROWS + CNTR_NAME_EX_4_NUM_INPUT_ROWS - CNTR_NAME_OP_4_NUM_OUTPUT_ROWS + CNTR_NAME_EX_4_NUM_OUTPUT_ROWS - CNTR_NAME_OP_4_TIME_TAKEN + CNTR_NAME_EX_4_TIME_TAKEN - CNTR_NAME_OP_4_FATAL_ERROR + CNTR_NAME_EX_4_FATAL_ERROR - OP_4 + EX_4 Index: ql/src/test/results/compiler/plan/input3.q.xml =================================================================== --- ql/src/test/results/compiler/plan/input3.q.xml (revision 1440043) +++ ql/src/test/results/compiler/plan/input3.q.xml (working copy) @@ -3407,6 +3407,9 @@ + + true + true Index: ql/src/test/results/compiler/plan/input4.q.xml =================================================================== --- ql/src/test/results/compiler/plan/input4.q.xml (revision 1440043) +++ ql/src/test/results/compiler/plan/input4.q.xml (working copy) @@ -1004,6 +1004,9 @@ + + true + true @@ -1452,21 +1455,21 @@ - CNTR_NAME_OP_4_NUM_INPUT_ROWS + CNTR_NAME_EX_4_NUM_INPUT_ROWS - CNTR_NAME_OP_4_NUM_OUTPUT_ROWS + CNTR_NAME_EX_4_NUM_OUTPUT_ROWS - CNTR_NAME_OP_4_TIME_TAKEN + CNTR_NAME_EX_4_TIME_TAKEN - CNTR_NAME_OP_4_FATAL_ERROR + CNTR_NAME_EX_4_FATAL_ERROR - OP_4 + EX_4 Index: ql/src/test/results/compiler/plan/input5.q.xml =================================================================== --- ql/src/test/results/compiler/plan/input5.q.xml (revision 1440043) +++ ql/src/test/results/compiler/plan/input5.q.xml (working copy) @@ -1034,6 +1034,9 @@ + + true + true @@ -1490,21 +1493,21 @@ - CNTR_NAME_OP_4_NUM_INPUT_ROWS + CNTR_NAME_EX_4_NUM_INPUT_ROWS - CNTR_NAME_OP_4_NUM_OUTPUT_ROWS + CNTR_NAME_EX_4_NUM_OUTPUT_ROWS - CNTR_NAME_OP_4_TIME_TAKEN + CNTR_NAME_EX_4_TIME_TAKEN - CNTR_NAME_OP_4_FATAL_ERROR + CNTR_NAME_EX_4_FATAL_ERROR - OP_4 + EX_4 Index: ql/src/test/results/compiler/plan/input6.q.xml =================================================================== --- ql/src/test/results/compiler/plan/input6.q.xml (revision 1440043) +++ ql/src/test/results/compiler/plan/input6.q.xml (working copy) @@ -1067,6 +1067,9 @@ + + true + true Index: ql/src/test/results/compiler/plan/input7.q.xml =================================================================== --- ql/src/test/results/compiler/plan/input7.q.xml (revision 1440043) +++ ql/src/test/results/compiler/plan/input7.q.xml (working copy) @@ -983,6 +983,9 @@ + + true + true Index: ql/src/test/results/compiler/plan/input8.q.xml =================================================================== --- ql/src/test/results/compiler/plan/input8.q.xml (revision 1440043) +++ ql/src/test/results/compiler/plan/input8.q.xml (working copy) @@ -651,6 +651,9 @@ + + true + Index: ql/src/test/results/compiler/plan/input9.q.xml =================================================================== --- ql/src/test/results/compiler/plan/input9.q.xml (revision 1440043) +++ ql/src/test/results/compiler/plan/input9.q.xml (working copy) @@ -1060,6 +1060,9 @@ + + true + true Index: ql/src/test/results/compiler/plan/input_part1.q.xml =================================================================== --- ql/src/test/results/compiler/plan/input_part1.q.xml (revision 1440043) +++ ql/src/test/results/compiler/plan/input_part1.q.xml (working copy) @@ -782,6 +782,9 @@ + + true + Index: ql/src/test/results/compiler/plan/input_testsequencefile.q.xml =================================================================== --- ql/src/test/results/compiler/plan/input_testsequencefile.q.xml (revision 1440043) +++ ql/src/test/results/compiler/plan/input_testsequencefile.q.xml (working copy) @@ -991,6 +991,9 @@ + + true + true Index: ql/src/test/results/compiler/plan/input_testxpath.q.xml =================================================================== --- ql/src/test/results/compiler/plan/input_testxpath.q.xml (revision 1440043) +++ ql/src/test/results/compiler/plan/input_testxpath.q.xml (working copy) @@ -763,6 +763,9 @@ + + true + Index: ql/src/test/results/compiler/plan/input_testxpath2.q.xml =================================================================== --- ql/src/test/results/compiler/plan/input_testxpath2.q.xml (revision 1440043) +++ ql/src/test/results/compiler/plan/input_testxpath2.q.xml (working copy) @@ -851,6 +851,9 @@ + + true + Index: ql/src/test/results/compiler/plan/join1.q.xml =================================================================== --- ql/src/test/results/compiler/plan/join1.q.xml (revision 1440043) +++ ql/src/test/results/compiler/plan/join1.q.xml (working copy) @@ -1073,6 +1073,9 @@ + + true + true Index: ql/src/test/results/compiler/plan/join2.q.xml =================================================================== --- ql/src/test/results/compiler/plan/join2.q.xml (revision 1440043) +++ ql/src/test/results/compiler/plan/join2.q.xml (working copy) @@ -969,6 +969,9 @@ + + true + true Index: ql/src/test/results/compiler/plan/join3.q.xml =================================================================== --- ql/src/test/results/compiler/plan/join3.q.xml (revision 1440043) +++ ql/src/test/results/compiler/plan/join3.q.xml (working copy) @@ -1494,6 +1494,9 @@ + + true + true Index: ql/src/test/results/compiler/plan/join4.q.xml =================================================================== --- ql/src/test/results/compiler/plan/join4.q.xml (revision 1440043) +++ ql/src/test/results/compiler/plan/join4.q.xml (working copy) @@ -1467,6 +1467,9 @@ + + true + Index: ql/src/test/results/compiler/plan/join5.q.xml =================================================================== --- ql/src/test/results/compiler/plan/join5.q.xml (revision 1440043) +++ ql/src/test/results/compiler/plan/join5.q.xml (working copy) @@ -1467,6 +1467,9 @@ + + true + Index: ql/src/test/results/compiler/plan/join6.q.xml =================================================================== --- ql/src/test/results/compiler/plan/join6.q.xml (revision 1440043) +++ ql/src/test/results/compiler/plan/join6.q.xml (working copy) @@ -1467,6 +1467,9 @@ + + true + Index: ql/src/test/results/compiler/plan/join7.q.xml =================================================================== --- ql/src/test/results/compiler/plan/join7.q.xml (revision 1440043) +++ ql/src/test/results/compiler/plan/join7.q.xml (working copy) @@ -2188,6 +2188,9 @@ + + true + Index: ql/src/test/results/compiler/plan/join8.q.xml =================================================================== --- ql/src/test/results/compiler/plan/join8.q.xml (revision 1440043) +++ ql/src/test/results/compiler/plan/join8.q.xml (working copy) @@ -1549,6 +1549,9 @@ + + true + Index: ql/src/test/results/compiler/plan/sample1.q.xml =================================================================== --- ql/src/test/results/compiler/plan/sample1.q.xml (revision 1440043) +++ ql/src/test/results/compiler/plan/sample1.q.xml (working copy) @@ -903,6 +903,9 @@ + + true + Index: ql/src/test/results/compiler/plan/sample2.q.xml =================================================================== --- ql/src/test/results/compiler/plan/sample2.q.xml (revision 1440043) +++ ql/src/test/results/compiler/plan/sample2.q.xml (working copy) @@ -1193,6 +1193,9 @@ + + true + true Index: ql/src/test/results/compiler/plan/sample3.q.xml =================================================================== --- ql/src/test/results/compiler/plan/sample3.q.xml (revision 1440043) +++ ql/src/test/results/compiler/plan/sample3.q.xml (working copy) @@ -1203,6 +1203,9 @@ + + true + true Index: ql/src/test/results/compiler/plan/sample4.q.xml =================================================================== --- ql/src/test/results/compiler/plan/sample4.q.xml (revision 1440043) +++ ql/src/test/results/compiler/plan/sample4.q.xml (working copy) @@ -1193,6 +1193,9 @@ + + true + true Index: ql/src/test/results/compiler/plan/sample5.q.xml =================================================================== --- ql/src/test/results/compiler/plan/sample5.q.xml (revision 1440043) +++ ql/src/test/results/compiler/plan/sample5.q.xml (working copy) @@ -1190,6 +1190,9 @@ + + true + true Index: ql/src/test/results/compiler/plan/sample6.q.xml =================================================================== --- ql/src/test/results/compiler/plan/sample6.q.xml (revision 1440043) +++ ql/src/test/results/compiler/plan/sample6.q.xml (working copy) @@ -1193,6 +1193,9 @@ + + true + true Index: ql/src/test/results/compiler/plan/sample7.q.xml =================================================================== --- ql/src/test/results/compiler/plan/sample7.q.xml (revision 1440043) +++ ql/src/test/results/compiler/plan/sample7.q.xml (working copy) @@ -1275,6 +1275,9 @@ + + true + true Index: ql/src/test/results/compiler/plan/subq.q.xml =================================================================== --- ql/src/test/results/compiler/plan/subq.q.xml (revision 1440043) +++ ql/src/test/results/compiler/plan/subq.q.xml (working copy) @@ -1081,6 +1081,9 @@ + + true + Index: ql/src/test/results/compiler/plan/udf1.q.xml =================================================================== --- ql/src/test/results/compiler/plan/udf1.q.xml (revision 1440043) +++ ql/src/test/results/compiler/plan/udf1.q.xml (working copy) @@ -1902,6 +1902,9 @@ + + true + Index: ql/src/test/results/compiler/plan/udf4.q.xml =================================================================== --- ql/src/test/results/compiler/plan/udf4.q.xml (revision 1440043) +++ ql/src/test/results/compiler/plan/udf4.q.xml (working copy) @@ -1824,6 +1824,9 @@ + + true + Index: ql/src/test/results/compiler/plan/udf6.q.xml =================================================================== --- ql/src/test/results/compiler/plan/udf6.q.xml (revision 1440043) +++ ql/src/test/results/compiler/plan/udf6.q.xml (working copy) @@ -582,6 +582,9 @@ + + true + Index: ql/src/test/results/compiler/plan/udf_case.q.xml =================================================================== --- ql/src/test/results/compiler/plan/udf_case.q.xml (revision 1440043) +++ ql/src/test/results/compiler/plan/udf_case.q.xml (working copy) @@ -676,6 +676,9 @@ + + true + Index: ql/src/test/results/compiler/plan/udf_when.q.xml =================================================================== --- ql/src/test/results/compiler/plan/udf_when.q.xml (revision 1440043) +++ ql/src/test/results/compiler/plan/udf_when.q.xml (working copy) @@ -756,6 +756,9 @@ + + true + Index: ql/src/test/results/compiler/plan/union.q.xml =================================================================== --- ql/src/test/results/compiler/plan/union.q.xml (revision 1440043) +++ ql/src/test/results/compiler/plan/union.q.xml (working copy) @@ -1626,6 +1626,9 @@ + + true +