Index: cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java
===================================================================
--- cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java	(revision 1555253)
+++ cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java	(working copy)
@@ -27,12 +27,12 @@
 import java.io.IOException;
 import java.io.PrintStream;
 import java.io.UnsupportedEncodingException;
+import java.sql.SQLException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
-import java.sql.SQLException;
 
 import jline.ArgumentCompletor;
 import jline.ArgumentCompletor.AbstractArgumentDelimiter;
@@ -271,10 +271,15 @@
         return ret;
       }
 
+      // query has run, capture the time
+      long end = System.currentTimeMillis();
+      double timeTaken = (end - start) / 1000.0;
+
       ArrayList<String> res = new ArrayList<String>();
 
       printHeader(qp, out);
 
+      // print the results
       int counter = 0;
       try {
         while (qp.getResults(res)) {
@@ -299,11 +304,8 @@
         ret = cret;
       }
 
-      long end = System.currentTimeMillis();
-      double timeTaken = (end - start) / 1000.0;
       console.printInfo("Time taken: " + timeTaken + " seconds" +
           (counter == 0 ? "" : ", Fetched: " + counter + " row(s)"));
-
     } else {
       String firstToken = tokenizeCmd(cmd.trim())[0];
       String cmd_1 = getFirstCmd(cmd.trim(), firstToken.length());
Index: common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
===================================================================
--- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java	(revision 1555253)
+++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java	(working copy)
@@ -520,6 +520,10 @@
     HIVE_ORC_DICTIONARY_KEY_SIZE_THRESHOLD("hive.exec.orc.dictionary.key.size.threshold", 0.8f),
 
+    HIVE_ORC_INCLUDE_FILE_FOOTER_IN_SPLITS("hive.orc.splits.include.file.footer", false),
+    HIVE_ORC_CACHE_STRIPE_DETAILS_SIZE("hive.orc.cache.stripe.details.size", 10000),
+    HIVE_ORC_COMPUTE_SPLITS_NUM_THREADS("hive.orc.compute.splits.num.threads", 10),
+
     HIVESKEWJOIN("hive.optimize.skewjoin", false),
     HIVECONVERTJOIN("hive.auto.convert.join", true),
     HIVECONVERTJOINNOCONDITIONALTASK("hive.auto.convert.join.noconditionaltask", true),
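For reference, the three new ConfVars above are read back through the standard HiveConf accessors that this patch itself uses in OrcInputFormat.Context further down; a minimal sketch, with an illustrative class name that is not part of the patch:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;

// Illustrative only: shows how the new ORC split settings are read back.
public class OrcSplitSettingsSketch {
  public static void main(String[] args) {
    Configuration conf = new HiveConf();
    boolean footerInSplits =
        HiveConf.getBoolVar(conf, ConfVars.HIVE_ORC_INCLUDE_FILE_FOOTER_IN_SPLITS); // default false
    int cacheSize =
        HiveConf.getIntVar(conf, ConfVars.HIVE_ORC_CACHE_STRIPE_DETAILS_SIZE);      // default 10000
    int numThreads =
        HiveConf.getIntVar(conf, ConfVars.HIVE_ORC_COMPUTE_SPLITS_NUM_THREADS);     // default 10
    System.out.println(footerInSplits + " " + cacheSize + " " + numThreads);
  }
}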
Index: ql/src/java/org/apache/hadoop/hive/ql/HashTableLoaderFactory.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/HashTableLoaderFactory.java	(revision 0)
+++ ql/src/java/org/apache/hadoop/hive/ql/HashTableLoaderFactory.java	(working copy)
@@ -0,0 +1,38 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
+import org.apache.hadoop.hive.ql.exec.HashTableLoader;
+
+/**
+ * HashTableLoaderFactory is used to determine the strategy
+ * of loading the hashtables for the MapJoinOperator
+ */
+public class HashTableLoaderFactory {
+
+  private HashTableLoaderFactory() {
+  }
+
+  public static HashTableLoader getLoader(Configuration hconf) {
+    return new org.apache.hadoop.hive.ql.exec.mr.HashTableLoader();
+  }
+
+}
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractMapJoinOperator.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractMapJoinOperator.java	(revision 1555253)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractMapJoinOperator.java	(working copy)
@@ -149,4 +149,11 @@
   protected boolean hasAnyNulls(MapJoinKey key) {
     return key.hasAnyNulls(nullsafes);
   }
+
+  @Override
+  public void closeOp(boolean abort) throws HiveException {
+    super.closeOp(abort);
+    emptyList = null;
+    joinKeys = null;
+  }
 }
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableLoader.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableLoader.java	(revision 0)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableLoader.java	(working copy)
@@ -0,0 +1,36 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.exec;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext;
+import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer;
+import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
+
+/**
+ * HashTableLoader is the interface used by MapJoinOperator to load the hashtables
+ * needed to process the join.
+ */
+public interface HashTableLoader {
+
+  void load(ExecMapperContext context, Configuration hconf, MapJoinDesc desc, byte posBigTable,
+      MapJoinTableContainer[] mapJoinTables, MapJoinTableContainerSerDe[] mapJoinTableSerdes)
+      throws HiveException;
+}
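Implementations are expected to populate the two parallel arrays for every small-table position; a hedged, no-op sketch of that contract (the class name is illustrative and not part of the patch — the real MR implementation appears later in this diff):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.ql.exec.HashTableLoader;
import org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext;
import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer;
import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.MapJoinDesc;

// Illustrative skeleton: fills in no tables, so every position stays null.
public class NoopHashTableLoader implements HashTableLoader {
  @Override
  public void load(ExecMapperContext context, Configuration hconf, MapJoinDesc desc,
      byte posBigTable, MapJoinTableContainer[] mapJoinTables,
      MapJoinTableContainerSerDe[] mapJoinTableSerdes) throws HiveException {
    for (int pos = 0; pos < mapJoinTables.length; pos++) {
      if (pos == posBigTable) {
        continue; // the big table is streamed, never loaded into a hashtable
      }
      // A real loader would deserialize a MapJoinTableContainer into mapJoinTables[pos] here.
    }
  }
}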
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java	(revision 1555253)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java	(working copy)
@@ -18,9 +18,6 @@
 
 package org.apache.hadoop.hive.ql.exec;
 
-import java.io.BufferedInputStream;
-import java.io.FileInputStream;
-import java.io.ObjectInputStream;
 import java.io.Serializable;
 import java.util.List;
 
@@ -27,14 +24,13 @@
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.filecache.DistributedCache;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.HashTableLoaderFactory;
 import org.apache.hadoop.hive.ql.exec.persistence.MapJoinKey;
 import org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext;
 import org.apache.hadoop.hive.ql.exec.persistence.MapJoinRowContainer;
 import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer;
 import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe;
+import org.apache.hadoop.hive.ql.log.PerfLogger;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
 import org.apache.hadoop.hive.ql.plan.TableDesc;
@@ -41,7 +37,6 @@
 import org.apache.hadoop.hive.ql.plan.api.OperatorType;
 import org.apache.hadoop.hive.serde2.SerDe;
 import org.apache.hadoop.hive.serde2.SerDeException;
-import org.apache.hadoop.hive.shims.ShimLoader;
 import org.apache.hadoop.util.ReflectionUtils;
 
 /**
@@ -51,7 +46,15 @@
   private static final long serialVersionUID = 1L;
   private static final Log LOG = LogFactory.getLog(MapJoinOperator.class.getName());
+  private static final String CLASS_NAME = MapJoinOperator.class.getName();
 
+  private final PerfLogger perfLogger = PerfLogger.getPerfLogger();
+
+  private transient String tableKey;
+  private transient String serdeKey;
+  private transient ObjectCache cache;
+
+  private HashTableLoader loader;
+
   protected transient MapJoinTableContainer[] mapJoinTables;
   private transient MapJoinTableContainerSerDe[] mapJoinTableSerdes;
   private transient boolean hashTblInitedOnce;
@@ -64,13 +67,41 @@
     super(mjop);
   }
 
+  /*
+   * We need the base (operator.java) implementation of start/endGroup.
+   * The parent class has functionality in those that map join can't use.
+   */
   @Override
+  public void endGroup() throws HiveException {
+    defaultEndGroup();
+  }
+
+  @Override
+  public void startGroup() throws HiveException {
+    defaultStartGroup();
+  }
+
+  @Override
   protected void initializeOp(Configuration hconf) throws HiveException {
     super.initializeOp(hconf);
+
     int tagLen = conf.getTagLength();
-    mapJoinTables = new MapJoinTableContainer[tagLen];
-    mapJoinTableSerdes = new MapJoinTableContainerSerDe[tagLen];
-    hashTblInitedOnce = false;
+
+    tableKey = "__HASH_MAP_"+this.getOperatorId()+"_container";
+    serdeKey = "__HASH_MAP_"+this.getOperatorId()+"_serde";
+
+    cache = ObjectCacheFactory.getCache(hconf);
+    loader = HashTableLoaderFactory.getLoader(hconf);
+
+    mapJoinTables = (MapJoinTableContainer[]) cache.retrieve(tableKey);
+    mapJoinTableSerdes = (MapJoinTableContainerSerDe[]) cache.retrieve(serdeKey);
+    hashTblInitedOnce = true;
+
+    if (mapJoinTables == null || mapJoinTableSerdes == null) {
+      mapJoinTables = new MapJoinTableContainer[tagLen];
+      mapJoinTableSerdes = new MapJoinTableContainerSerDe[tagLen];
+      hashTblInitedOnce = false;
+    }
   }
 
   public void generateMapMetaData() throws HiveException, SerDeException {
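The initializeOp() change above follows a retrieve-or-build pattern against the new ObjectCache; a minimal sketch of the same idea under those assumptions (the helper and class names are illustrative):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.ql.exec.ObjectCache;
import org.apache.hadoop.hive.ql.exec.ObjectCacheFactory;
import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer;

// Illustrative helper showing the retrieve-or-build pattern from initializeOp().
public class HashTableCacheSketch {
  static MapJoinTableContainer[] retrieveOrBuild(Configuration hconf, String key, int tagLen) {
    ObjectCache cache = ObjectCacheFactory.getCache(hconf);
    MapJoinTableContainer[] tables = (MapJoinTableContainer[]) cache.retrieve(key);
    if (tables == null) {
      // Cache miss (always the case on plain MR): allocate fresh containers.
      tables = new MapJoinTableContainer[tagLen];
    }
    return tables;
  }
}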
@@ -101,7 +132,9 @@
   }
 
   private void loadHashTable() throws HiveException {
-    if (!this.getExecContext().getLocalWork().getInputFileChangeSensitive()) {
+
+    if (this.getExecContext().getLocalWork() == null
+        || !this.getExecContext().getLocalWork().getInputFileChangeSensitive()) {
       if (hashTblInitedOnce) {
         return;
       } else {
@@ -108,51 +141,12 @@
         hashTblInitedOnce = true;
       }
     }
-
-    String baseDir = null;
-    String currentInputFile = getExecContext().getCurrentInputPath().toString();
-    LOG.info("******* Load from HashTable File: input : " + currentInputFile);
-    String fileName = getExecContext().getLocalWork().getBucketFileName(currentInputFile);
-    try {
-      if (ShimLoader.getHadoopShims().isLocalMode(hconf)) {
-        baseDir = this.getExecContext().getLocalWork().getTmpFileURI();
-      } else {
-        Path[] localArchives;
-        String stageID = this.getExecContext().getLocalWork().getStageID();
-        String suffix = Utilities.generateTarFileName(stageID);
-        FileSystem localFs = FileSystem.getLocal(hconf);
-        localArchives = DistributedCache.getLocalCacheArchives(this.hconf);
-        Path archive;
-        for (int j = 0; j < localArchives.length; j++) {
-          archive = localArchives[j];
-          if (!archive.getName().endsWith(suffix)) {
-            continue;
-          }
-          Path archiveLocalLink = archive.makeQualified(localFs);
-          baseDir = archiveLocalLink.toUri().getPath();
-        }
-      }
-      for (int pos = 0; pos < mapJoinTables.length; pos++) {
-        if (pos == posBigTable) {
-          continue;
-        }
-        if(baseDir == null) {
-          throw new IllegalStateException("baseDir cannot be null");
-        }
-        String filePath = Utilities.generatePath(baseDir, conf.getDumpFilePrefix(), (byte)pos, fileName);
-        Path path = new Path(filePath);
-        LOG.info("\tLoad back 1 hashtable file from tmp file uri:" + path);
-        ObjectInputStream in = new ObjectInputStream(new BufferedInputStream(
-            new FileInputStream(path.toUri().getPath()), 4096));
-        try{
-          mapJoinTables[pos] = mapJoinTableSerdes[pos].load(in);
-        } finally {
-          in.close();
-        }
-      }
-    } catch (Exception e) {
-      throw new HiveException(e);
-    }
+    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.LOAD_HASHTABLE);
+    loader.load(this.getExecContext(), hconf, this.getConf(),
+        posBigTable, mapJoinTables, mapJoinTableSerdes);
+    cache.cache(tableKey, mapJoinTables);
+    cache.cache(serdeKey, mapJoinTableSerdes);
+    perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.LOAD_HASHTABLE);
   }
 
   // Load the hash table
@@ -179,8 +173,8 @@
   public void processOp(Object row, int tag) throws HiveException {
     try {
       if (firstRow) {
-        // generate the map metadata
         generateMapMetaData();
+        loadHashTable();
         firstRow = false;
       }
       alias = (byte)tag;
@@ -227,7 +221,9 @@
 
   @Override
   public void closeOp(boolean abort) throws HiveException {
-    if (mapJoinTables != null) {
+    if ((this.getExecContext().getLocalWork() != null
+        && this.getExecContext().getLocalWork().getInputFileChangeSensitive())
+        && mapJoinTables != null) {
       for (MapJoinTableContainer tableContainer : mapJoinTables) {
         if (tableContainer != null) {
           tableContainer.clear();
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java	(revision 1555253)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java	(working copy)
@@ -33,6 +33,7 @@
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
+import org.apache.hadoop.hive.ql.io.IOContext;
 import org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
@@ -326,8 +327,7 @@
 
   public void setChildren(Configuration hconf) throws HiveException {
 
-    Path fpath = new Path(HiveConf.getVar(hconf,
-        HiveConf.ConfVars.HADOOPMAPFILENAME));
+    Path fpath = IOContext.get().getInputPath();
 
     boolean schemeless = fpath.toUri().getScheme() == null;
 
@@ -350,8 +350,10 @@
       for (String onealias : aliases) {
         Operator<? extends OperatorDesc> op = conf.getAliasToWork().get(onealias);
-        LOG.info("Adding alias " + onealias + " to work list for file "
-            + onefile);
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("Adding alias " + onealias + " to work list for file "
+              + onefile);
+        }
         MapInputPath inp = new MapInputPath(onefile, onealias, op, partDesc);
         if (opCtxMap.containsKey(inp)) {
           continue;
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/MapredContext.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/MapredContext.java	(revision 1555253)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/MapredContext.java	(working copy)
@@ -64,7 +64,7 @@
 
   private Reporter reporter;
 
-  private MapredContext(boolean isMap, JobConf jobConf) {
+  protected MapredContext(boolean isMap, JobConf jobConf) {
     this.isMap = isMap;
     this.jobConf = jobConf;
     this.udfs = new ArrayList();
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/MuxOperator.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/MuxOperator.java	(revision 1555253)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/MuxOperator.java	(working copy)
@@ -215,7 +215,7 @@
   protected void initializeChildren(Configuration hconf) throws HiveException {
     state = State.INIT;
     LOG.info("Operator " + id + " " + getName() + " initialized");
-    if (childOperators == null) {
+    if (childOperators == null || childOperators.isEmpty()) {
      return;
    }
    LOG.info("Initializing children of " + id + " " + getName());
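MapOperator.setChildren() above now takes its input path from IOContext instead of the HADOOPMAPFILENAME conf var; a minimal sketch of that lookup (the class name is illustrative, and the path may be null before the record-reader layer has registered one):

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.IOContext;

// Illustrative: the per-thread (or, on the backend, per-process) IOContext
// carries the current input path that setChildren() consumes above.
public class InputPathSketch {
  static Path currentInputPath() {
    return IOContext.get().getInputPath();
  }
}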
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/ObjectCache.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/ObjectCache.java	(revision 0)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/ObjectCache.java	(working copy)
@@ -0,0 +1,37 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.exec;
+
+/**
+ * ObjectCache. Interface for maintaining objects associated with a task.
+ */
+public interface ObjectCache {
+  /**
+   * Add an object to the cache
+   * @param key
+   * @param value
+   */
+  public void cache(String key, Object value);
+
+  /**
+   * Retrieve object from cache.
+   * @param key
+   * @return the last cached object with the key, null if none.
+   */
+  public Object retrieve(String key);
+}
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/ObjectCacheFactory.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/ObjectCacheFactory.java	(revision 0)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/ObjectCacheFactory.java	(working copy)
@@ -0,0 +1,40 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.conf.HiveConf;
+
+/**
+ * ObjectCacheFactory returns the appropriate cache depending on settings in
+ * the hive conf.
+ */
+public class ObjectCacheFactory {
+
+  private ObjectCacheFactory() {
+    // avoid instantiation
+  }
+
+  /**
+   * Returns the appropriate cache
+   */
+  public static ObjectCache getCache(Configuration conf) {
+    return new org.apache.hadoop.hive.ql.exec.mr.ObjectCache();
+  }
+}
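Callers treat the cache as best-effort: a retrieve() miss simply means the object must be rebuilt. A minimal round trip (on plain MR the factory returns the no-op cache added later in this patch, so this prints null):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.ql.exec.ObjectCache;
import org.apache.hadoop.hive.ql.exec.ObjectCacheFactory;

// Illustrative only: the key mirrors the style used by ExecMapper below.
public class ObjectCacheSketch {
  public static void main(String[] args) {
    ObjectCache cache = ObjectCacheFactory.getCache(new Configuration());
    cache.cache("__MAP_PLAN__", "some expensive object");
    Object cached = cache.retrieve("__MAP_PLAN__");
    System.out.println(cached); // null on MR: the no-op cache never retains anything
  }
}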
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java	(revision 1555253)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java	(working copy)
@@ -32,6 +32,7 @@
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext;
 import org.apache.hadoop.hive.ql.lib.Node;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
@@ -98,6 +99,8 @@
 
   public Operator() {
     id = String.valueOf(seqId.getAndIncrement());
+    childOperators = new ArrayList<Operator<? extends OperatorDesc>>();
+    parentOperators = new ArrayList<Operator<? extends OperatorDesc>>();
     initOperatorId();
   }
 
@@ -118,6 +121,9 @@
 
   public void setChildOperators(
       List<Operator<? extends OperatorDesc>> childOperators) {
+    if (childOperators == null) {
+      childOperators = new ArrayList<Operator<? extends OperatorDesc>>();
+    }
     this.childOperators = childOperators;
   }
 
@@ -151,6 +157,9 @@
 
   public void setParentOperators(
       List<Operator<? extends OperatorDesc>> parentOperators) {
+    if (parentOperators == null) {
+      parentOperators = new ArrayList<Operator<? extends OperatorDesc>>();
+    }
     this.parentOperators = parentOperators;
   }
 
@@ -333,7 +342,7 @@
     // initialize structure to maintain child op info. operator tree changes
     // while
     // initializing so this need to be done here instead of initialize() method
-    if (childOperators != null) {
+    if (childOperators != null && !childOperators.isEmpty()) {
       childOperatorsArray = new Operator[childOperators.size()];
       for (int i = 0; i < childOperatorsArray.length; i++) {
         childOperatorsArray[i] = childOperators.get(i);
@@ -364,6 +373,14 @@
 
     passExecContext(this.execContext);
     initializeOp(hconf);
+
+    // sanity check
+    if (childOperatorsArray == null
+        && !(childOperators == null || childOperators.isEmpty())) {
+      throw new HiveException(
+          "Internal Hive error during operator initialization.");
+    }
+
     LOG.info("Initialization Done " + id + " " + getName());
   }
 
@@ -390,7 +407,7 @@
   protected void initializeChildren(Configuration hconf) throws HiveException {
     state = State.INIT;
     LOG.info("Operator " + id + " " + getName() + " initialized");
-    if (childOperators == null) {
+    if (childOperators == null || childOperators.isEmpty()) {
       return;
     }
     LOG.info("Initializing children of " + id + " " + getName());
@@ -466,8 +483,7 @@
    */
   public abstract void processOp(Object row, int tag) throws HiveException;
 
-  // If a operator wants to do some work at the beginning of a group
-  public void startGroup() throws HiveException {
+  protected final void defaultStartGroup() throws HiveException {
     LOG.debug("Starting group");
 
     if (childOperators == null) {
@@ -482,8 +498,7 @@
     LOG.debug("Start group Done");
   }
 
-  // If an operator wants to do some work at the end of a group
-  public void endGroup() throws HiveException {
+  protected final void defaultEndGroup() throws HiveException {
     LOG.debug("Ending group");
 
     if (childOperators == null) {
@@ -498,6 +513,16 @@
     LOG.debug("End group Done");
   }
 
+  // If an operator wants to do some work at the beginning of a group
+  public void startGroup() throws HiveException {
+    defaultStartGroup();
+  }
+
+  // If an operator wants to do some work at the end of a group
+  public void endGroup() throws HiveException {
+    defaultEndGroup();
+  }
+
   // an blocking operator (e.g. GroupByOperator and JoinOperator) can
   // override this method to forward its outputs
   public void flush() throws HiveException {
@@ -504,7 +529,7 @@
   }
 
   public void processGroup(int tag) throws HiveException {
-    if (childOperators == null) {
+    if (childOperators == null || childOperators.isEmpty()) {
       return;
     }
     for (int i = 0; i < childOperatorsArray.length; i++) {
@@ -548,6 +573,8 @@
     // call the operator specific close routine
     closeOp(abort);
 
+    reporter = null;
+
     try {
       logStats();
       if (childOperators == null) {
@@ -632,7 +659,7 @@
     int childIndex = childOperators.indexOf(child);
     assert childIndex != -1;
     if (childOperators.size() == 1) {
-      childOperators = null;
+      setChildOperators(null);
     } else {
       childOperators.remove(childIndex);
     }
@@ -681,7 +708,7 @@
     int parentIndex = parentOperators.indexOf(parent);
     assert parentIndex != -1;
     if (parentOperators.size() == 1) {
-      parentOperators = null;
+      setParentOperators(null);
     } else {
       parentOperators.remove(parentIndex);
     }
@@ -701,7 +728,7 @@
     Operator<? extends OperatorDesc> currOp = this;
     for (int i = 0; i < depth; i++) {
       // If there are more than 1 children at any level, don't do anything
-      if ((currOp.getChildOperators() == null) ||
+      if ((currOp.getChildOperators() == null) || (currOp.getChildOperators().isEmpty()) ||
          (currOp.getChildOperators().size() > 1)) {
        return false;
      }
@@ -750,11 +777,6 @@
 
   protected void forward(Object row, ObjectInspector rowInspector)
       throws HiveException {
-    if (childOperatorsArray == null && childOperators != null) {
-      throw new HiveException(
-          "Internal Hive error during operator initialization.");
-    }
-
     if ((childOperatorsArray == null) || (getDone())) {
       return;
     }
@@ -1223,8 +1245,13 @@
   }
 
   public void setStatistics(Statistics stats) {
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("Setting stats ("+stats+") on "+this);
+    }
     if (conf != null) {
       conf.setStatistics(stats);
+    } else {
+      LOG.warn("Cannot set stats when there's no descriptor: "+this);
     }
   }
 }
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java	(revision 1555253)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java	(working copy)
@@ -21,13 +21,18 @@
 import java.util.Collection;
 import java.util.HashSet;
 import java.util.List;
+import java.util.Map;
 import java.util.Set;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 import org.apache.hadoop.mapred.OutputCollector;
 
 public class OperatorUtils {
 
+  private static final Log LOG = LogFactory.getLog(OperatorUtils.class);
+
   public static <T> Set<T> findOperators(Operator<?> start, Class<T> clazz) {
     return findOperators(start, clazz, new HashSet<T>());
   }
@@ -63,7 +68,7 @@
       return;
     }
     for (Operator<? extends OperatorDesc> op : childOperators) {
-      if(op.getName().equals(ReduceSinkOperator.getOperatorName())) { //TODO:
+      if(op.getName().equals(ReduceSinkOperator.getOperatorName())) {
         ((ReduceSinkOperator)op).setOutputCollector(out);
       } else {
         setChildrenCollector(op.getChildOperators(), out);
@@ -70,4 +75,22 @@
       }
     }
   }
+
+  public static void setChildrenCollector(List<Operator<? extends OperatorDesc>> childOperators,
+      Map<String, OutputCollector> outMap) {
+    if (childOperators == null) {
+      return;
+    }
+    for (Operator<? extends OperatorDesc> op : childOperators) {
+      if(op.getName().equals(ReduceSinkOperator.getOperatorName())) {
+        ReduceSinkOperator rs = ((ReduceSinkOperator)op);
+        if (outMap.containsKey(rs.getConf().getOutputName())) {
+          LOG.info("Setting output collector: " + rs + " --> "
+              + rs.getConf().getOutputName());
+          rs.setOutputCollector(outMap.get(rs.getConf().getOutputName()));
+        }
+      } else {
+        setChildrenCollector(op.getChildOperators(), outMap);
+      }
+    }
+  }
 }
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/Task.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/Task.java	(revision 1555253)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/Task.java	(working copy)
@@ -504,10 +504,16 @@
   Throwable getException() {
     return exception;
   }
+
   void setException(Throwable ex) {
     exception = ex;
   }
 
+  public void setConsole(LogHelper console) {
+    this.console = console;
+  }
+
+  @Override
   public String toString() {
     return getId() + ":" + getType();
   }
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java	(revision 1555253)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java	(working copy)
@@ -262,15 +262,31 @@
     return w;
   }
 
+  public static void setMapWork(Configuration conf, MapWork work) {
+    setBaseWork(conf, MAP_PLAN_NAME, work);
+  }
+
   public static MapWork getMapWork(Configuration conf) {
     return (MapWork) getBaseWork(conf, MAP_PLAN_NAME);
   }
 
+  public static void setReduceWork(Configuration conf, ReduceWork work) {
+    setBaseWork(conf, REDUCE_PLAN_NAME, work);
+  }
+
   public static ReduceWork getReduceWork(Configuration conf) {
     return (ReduceWork) getBaseWork(conf, REDUCE_PLAN_NAME);
   }
 
   /**
+   * Pushes work into the global work map
+   */
+  public static void setBaseWork(Configuration conf, String name, BaseWork work) {
+    Path path = getPlanPath(conf, name);
+    gWorkMap.put(path, work);
+  }
+
+  /**
    * Returns the Map or Reduce plan
    * Side effect: the BaseWork returned is also placed in the gWorkMap
    * @param conf
@@ -2724,22 +2740,30 @@
           + maxReducers + " totalInputFileSize=" + totalInputFileSize);
     }
 
+    // If this map reduce job writes final data to a table and bucketing is being inferred,
+    // and the user has configured Hive to do this, make sure the number of reducers is a
+    // power of two
+    boolean powersOfTwo = conf.getBoolVar(HiveConf.ConfVars.HIVE_INFER_BUCKET_SORT_NUM_BUCKETS_POWER_TWO) &&
+        finalMapRed && !work.getBucketedColsByDirectory().isEmpty();
+
+    return estimateReducers(totalInputFileSize, bytesPerReducer, maxReducers, powersOfTwo);
+  }
+
+  public static int estimateReducers(long totalInputFileSize, long bytesPerReducer,
+      int maxReducers, boolean powersOfTwo) {
+
     int reducers = (int) ((totalInputFileSize + bytesPerReducer - 1) / bytesPerReducer);
     reducers = Math.max(1, reducers);
     reducers = Math.min(maxReducers, reducers);
 
-    // If this map reduce job writes final data to a table and bucketing is being inferred,
-    // and the user has configured Hive to do this, make sure the number of reducers is a
-    // power of two
-    if (conf.getBoolVar(HiveConf.ConfVars.HIVE_INFER_BUCKET_SORT_NUM_BUCKETS_POWER_TWO) &&
-        finalMapRed && !work.getBucketedColsByDirectory().isEmpty()) {
-      int reducersLog = (int)(Math.log(reducers) / Math.log(2)) + 1;
-      int reducersPowerTwo = (int)Math.pow(2, reducersLog);
+    int reducersLog = (int)(Math.log(reducers) / Math.log(2)) + 1;
+    int reducersPowerTwo = (int)Math.pow(2, reducersLog);
 
+    if (powersOfTwo) {
       // If the original number of reducers was a power of two, use that
       if (reducersPowerTwo / 2 == reducers) {
-        return reducers;
+        // nothing to do
       } else if (reducersPowerTwo > maxReducers) {
         // If the next power of two greater than the original number of reducers is greater
         // than the max number of reducers, use the preceding power of two, which is strictly
@@ -2750,7 +2774,6 @@
         reducers = reducersPowerTwo;
       }
     }
-
     return reducers;
   }
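A worked example of the extracted estimateReducers() overload (numbers are illustrative): 10 GB of input at 1 GB per reducer yields 10 reducers, which the powersOfTwo flag rounds up to the next power of two, 16, since 16 does not exceed maxReducers:

import org.apache.hadoop.hive.ql.exec.Utilities;

// Illustrative arithmetic check of the refactored method above.
public class EstimateReducersSketch {
  public static void main(String[] args) {
    long totalInputFileSize = 10L * 1000 * 1000 * 1000; // 10 GB of input
    long bytesPerReducer = 1000L * 1000 * 1000;         // 1 GB per reducer
    int maxReducers = 999;
    // ceil(10 GB / 1 GB) = 10 reducers before any rounding.
    System.out.println(Utilities.estimateReducers(totalInputFileSize, bytesPerReducer, maxReducers, false)); // 10
    // With powersOfTwo=true, 10 is bumped to the next power of two.
    System.out.println(Utilities.estimateReducers(totalInputFileSize, bytesPerReducer, maxReducers, true));  // 16
  }
}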
@@ -3107,6 +3130,10 @@
     }
   }
 
+  public static void clearWorkMap() {
+    gWorkMap.clear();
+  }
+
   /**
    * Create a temp dir in specified baseDir
    * This can go away once hive moves to support only JDK 7
@@ -3202,4 +3229,3 @@
     return footerCount;
   }
 }
-
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecMapper.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecMapper.java	(revision 1555253)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecMapper.java	(working copy)
@@ -31,6 +31,8 @@
 import org.apache.hadoop.hive.ql.exec.FetchOperator;
 import org.apache.hadoop.hive.ql.exec.MapOperator;
 import org.apache.hadoop.hive.ql.exec.MapredContext;
+import org.apache.hadoop.hive.ql.exec.ObjectCache;
+import org.apache.hadoop.hive.ql.exec.ObjectCacheFactory;
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.OperatorUtils;
 import org.apache.hadoop.hive.ql.exec.Utilities;
@@ -59,6 +61,7 @@
  */
 public class ExecMapper extends MapReduceBase implements Mapper {
 
+  private static final String PLAN_KEY = "__MAP_PLAN__";
   private MapOperator mo;
   private Map<String, FetchOperator> fetchOperators;
   private OutputCollector oc;
@@ -94,11 +97,22 @@
     } catch (Exception e) {
       l4j.info("cannot get classpath: " + e.getMessage());
     }
+
+    setDone(false);
+
+    ObjectCache cache = ObjectCacheFactory.getCache(job);
+
     try {
       jc = job;
       execContext.setJc(jc);
       // create map and fetch operators
-      MapWork mrwork = Utilities.getMapWork(job);
+      MapWork mrwork = (MapWork) cache.retrieve(PLAN_KEY);
+      if (mrwork == null) {
+        mrwork = Utilities.getMapWork(job);
+        cache.cache(PLAN_KEY, mrwork);
+      } else {
+        Utilities.setMapWork(job, mrwork);
+      }
       if (mrwork.getVectorMode()) {
         mo = new VectorMapOperator();
       } else {
@@ -247,6 +261,7 @@
       }
     } finally {
       MapredContext.close();
+      Utilities.clearWorkMap();
     }
   }
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecReducer.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecReducer.java	(revision 1555253)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecReducer.java	(working copy)
@@ -30,6 +30,8 @@
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hive.ql.exec.MapredContext;
+import org.apache.hadoop.hive.ql.exec.ObjectCache;
+import org.apache.hadoop.hive.ql.exec.ObjectCacheFactory;
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.exec.mr.ExecMapper.reportStats;
@@ -64,6 +66,8 @@
  */
 public class ExecReducer extends MapReduceBase implements Reducer {
 
+  private static final String PLAN_KEY = "__REDUCE_PLAN__";
+
   private JobConf jc;
   private OutputCollector oc;
   private Operator reducer;
@@ -112,7 +116,16 @@
       l4j.info("cannot get classpath: " + e.getMessage());
     }
     jc = job;
-    ReduceWork gWork = Utilities.getReduceWork(job);
+
+    ObjectCache cache = ObjectCacheFactory.getCache(jc);
+    ReduceWork gWork = (ReduceWork) cache.retrieve(PLAN_KEY);
+    if (gWork == null) {
+      gWork = Utilities.getReduceWork(job);
+      cache.cache(PLAN_KEY, gWork);
+    } else {
+      Utilities.setReduceWork(job, gWork);
+    }
+
     reducer = gWork.getReducer();
     reducer.setParentOperators(null); // clear out any parents as reducer is the
                                       // root
@@ -314,6 +327,7 @@
       }
     } finally {
       MapredContext.close();
+      Utilities.clearWorkMap();
     }
   }
 }
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/mr/HadoopJobExecHelper.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/mr/HadoopJobExecHelper.java	(revision 1555253)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/mr/HadoopJobExecHelper.java	(working copy)
@@ -128,10 +128,6 @@
     this.jobId = jobId;
   }
 
-
-  public HadoopJobExecHelper() {
-  }
-
   public HadoopJobExecHelper(JobConf job, LogHelper console,
       Task<? extends Serializable> task, HadoopJobExecHook hookCallBack) {
     this.job = job;
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/mr/HashTableLoader.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/mr/HashTableLoader.java	(revision 0)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/mr/HashTableLoader.java	(working copy)
@@ -0,0 +1,104 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.exec.mr;
+
+import java.io.BufferedInputStream;
+import java.io.FileInputStream;
+import java.io.ObjectInputStream;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.filecache.DistributedCache;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
+import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer;
+import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
+import org.apache.hadoop.hive.shims.ShimLoader;
+
+/**
+ * HashTableLoader for MR loads the hashtable for MapJoins from local disk
+ * (hashtables are distributed by using the DistributedCache).
+ */
+public class HashTableLoader implements org.apache.hadoop.hive.ql.exec.HashTableLoader {
+
+  private static final Log LOG = LogFactory.getLog(MapJoinOperator.class.getName());
+
+  public HashTableLoader() {
+  }
+
+  @Override
+  public void load(ExecMapperContext context,
+      Configuration hconf,
+      MapJoinDesc desc,
+      byte posBigTable,
+      MapJoinTableContainer[] mapJoinTables,
+      MapJoinTableContainerSerDe[] mapJoinTableSerdes) throws HiveException {
+
+    String baseDir = null;
+    Path currentInputPath = context.getCurrentInputPath();
+    LOG.info("******* Load from HashTable File: input : " + currentInputPath);
+    String fileName = context.getLocalWork().getBucketFileName(currentInputPath.toString());
+    try {
+      if (ShimLoader.getHadoopShims().isLocalMode(hconf)) {
+        baseDir = context.getLocalWork().getTmpFileURI();
+      } else {
+        Path[] localArchives;
+        String stageID = context.getLocalWork().getStageID();
+        String suffix = Utilities.generateTarFileName(stageID);
+        FileSystem localFs = FileSystem.getLocal(hconf);
+        localArchives = DistributedCache.getLocalCacheArchives(hconf);
+        Path archive;
+        for (int j = 0; j < localArchives.length; j++) {
+          archive = localArchives[j];
+          if (!archive.getName().endsWith(suffix)) {
+            continue;
+          }
+          Path archiveLocalLink = archive.makeQualified(localFs);
+          baseDir = archiveLocalLink.toUri().getPath();
+        }
+      }
+      for (int pos = 0; pos < mapJoinTables.length; pos++) {
+        if (pos == posBigTable) {
+          continue;
+        }
+        if(baseDir == null) {
+          throw new IllegalStateException("baseDir cannot be null");
+        }
+        String filePath = Utilities.generatePath(baseDir, desc.getDumpFilePrefix(), (byte)pos, fileName);
+        Path path = new Path(filePath);
+        LOG.info("\tLoad back 1 hashtable file from tmp file uri:" + path);
+        ObjectInputStream in = new ObjectInputStream(new BufferedInputStream(
+            new FileInputStream(path.toUri().getPath()), 4096));
+        try{
+          mapJoinTables[pos] = mapJoinTableSerdes[pos].load(in);
+        } finally {
+          in.close();
+        }
+      }
+    } catch (Exception e) {
+      throw new HiveException(e);
+    }
+  }
+
+}
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ObjectCache.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ObjectCache.java	(revision 0)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ObjectCache.java	(working copy)
@@ -0,0 +1,45 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.mr;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+
+/**
+ * ObjectCache. No-op implementation on MR, since there is no means to reuse
+ * objects between runs of the same task.
+ */
+public class ObjectCache implements org.apache.hadoop.hive.ql.exec.ObjectCache {
+
+  private static final Log LOG = LogFactory.getLog(ObjectCache.class.getName());
+
+  @Override
+  public void cache(String key, Object value) {
+    LOG.info("Ignoring cache key: "+key);
+  }
+
+  @Override
+  public Object retrieve(String key) {
+    LOG.info("Ignoring retrieval request: "+key);
+    return null;
+  }
+
+}
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinKey.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinKey.java	(revision 1555253)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinKey.java	(working copy)
@@ -79,13 +79,19 @@
       return false;
     return true;
   }
+
+  public void read(MapJoinObjectSerDeContext context, ObjectInputStream in, Writable container)
+      throws IOException, SerDeException {
+    container.readFields(in);
+    read(context, container);
+  }
+
   @SuppressWarnings("unchecked")
-  public void read(MapJoinObjectSerDeContext context, ObjectInputStream in, Writable container)
-      throws IOException, SerDeException {
+  public void read(MapJoinObjectSerDeContext context, Writable container) throws SerDeException {
     SerDe serde = context.getSerDe();
-    container.readFields(in);
     List<Object> value = (List<Object>)ObjectInspectorUtils.copyToStandardObject(serde.deserialize(container),
         serde.getObjectInspector(), ObjectInspectorCopyOption.WRITABLE);
+
     if(value == null) {
       key = EMPTY_OBJECT_ARRAY;
     } else {
@@ -92,8 +98,8 @@
       key = value.toArray();
     }
   }
-
-  public void write(MapJoinObjectSerDeContext context, ObjectOutputStream out)
+
+  public void write(MapJoinObjectSerDeContext context, ObjectOutputStream out)
       throws IOException, SerDeException {
     SerDe serde = context.getSerDe();
     ObjectInspector objectInspector = context.getStandardOI();
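The read() split above lets a caller that already holds a deserialized Writable skip the stream step, since both paths funnel into the single-argument overload; a hedged sketch of the two entry points (helper names are illustrative):

import java.io.IOException;
import java.io.ObjectInputStream;

import org.apache.hadoop.hive.ql.exec.persistence.MapJoinKey;
import org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.io.Writable;

// Illustrative: the stream variant reads the Writable itself, then delegates.
public class MapJoinKeyReadSketch {
  static void readFromStream(MapJoinKey key, MapJoinObjectSerDeContext ctx,
      ObjectInputStream in, Writable container) throws IOException, SerDeException {
    key.read(ctx, in, container); // container.readFields(in) + read(ctx, container)
  }

  static void readFromWritable(MapJoinKey key, MapJoinObjectSerDeContext ctx,
      Writable alreadyRead) throws SerDeException {
    key.read(ctx, alreadyRead);   // new overload: no stream involved
  }
}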
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinRowContainer.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinRowContainer.java	(revision 1555253)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinRowContainer.java	(working copy)
@@ -104,30 +104,34 @@
     }
     return result;
   }
-
-  @SuppressWarnings({"unchecked"})
-  public void read(MapJoinObjectSerDeContext context, ObjectInputStream in, Writable container)
+
+  public void read(MapJoinObjectSerDeContext context, ObjectInputStream in, Writable container)
       throws IOException, SerDeException {
     clear();
-    SerDe serde = context.getSerDe();
     long numRows = in.readLong();
     for (long rowIndex = 0L; rowIndex < numRows; rowIndex++) {
-      container.readFields(in);
-      List<Object> value = (List<Object>)ObjectInspectorUtils.copyToStandardObject(serde.deserialize(container),
-          serde.getObjectInspector(), ObjectInspectorCopyOption.WRITABLE);
-      if(value == null) {
-        add(toList(EMPTY_OBJECT_ARRAY));
-      } else {
-        Object[] valuesArray = value.toArray();
-        if (context.hasFilterTag()) {
-          aliasFilter &= ((ShortWritable)valuesArray[valuesArray.length - 1]).get();
-        }
-        add(toList(valuesArray));
+      container.readFields(in);
+      read(context, container);
+    }
+  }
+
+  @SuppressWarnings("unchecked")
+  public void read(MapJoinObjectSerDeContext context, Writable currentValue) throws SerDeException {
+    SerDe serde = context.getSerDe();
+    List<Object> value = (List<Object>)ObjectInspectorUtils.copyToStandardObject(serde.deserialize(currentValue),
+        serde.getObjectInspector(), ObjectInspectorCopyOption.WRITABLE);
+    if(value == null) {
+      add(toList(EMPTY_OBJECT_ARRAY));
+    } else {
+      Object[] valuesArray = value.toArray();
+      if (context.hasFilterTag()) {
+        aliasFilter &= ((ShortWritable)valuesArray[valuesArray.length - 1]).get();
       }
+      add(toList(valuesArray));
     }
   }
-
-  public void write(MapJoinObjectSerDeContext context, ObjectOutputStream out)
+
+  public void write(MapJoinObjectSerDeContext context, ObjectOutputStream out)
      throws IOException, SerDeException {
    SerDe serde = context.getSerDe();
    ObjectInspector valueObjectInspector = context.getStandardOI();
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinTableContainerSerDe.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinTableContainerSerDe.java	(revision 1555253)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinTableContainerSerDe.java	(working copy)
@@ -40,6 +40,14 @@
     this.keyContext = keyContext;
     this.valueContext = valueContext;
   }
+
+  public MapJoinObjectSerDeContext getKeyContext() {
+    return keyContext;
+  }
+  public MapJoinObjectSerDeContext getValueContext() {
+    return valueContext;
+  }
+
   @SuppressWarnings({"unchecked"})
   public MapJoinTableContainer load(ObjectInputStream in)
       throws HiveException {
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorReduceSinkOperator.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorReduceSinkOperator.java	(revision 1555253)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorReduceSinkOperator.java	(working copy)
@@ -137,7 +137,7 @@
         colNames = String.format("%s %s", colNames, colName);
       }
 
-      LOG.info(String.format("keyObjectInspector [%s]%s => %s",
+      LOG.debug(String.format("keyObjectInspector [%s]%s => %s",
           keyObjectInspector.getClass(), keyObjectInspector, colNames));
 
@@ -169,7 +169,7 @@
         colNames = String.format("%s %s", colNames, colName);
       }
 
-      LOG.info(String.format("valueObjectInspector [%s]%s => %s",
+      LOG.debug(String.format("valueObjectInspector [%s]%s => %s",
           valueObjectInspector.getClass(), valueObjectInspector, colNames));
 
@@ -198,7 +198,7 @@
   public void processOp(Object row, int tag) throws HiveException {
     VectorizedRowBatch vrg = (VectorizedRowBatch) row;
 
-    LOG.info(String.format("sinking %d rows, %d values, %d keys, %d parts",
+    LOG.debug(String.format("sinking %d rows, %d values, %d keys, %d parts",
         vrg.size,
         valueEval.length,
         keyEval.length,
Index: ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java	(revision 1555253)
+++ ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java	(working copy)
@@ -24,6 +24,7 @@
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.Iterator;
+import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
 
@@ -42,6 +43,7 @@
 import org.apache.hadoop.hive.ql.plan.MapWork;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 import org.apache.hadoop.hive.ql.plan.PartitionDesc;
+import org.apache.hadoop.hive.ql.plan.TableDesc;
 import org.apache.hadoop.hive.ql.plan.TableScanDesc;
 import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
@@ -254,6 +256,37 @@
     pathToPartitionInfo = mrwork.getPathToPartitionInfo();
   }
 
+  private void addSplitsForGroup(List<Path> dirs, TableScanOperator tableScan, JobConf conf,
+      InputFormat inputFormat, Class<? extends InputFormat> inputFormatClass, int splits,
+      TableDesc table, List<InputSplit> result) throws IOException {
+
+    Utilities.copyTableJobPropertiesToConf(table, conf);
+
+    if (tableScan != null) {
+      pushFilters(conf, tableScan);
+    }
+
+    FileInputFormat.setInputPaths(conf, dirs.toArray(new Path[dirs.size()]));
+    conf.setInputFormat(inputFormat.getClass());
+
+    int headerCount = 0;
+    int footerCount = 0;
+    if (table != null) {
+      headerCount = Utilities.getHeaderCount(table);
+      footerCount = Utilities.getFooterCount(table, conf);
+      if (headerCount != 0 || footerCount != 0) {
+
+        // Input file has a header or footer, so it cannot be split.
+        conf.setLong("mapred.min.split.size", Long.MAX_VALUE);
+      }
+    }
+
+    InputSplit[] iss = inputFormat.getSplits(conf, splits);
+    for (InputSplit is : iss) {
+      result.add(new HiveInputSplit(is, inputFormatClass.getName()));
+    }
+  }
+
   public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
     PerfLogger perfLogger = PerfLogger.getPerfLogger();
     perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.GET_SPLITS);
@@ -264,24 +297,28 @@
       throw new IOException("No input paths specified in job");
     }
     JobConf newjob = new JobConf(job);
-    ArrayList<InputSplit> result = new ArrayList<InputSplit>();
+    List<InputSplit> result = new ArrayList<InputSplit>();
 
+    List<Path> currentDirs = new ArrayList<Path>();
+    Class<? extends InputFormat> currentInputFormatClass = null;
+    TableDesc currentTable = null;
+    TableScanOperator currentTableScan = null;
+
     // for each dir, get the InputFormat, and do getSplits.
     for (Path dir : dirs) {
       PartitionDesc part = getPartitionDescFromPath(pathToPartitionInfo, dir);
-      // create a new InputFormat instance if this is the first time to see this
-      // class
-      Class inputFormatClass = part.getInputFileFormatClass();
-      InputFormat inputFormat = getInputFormatFromCache(inputFormatClass, job);
-      Utilities.copyTableJobPropertiesToConf(part.getTableDesc(), newjob);
+      Class<? extends InputFormat> inputFormatClass = part.getInputFileFormatClass();
+      TableDesc table = part.getTableDesc();
+      TableScanOperator tableScan = null;
 
+      List<String> aliases =
+          mrwork.getPathToAliases().get(dir.toUri().toString());
+
       // Make filter pushdown information available to getSplits.
-      ArrayList<String> aliases =
-          mrwork.getPathToAliases().get(dir.toUri().toString());
       if ((aliases != null) && (aliases.size() == 1)) {
         Operator op = mrwork.getAliasToWork().get(aliases.get(0));
         if ((op != null) && (op instanceof TableScanOperator)) {
-          TableScanOperator tableScan = (TableScanOperator) op;
+          tableScan = (TableScanOperator) op;
           // push down projections.
           ColumnProjectionUtils.appendReadColumns(
               newjob, tableScan.getNeededColumnIDs(), tableScan.getNeededColumns());
@@ -290,26 +327,35 @@
         }
       }
 
-      FileInputFormat.setInputPaths(newjob, dir);
-      newjob.setInputFormat(inputFormat.getClass());
-      TableDesc tableDesc = part.getTableDesc();
-      int headerCount = 0;
-      int footerCount = 0;
-      if (tableDesc != null) {
-        headerCount = Utilities.getHeaderCount(tableDesc);
-        footerCount = Utilities.getFooterCount(tableDesc, newjob);
-        if (headerCount != 0 || footerCount != 0) {
+      if (!currentDirs.isEmpty() &&
+          inputFormatClass.equals(currentInputFormatClass) &&
+          table.equals(currentTable) &&
+          tableScan == currentTableScan) {
+        currentDirs.add(dir);
+        continue;
+      }
 
-          // Input file has header or footer, cannot be splitted.
-          newjob.setLong("mapred.min.split.size", Long.MAX_VALUE);
-        }
+      if (!currentDirs.isEmpty()) {
+        LOG.info("Generating splits");
+        addSplitsForGroup(currentDirs, currentTableScan, newjob,
+            getInputFormatFromCache(currentInputFormatClass, job),
+            currentInputFormatClass, currentDirs.size()*(numSplits / dirs.length),
+            currentTable, result);
       }
 
-      InputSplit[] iss = inputFormat.getSplits(newjob, numSplits / dirs.length);
-      for (InputSplit is : iss) {
-        result.add(new HiveInputSplit(is, inputFormatClass.getName()));
-      }
+
+      currentDirs.clear();
+      currentDirs.add(dir);
+      currentTableScan = tableScan;
+      currentTable = table;
+      currentInputFormatClass = inputFormatClass;
     }
 
+    LOG.info("Generating splits");
+    addSplitsForGroup(currentDirs, currentTableScan, newjob,
+        getInputFormatFromCache(currentInputFormatClass, job),
+        currentInputFormatClass, currentDirs.size()*(numSplits / dirs.length),
+        currentTable, result);
+
     LOG.info("number of splits " + result.size());
     perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.GET_SPLITS);
     return result.toArray(new HiveInputSplit[result.size()]);
Index: ql/src/java/org/apache/hadoop/hive/ql/io/IOContext.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/io/IOContext.java	(revision 1555253)
+++ ql/src/java/org/apache/hadoop/hive/ql/io/IOContext.java	(working copy)
@@ -19,6 +19,7 @@
 package org.apache.hadoop.hive.ql.io;
 
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.session.SessionState;
 
 
 /**
@@ -37,12 +38,19 @@
     protected synchronized IOContext initialValue() { return new IOContext(); }
   };
 
+  private static IOContext ioContext = new IOContext();
+
   public static IOContext get() {
+    if (SessionState.get() == null) {
+      // this happens on the backend. only one io context needed.
+      return ioContext;
+    }
     return IOContext.threadLocal.get();
   }
 
   public static void clear() {
     IOContext.threadLocal.remove();
+    ioContext = new IOContext();
   }
 
   long currentBlockStart;
Index: ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java	(revision 1555253)
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java	(working copy)
@@ -18,14 +18,15 @@
 
 package org.apache.hadoop.hive.ql.io.orc;
 
+import java.io.IOException;
+
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.io.orc.Reader.FileMetaInfo;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 
-import java.io.IOException;
-
 /**
  * Contains factory methods to read or write ORC files.
 */
@@ -126,6 +127,11 @@
     return new ReaderImpl(fs, path);
   }
 
+  public static Reader createReader(FileSystem fs, Path path, FileMetaInfo fileMetaInfo)
+      throws IOException {
+    return new ReaderImpl(fs, path, fileMetaInfo);
+  }
+
   /**
    * Options for creating ORC file writers.
    */
@@ -307,4 +313,5 @@
     }
     return memoryManager;
   }
+
 }
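The new createReader() overload is what OrcInputFormat.getRecordReader() (below) uses when a split already carries footer metadata; a minimal sketch of that dispatch, assuming the OrcSplit accessors referenced in this patch:

import java.io.IOException;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.orc.OrcFile;
import org.apache.hadoop.hive.ql.io.orc.OrcSplit;
import org.apache.hadoop.hive.ql.io.orc.Reader;

// Illustrative: reuse cached footer metadata when the split carries it,
// otherwise fall back to re-reading the file tail.
public class OrcReaderDispatchSketch {
  static Reader open(FileSystem fs, Path path, OrcSplit split) throws IOException {
    if (split != null && split.hasFooter()) {
      return OrcFile.createReader(fs, path, split.getFileMetaInfo());
    }
    return OrcFile.createReader(fs, path);
  }
}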
Index: ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java	(revision 1555253)
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java	(working copy)
@@ -27,6 +27,7 @@
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
 import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -37,12 +38,18 @@
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.PathFilter;
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedInputFormatInterface;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 import org.apache.hadoop.hive.ql.io.InputFormatChecker;
+import org.apache.hadoop.hive.ql.io.orc.Metadata;
+import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.FileGenerator;
+import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.SplitGenerator;
+import org.apache.hadoop.hive.ql.io.orc.Reader.FileMetaInfo;
 import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
+import org.apache.hadoop.hive.ql.log.PerfLogger;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
 import org.apache.hadoop.hive.ql.plan.TableScanDesc;
 import org.apache.hadoop.hive.serde.serdeConstants;
@@ -59,6 +66,10 @@
 import org.apache.hadoop.mapred.RecordReader;
 import org.apache.hadoop.mapred.Reporter;
 import org.apache.hadoop.util.StringUtils;
+
+import com.google.common.cache.Cache;
+import com.google.common.cache.CacheBuilder;
+import com.google.common.util.concurrent.ThreadFactoryBuilder;
 
 /**
  * A MapReduce/Hive input format for ORC files.
 */
@@ -70,9 +81,13 @@
   private static final Log LOG = LogFactory.getLog(OrcInputFormat.class);
   static final String MIN_SPLIT_SIZE = "mapred.min.split.size";
   static final String MAX_SPLIT_SIZE = "mapred.max.split.size";
+
   private static final long DEFAULT_MIN_SPLIT_SIZE = 16 * 1024 * 1024;
   private static final long DEFAULT_MAX_SPLIT_SIZE = 256 * 1024 * 1024;
 
+  private static final PerfLogger perfLogger = PerfLogger.getPerfLogger();
+  private static final String CLASS_NAME = ReaderImpl.class.getName();
+
   /**
    * When picking the hosts for a split that crosses block boundaries,
    * any drop any host that has fewer than MIN_INCLUDED_LOCATION of the
@@ -169,7 +184,7 @@
     String serializedPushdown = conf.get(TableScanDesc.FILTER_EXPR_CONF_STR);
     if (serializedPushdown == null
         || conf.get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR) == null) {
-      LOG.info("No ORC pushdown predicate");
+      LOG.debug("No ORC pushdown predicate");
       return null;
     }
     SearchArgument sarg = SearchArgument.FACTORY.create
@@ -181,7 +196,9 @@
   public static String[] getIncludedColumnNames(
       List<OrcProto.Type> types, boolean[] includedColumns, Configuration conf) {
     String columnNamesString = conf.get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR);
-    LOG.info("included columns names = " + columnNamesString);
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("included columns names = " + columnNamesString);
+    }
     if (columnNamesString == null || conf.get(TableScanDesc.FILTER_EXPR_CONF_STR) == null) {
       return null;
     }
@@ -236,13 +253,27 @@
           reporter);
       return (RecordReader) vorr;
     }
+    FileSplit fSplit = (FileSplit)inputSplit;
+    reporter.setStatus(fSplit.toString());
+    Path path = fSplit.getPath();
+    FileSystem fs = path.getFileSystem(conf);
+    Reader reader = null;
 
-    FileSplit fileSplit = (FileSplit) inputSplit;
-    Path path = fileSplit.getPath();
-    FileSystem fs = path.getFileSystem(conf);
-    reporter.setStatus(fileSplit.toString());
-    return new OrcRecordReader(OrcFile.createReader(fs, path), conf,
-        fileSplit.getStart(), fileSplit.getLength());
+    if(!(fSplit instanceof OrcSplit)){
+      //If CombineHiveInputFormat is used, it works with FileSplit and not OrcSplit
+      reader = OrcFile.createReader(fs, path);
+    } else {
+      //We have OrcSplit, which may have footer metadata cached, so use the appropriate reader
+      //constructor
+      OrcSplit orcSplit = (OrcSplit) fSplit;
+      if (orcSplit.hasFooter()) {
+        FileMetaInfo fMetaInfo = orcSplit.getFileMetaInfo();
+        reader = OrcFile.createReader(fs, path, fMetaInfo);
+      } else {
+        reader = OrcFile.createReader(fs, path);
+      }
+    }
+    return new OrcRecordReader(reader, conf, fSplit.getStart(), fSplit.getLength());
   }
 
   @Override
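The Context class in the next hunk lazily builds a process-wide Guava cache for ORC file footers; a minimal sketch of that construction, assuming Guava on the classpath (the key/value types are simplified here relative to the patch's Cache of Path to FileInfo):

import org.apache.hadoop.fs.Path;

import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;

// Illustrative: soft values let the JVM drop cached footers under memory
// pressure instead of pinning them for the life of the process.
public class FooterCacheSketch {
  static Cache<Path, Object> build(int numThreads, int cacheStripeDetailsSize) {
    return CacheBuilder.newBuilder()
        .concurrencyLevel(numThreads)
        .initialCapacity(cacheStripeDetailsSize)
        .softValues()
        .build();
  }
}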
*/ static class Context { - private final ExecutorService threadPool = Executors.newFixedThreadPool(10); - private final List splits = new ArrayList(10000); + private final Configuration conf; + private static Cache footerCache; + private final ExecutorService threadPool; + private final List splits = new ArrayList(10000); private final List errors = new ArrayList(); private final HadoopShims shims = ShimLoader.getHadoopShims(); - private final Configuration conf; private final long maxSize; private final long minSize; + private final boolean footerInSplits; + private final boolean cacheStripeDetails; + private final AtomicInteger cacheHitCounter = new AtomicInteger(0); + private final AtomicInteger numFilesCounter = new AtomicInteger(0); + private Throwable fatalError = null; /** * A count of the number of threads that may create more work for the @@ -317,6 +354,22 @@ this.conf = conf; minSize = conf.getLong(MIN_SPLIT_SIZE, DEFAULT_MIN_SPLIT_SIZE); maxSize = conf.getLong(MAX_SPLIT_SIZE, DEFAULT_MAX_SPLIT_SIZE); + footerInSplits = HiveConf.getBoolVar(conf, ConfVars.HIVE_ORC_INCLUDE_FILE_FOOTER_IN_SPLITS); + int cacheStripeDetailsSize = HiveConf.getIntVar(conf, + ConfVars.HIVE_ORC_CACHE_STRIPE_DETAILS_SIZE); + int numThreads = HiveConf.getIntVar(conf, ConfVars.HIVE_ORC_COMPUTE_SPLITS_NUM_THREADS); + + cacheStripeDetails = (cacheStripeDetailsSize > 0); + + threadPool = Executors.newFixedThreadPool(numThreads, + new ThreadFactoryBuilder().setDaemon(true).setNameFormat("ORC_GET_SPLITS #%d").build()); + + synchronized (Context.class) { + if (footerCache == null && cacheStripeDetails) { + footerCache = CacheBuilder.newBuilder().concurrencyLevel(numThreads) + .initialCapacity(cacheStripeDetailsSize).softValues().build(); + } + } } int getSchedulers() { @@ -329,7 +382,7 @@ * the back. * @result the Nth file split */ - FileSplit getResult(int index) { + OrcSplit getResult(int index) { if (index >= 0) { return splits.get(index); } else { @@ -346,10 +399,14 @@ * @param runnable the object to run */ synchronized void schedule(Runnable runnable) { - if (runnable instanceof FileGenerator) { - schedulers += 1; + if (fatalError == null) { + if (runnable instanceof FileGenerator || runnable instanceof SplitGenerator) { + schedulers += 1; + } + threadPool.execute(runnable); + } else { + throw new RuntimeException("serious problem", fatalError); } - threadPool.execute(runnable); } /** @@ -362,6 +419,11 @@ } } + synchronized void notifyOnNonIOException(Throwable th) { + fatalError = th; + notify(); + } + /** * Wait until all of the tasks are done. 
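The Context constructor above assembles the two shared resources for split computation. In isolation the construction looks like this, a sketch assuming Guava on the classpath, with numThreads and cacheStripeDetailsSize read from the configuration as shown:

    import java.util.concurrent.ExecutorService;
    import java.util.concurrent.Executors;
    import org.apache.hadoop.fs.Path;
    import com.google.common.cache.Cache;
    import com.google.common.cache.CacheBuilder;
    import com.google.common.util.concurrent.ThreadFactoryBuilder;

    // Soft values: entries may be reclaimed under memory pressure, so a miss
    // can happen even for a previously cached file.
    static Cache<Path, FileInfo> newFooterCache(int numThreads, int cacheStripeDetailsSize) {
      return CacheBuilder.newBuilder()
          .concurrencyLevel(numThreads)
          .initialCapacity(cacheStripeDetailsSize)
          .softValues()
          .build();
    }

    // Daemon threads with a recognizable name: split computation never keeps
    // the JVM alive, and the workers show up clearly in thread dumps.
    static ExecutorService newSplitPool(int numThreads) {
      return Executors.newFixedThreadPool(numThreads,
          new ThreadFactoryBuilder().setDaemon(true)
              .setNameFormat("ORC_GET_SPLITS #%d").build());
    }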
It waits until all of the * threads that may create more work are done and then shuts down the @@ -371,6 +433,10 @@ try { while (schedulers != 0) { wait(); + if (fatalError != null) { + threadPool.shutdownNow(); + throw new RuntimeException("serious problem", fatalError); + } } threadPool.shutdown(); threadPool.awaitTermination(Long.MAX_VALUE, TimeUnit.DAYS); @@ -406,18 +472,57 @@ while (itr.hasNext()) { FileStatus file = itr.next(); if (!file.isDir()) { - context.schedule(new SplitGenerator(context, fs, file)); + FileInfo fileInfo = null; + if (context.cacheStripeDetails) { + fileInfo = verifyCachedFileInfo(file); + } + SplitGenerator spgen = new SplitGenerator(context, fs, file, fileInfo); + spgen.schedule(); } } - // mark the fact that we are done - context.decrementSchedulers(); } catch (Throwable th) { - context.decrementSchedulers(); + if (!(th instanceof IOException)) { + LOG.error("Unexpected Exception", th); + } synchronized (context.errors) { context.errors.add(th); } + if (!(th instanceof IOException)) { + context.notifyOnNonIOException(th); + } + } finally { + context.decrementSchedulers(); } } + + private FileInfo verifyCachedFileInfo(FileStatus file) { + context.numFilesCounter.incrementAndGet(); + FileInfo fileInfo = Context.footerCache.getIfPresent(file.getPath()); + if (fileInfo != null) { + if (LOG.isDebugEnabled()) { + LOG.debug("Info cached for path: " + file.getPath()); + } + if (fileInfo.modificationTime == file.getModificationTime() && fileInfo.size == file.getLen()) { + // Cached copy is valid + context.cacheHitCounter.incrementAndGet(); + return fileInfo; + } else { + // Invalidate + Context.footerCache.invalidate(file.getPath()); + if (LOG.isDebugEnabled()) { + LOG.debug("Meta-Info for : " + file.getPath() + " changed. CachedModificationTime: " + + fileInfo.modificationTime + ", CurrentModificationTime: " + + file.getModificationTime() + + ", CachedLength: " + fileInfo.size + ", CurrentLength: " + file.getLen()); + } + } + } else { + if (LOG.isDebugEnabled()) { + LOG.debug("Info not cached for path: " + file.getPath()); + } + } + return null; + } } /** @@ -430,13 +535,20 @@ private final FileStatus file; private final long blockSize; private final BlockLocation[] locations; + private final FileInfo fileInfo; + private Iterable stripes; + private FileMetaInfo fileMetaInfo; + private Metadata metadata; + private List types; + SplitGenerator(Context context, FileSystem fs, - FileStatus file) throws IOException { + FileStatus file, FileInfo fileInfo) throws IOException { this.context = context; this.fs = fs; this.file = file; this.blockSize = file.getBlockSize(); + this.fileInfo = fileInfo; locations = context.shims.getLocations(fs, file); } @@ -444,6 +556,19 @@ return file.getPath(); } + void schedule() throws IOException { + if(locations.length == 1 && file.getLen() < context.maxSize) { + String[] hosts = locations[0].getHosts(); + synchronized (context.splits) { + context.splits.add(new OrcSplit(file.getPath(), 0, file.getLen(), + hosts, fileMetaInfo)); + } + } else { + // if it requires a compute task + context.schedule(this); + } + } + @Override public String toString() { return "splitter(" + file.getPath() + ")"; @@ -475,9 +600,10 @@ * are written with large block sizes. 
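verifyCachedFileInfo above treats the pair (modification time, length) as a fingerprint: if either differs, the file was rewritten and the cached stripe details are stale. The check, restated as a standalone sketch over the FileInfo type declared later in this patch:

    // Return a cached FileInfo only if the on-disk file still matches it.
    static FileInfo validCachedInfo(Cache<Path, FileInfo> footerCache, FileStatus file) {
      FileInfo cached = footerCache.getIfPresent(file.getPath());
      if (cached == null) {
        return null;                          // never seen, or evicted (soft values)
      }
      if (cached.modificationTime == file.getModificationTime()
          && cached.size == file.getLen()) {
        return cached;                        // hit: footer details still valid
      }
      footerCache.invalidate(file.getPath()); // file changed underneath us
      return null;
    }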
* @param offset the start of the split * @param length the length of the split + * @param fileMetaInfo file metadata from footer and postscript * @throws IOException */ - void createSplit(long offset, long length) throws IOException { + void createSplit(long offset, long length, FileMetaInfo fileMetaInfo) throws IOException { String[] hosts; if ((offset % blockSize) + length <= blockSize) { // handle the single block case @@ -521,8 +647,8 @@ hostList.toArray(hosts); } synchronized (context.splits) { - context.splits.add(new FileSplit(file.getPath(), offset, length, - hosts)); + context.splits.add(new OrcSplit(file.getPath(), offset, length, + hosts, fileMetaInfo)); } } @@ -533,9 +659,8 @@ @Override public void run() { try { - Reader orcReader = OrcFile.createReader(fs, file.getPath()); + populateAndCacheStripeDetails(); Configuration conf = context.conf; - List types = orcReader.getTypes(); SearchArgument sarg = createSarg(types, conf); List stripeStats = null; int[] filterColumns = null; @@ -558,7 +683,6 @@ } } - Metadata metadata = orcReader.getMetadata(); stripeStats = metadata.getStripeStatistics(); } @@ -565,7 +689,7 @@ long currentOffset = -1; long currentLength = 0; int idx = -1; - for(StripeInformation stripe: orcReader.getStripes()) { + for(StripeInformation stripe: stripes) { idx++; // eliminate stripes that doesn't satisfy the predicate condition @@ -579,7 +703,7 @@ // create split for the previous unfinished stripe if (currentOffset != -1) { - createSplit(currentOffset, currentLength); + createSplit(currentOffset, currentLength, fileMetaInfo); currentOffset = -1; } continue; @@ -589,7 +713,7 @@ // crossed a block boundary, cut the input split here. if (currentOffset != -1 && currentLength > context.minSize && (currentOffset / blockSize != stripe.getOffset() / blockSize)) { - createSplit(currentOffset, currentLength); + createSplit(currentOffset, currentLength, fileMetaInfo); currentOffset = -1; } // if we aren't building a split, start a new one. @@ -600,20 +724,72 @@ currentLength += stripe.getLength(); } if (currentLength >= context.maxSize) { - createSplit(currentOffset, currentLength); + createSplit(currentOffset, currentLength, fileMetaInfo); currentOffset = -1; } } if (currentOffset != -1) { - createSplit(currentOffset, currentLength); + createSplit(currentOffset, currentLength, fileMetaInfo); } } catch (Throwable th) { + if (!(th instanceof IOException)) { + LOG.error("Unexpected Exception", th); + } synchronized (context.errors) { context.errors.add(th); } + if (!(th instanceof IOException)) { + context.notifyOnNonIOException(th); + } + } finally { + context.decrementSchedulers(); } } + private void populateAndCacheStripeDetails() { + try { + Reader orcReader; + boolean found = false; + if (fileInfo != null) { + found = true; + stripes = fileInfo.stripeInfos; + fileMetaInfo = fileInfo.fileMetaInfo; + metadata = fileInfo.metadata; + types = fileInfo.types; + // For multiple runs, in case sendSplitsInFooter changes + if (fileMetaInfo == null && context.footerInSplits) { + orcReader = OrcFile.createReader(fs, file.getPath()); + fileInfo.fileMetaInfo = orcReader.getFileMetaInfo(); + fileInfo.metadata = orcReader.getMetadata(); + fileInfo.types = orcReader.getTypes(); + } + } + if (!found) { + orcReader = OrcFile.createReader(fs, file.getPath()); + stripes = orcReader.getStripes(); + metadata = orcReader.getMetadata(); + types = orcReader.getTypes(); + fileMetaInfo = context.footerInSplits ? 
orcReader.getFileMetaInfo() : null; + if (context.cacheStripeDetails) { + // Populate into cache. + Context.footerCache.put(file.getPath(), + new FileInfo(file.getModificationTime(), file.getLen(), stripes, metadata, + types, fileMetaInfo)); + } + } + } catch (Throwable th) { + if (!(th instanceof IOException)) { + LOG.error("Unexpected Exception", th); + } + synchronized (context.errors) { + context.errors.add(th); + } + if (!(th instanceof IOException)) { + context.notifyOnNonIOException(th); + } + } + } + private boolean containsColumn(String[] neededColumns, String colName) { for (String col : neededColumns) { if (colName.equalsIgnoreCase(col)) { @@ -677,7 +853,6 @@ return null; } } - } @Override @@ -684,6 +859,7 @@ public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException { // use threads to resolve directories into splits + perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.ORC_GET_SPLITS); Context context = new Context(job); for(Path dir: getInputPaths(job)) { FileSystem fs = dir.getFileSystem(job); @@ -698,7 +874,7 @@ if (th instanceof IOException) { errors.add((IOException) th); } else { - throw new IOException("serious problem", th); + throw new RuntimeException("serious problem", th); } } throw new InvalidInputException(errors); @@ -705,6 +881,37 @@ } InputSplit[] result = new InputSplit[context.splits.size()]; context.splits.toArray(result); + if (context.cacheStripeDetails) { + LOG.info("FooterCacheHitRatio: " + context.cacheHitCounter.get() + "/" + + context.numFilesCounter.get()); + } + perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.ORC_GET_SPLITS); return result; } + + /** + * FileInfo. + * + * Stores information relevant to split generation for an ORC File. + * + */ + private static class FileInfo { + long modificationTime; + long size; + Iterable stripeInfos; + FileMetaInfo fileMetaInfo; + Metadata metadata; + List types; + + + FileInfo(long modificationTime, long size, Iterable stripeInfos, + Metadata metadata, List types, FileMetaInfo fileMetaInfo) { + this.modificationTime = modificationTime; + this.size = size; + this.stripeInfos = stripeInfos; + this.fileMetaInfo = fileMetaInfo; + this.metadata = metadata; + this.types = types; + } + } } Index: ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java (working copy) @@ -0,0 +1,92 @@ +package org.apache.hadoop.hive.ql.io.orc; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.nio.ByteBuffer; + +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.ql.io.orc.Reader.FileMetaInfo; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.WritableUtils; +import org.apache.hadoop.mapred.FileSplit; + + + +/** + * OrcFileSplit. Holds file meta info + * + */ +public class OrcSplit extends FileSplit { + private Reader.FileMetaInfo fileMetaInfo; + private boolean hasFooter; + + protected OrcSplit(){ + //The FileSplit() constructor in hadoop 0.20 and 1.x is package private so can't use it. + //This constructor is used to create the object and then call readFields() + // so just pass nulls to this super constructor. 
+ super(null, 0, 0, (String[])null); + } + + public OrcSplit(Path path, long offset, long length, String[] hosts, + FileMetaInfo fileMetaInfo) { + super(path, offset, length, hosts); + this.fileMetaInfo = fileMetaInfo; + hasFooter = this.fileMetaInfo != null; + } + + @Override + public void write(DataOutput out) throws IOException { + //serialize path, offset, length using FileSplit + super.write(out); + + // Whether footer information follows. + out.writeBoolean(hasFooter); + + if (hasFooter) { + // serialize FileMetaInfo fields + Text.writeString(out, fileMetaInfo.compressionType); + WritableUtils.writeVInt(out, fileMetaInfo.bufferSize); + WritableUtils.writeVInt(out, fileMetaInfo.metadataSize); + + // serialize FileMetaInfo field footer + ByteBuffer footerBuff = fileMetaInfo.footerBuffer; + footerBuff.reset(); + + // write length of buffer + WritableUtils.writeVInt(out, footerBuff.limit() - footerBuff.position()); + out.write(footerBuff.array(), footerBuff.position(), + footerBuff.limit() - footerBuff.position()); + } + } + + @Override + public void readFields(DataInput in) throws IOException { + //deserialize path, offset, length using FileSplit + super.readFields(in); + + hasFooter = in.readBoolean(); + + if (hasFooter) { + // deserialize FileMetaInfo fields + String compressionType = Text.readString(in); + int bufferSize = WritableUtils.readVInt(in); + int metadataSize = WritableUtils.readVInt(in); + + // deserialize FileMetaInfo field footer + int footerBuffSize = WritableUtils.readVInt(in); + ByteBuffer footerBuff = ByteBuffer.allocate(footerBuffSize); + in.readFully(footerBuff.array(), 0, footerBuffSize); + + fileMetaInfo = new FileMetaInfo(compressionType, bufferSize, metadataSize, footerBuff); + } + } + + public FileMetaInfo getFileMetaInfo(){ + return fileMetaInfo; + } + + public boolean hasFooter() { + return hasFooter; + } +} Index: ql/src/java/org/apache/hadoop/hive/ql/io/orc/Reader.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/Reader.java (revision 1555253) +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/Reader.java (working copy) @@ -122,6 +122,30 @@ List getTypes(); /** + * FileMetaInfo - represents file metadata stored in footer and postscript sections of the file + * that is useful for Reader implementation + * + */ + class FileMetaInfo{ + final String compressionType; + final int bufferSize; + final int metadataSize; + final ByteBuffer footerBuffer; + FileMetaInfo(String compressionType, int bufferSize, int metadataSize, ByteBuffer footerBuffer){ + this.compressionType = compressionType; + this.bufferSize = bufferSize; + this.metadataSize = metadataSize; + this.footerBuffer = footerBuffer; + } + } + + /** + * Get the metadata stored in footer and postscript sections of the file + * @return MetaInfo object with file metadata + */ + FileMetaInfo getFileMetaInfo(); + + /** * Create a RecordReader that will scan the entire file. 
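The write/readFields pair above uses a boolean flag followed by an optional length-prefixed payload, so a split without footer data costs exactly one extra byte on the wire. A sketch of a round trip through Hadoop's in-memory buffers (the no-arg constructor is protected, so this only compiles in a class alongside OrcSplit):

    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.DataInputBuffer;
    import org.apache.hadoop.io.DataOutputBuffer;

    OrcSplit original = new OrcSplit(new Path("/tmp/demo.orc"), 0, 1024,
        new String[]{"host1"}, null);   // null FileMetaInfo => hasFooter == false

    DataOutputBuffer out = new DataOutputBuffer();
    original.write(out);                // FileSplit fields, then the boolean flag

    OrcSplit copy = new OrcSplit();     // protected: same-package access assumed
    DataInputBuffer in = new DataInputBuffer();
    in.reset(out.getData(), out.getLength());
    copy.readFields(in);                // copy.hasFooter() == false, no footer read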
* @param include true for each column that should be included + * @return A new RecordReader Index: ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java (revision 1555253) +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java (working copy) @@ -35,6 +35,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.ql.io.orc.OrcProto.Type; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; +import org.apache.hadoop.hive.ql.log.PerfLogger; import org.apache.hadoop.hive.ql.util.JavaDataModel; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.io.Text; @@ -56,11 +57,18 @@ private final int bufferSize; private OrcProto.Metadata metadata = null; private final int metadataSize; - private final int footerOffset; private final OrcProto.Footer footer; private final ObjectInspector inspector; private long deserializedSize = -1; + // Serialized footer - keeping this around for use by getFileMetaInfo() + // avoids CPU cycles spent deserializing it again, at the cost of an increased + // memory footprint. + private final ByteBuffer footerByteBuffer; + + private static final PerfLogger perfLogger = PerfLogger.getPerfLogger(); + private static final String CLASS_NAME = ReaderImpl.class.getName(); + private static class StripeInformationImpl implements StripeInformation { private final OrcProto.StripeInformation stripe; @@ -276,10 +284,66 @@ } } + /** + * Constructor that extracts the metadata information from the file footer. + * @param fs the file system to read from + * @param path the path to the ORC file + * @throws IOException + */ ReaderImpl(FileSystem fs, Path path) throws IOException { this.fileSystem = fs; this.path = path; + + FileMetaInfo footerMetaData = extractMetaInfoFromFooter(fs, path); + + MetaInfoObjExtractor rInfo = new MetaInfoObjExtractor(footerMetaData.compressionType, + footerMetaData.bufferSize, footerMetaData.metadataSize, footerMetaData.footerBuffer); + + this.footerByteBuffer = footerMetaData.footerBuffer; + this.compressionKind = rInfo.compressionKind; + this.codec = rInfo.codec; + this.bufferSize = rInfo.bufferSize; + this.metadataSize = rInfo.metadataSize; + this.metadata = rInfo.metadata; + this.footer = rInfo.footer; + this.inspector = rInfo.inspector; + } + + + /** + * Constructor that takes already saved footer meta information. 
Used for creating RecordReader + * from saved information in InputSplit + * @param fs + * @param path + * @param fMetaInfo + * @throws IOException + */ + ReaderImpl(FileSystem fs, Path path, FileMetaInfo fMetaInfo) + throws IOException { + this.fileSystem = fs; + this.path = path; + + MetaInfoObjExtractor rInfo = new MetaInfoObjExtractor( + fMetaInfo.compressionType, + fMetaInfo.bufferSize, + fMetaInfo.metadataSize, + fMetaInfo.footerBuffer + ); + this.footerByteBuffer = fMetaInfo.footerBuffer; + this.compressionKind = rInfo.compressionKind; + this.codec = rInfo.codec; + this.bufferSize = rInfo.bufferSize; + this.metadataSize = rInfo.metadataSize; + this.metadata = rInfo.metadata; + this.footer = rInfo.footer; + this.inspector = rInfo.inspector; + } + + + private static FileMetaInfo extractMetaInfoFromFooter(FileSystem fs, Path path) throws IOException { FSDataInputStream file = fs.open(path); + + //read last bytes into buffer to get PostScript long size = fs.getFileStatus(path).getLen(); int readSize = (int) Math.min(size, DIRECTORY_SIZE_GUESS); file.seek(size - readSize); @@ -286,6 +350,9 @@ ByteBuffer buffer = ByteBuffer.allocate(readSize); file.readFully(buffer.array(), buffer.arrayOffset() + buffer.position(), buffer.remaining()); + + //read the PostScript + //get length of PostScript int psLen = buffer.get(readSize - 1) & 0xff; ensureOrcFooter(file, path, psLen, buffer); int psOffset = readSize - 1 - psLen; @@ -292,62 +359,110 @@ CodedInputStream in = CodedInputStream.newInstance(buffer.array(), buffer.arrayOffset() + psOffset, psLen); OrcProto.PostScript ps = OrcProto.PostScript.parseFrom(in); + checkOrcVersion(LOG, path, ps.getVersionList()); + int footerSize = (int) ps.getFooterLength(); - metadataSize = (int) ps.getMetadataLength(); - footerOffset = (int) (size - ( psLen + 1 + footerSize)); - bufferSize = (int) ps.getCompressionBlockSize(); + int metadataSize = (int) ps.getMetadataLength(); + + //check compression codec switch (ps.getCompression()) { case NONE: - compressionKind = CompressionKind.NONE; break; case ZLIB: - compressionKind = CompressionKind.ZLIB; break; case SNAPPY: - compressionKind = CompressionKind.SNAPPY; break; case LZO: - compressionKind = CompressionKind.LZO; break; default: throw new IllegalArgumentException("Unknown compression"); } - codec = WriterImpl.createCodec(compressionKind); - int extra = Math.max(0, psLen + 1 + footerSize - readSize); + + //check if extra bytes need to be read + int extra = Math.max(0, psLen + 1 + footerSize + metadataSize - readSize); if (extra > 0) { + //more bytes need to be read, seek back to the right place and read extra bytes file.seek(size - readSize - extra); ByteBuffer extraBuf = ByteBuffer.allocate(extra + readSize); file.readFully(extraBuf.array(), extraBuf.arrayOffset() + extraBuf.position(), extra); extraBuf.position(extra); + //append with already read bytes extraBuf.put(buffer); buffer = extraBuf; buffer.position(0); - buffer.limit(footerSize); + buffer.limit(footerSize + metadataSize); } else { - buffer.position(psOffset - footerSize); + //footer is already in the bytes in buffer, just adjust position, length + buffer.position(psOffset - footerSize - metadataSize); buffer.limit(psOffset); } - // read footer - InputStream instream = InStream.create("footer", new ByteBuffer[]{buffer}, - new long[]{0L}, footerSize, codec, bufferSize); - footer = OrcProto.Footer.parseFrom(instream); - inspector = OrcStruct.createObjectInspector(0, footer.getTypesList()); - // if metadata is already contained in first 16K file read 
then parse it - // else do it lazily - if(extra == 0) { - buffer.position(psOffset - (footerSize + metadataSize)); - buffer.limit(psOffset - footerSize); - instream = InStream.create("metadata", new ByteBuffer[]{buffer}, + // remember position for later + buffer.mark(); + + file.close(); + + return new FileMetaInfo( + ps.getCompression().toString(), + (int) ps.getCompressionBlockSize(), + (int) ps.getMetadataLength(), + buffer + ); + } + + + + /** + * MetaInfoObjExtractor - has logic to create the values for the fields in ReaderImpl + * from serialized fields. + * As the fields are final, the fields need to be initialized in the constructor and + * can't be done in some helper function. So this helper class is used instead. + * + */ + private static class MetaInfoObjExtractor{ + final CompressionKind compressionKind; + final CompressionCodec codec; + final int bufferSize; + final int metadataSize; + final OrcProto.Metadata metadata; + final OrcProto.Footer footer; + final ObjectInspector inspector; + + MetaInfoObjExtractor(String codecStr, int bufferSize, int metadataSize, + ByteBuffer footerBuffer) throws IOException { + + this.compressionKind = CompressionKind.valueOf(codecStr); + this.bufferSize = bufferSize; + this.codec = WriterImpl.createCodec(compressionKind); + this.metadataSize = metadataSize; + + int position = footerBuffer.position(); + int footerBufferSize = footerBuffer.limit() - footerBuffer.position() - metadataSize; + footerBuffer.limit(position + metadataSize); + + InputStream instream = InStream.create("metadata", new ByteBuffer[]{footerBuffer}, new long[]{0L}, metadataSize, codec, bufferSize); - metadata = OrcProto.Metadata.parseFrom(instream); + this.metadata = OrcProto.Metadata.parseFrom(instream); + + footerBuffer.position(position + metadataSize); + footerBuffer.limit(position + metadataSize + footerBufferSize); + instream = InStream.create("footer", new ByteBuffer[]{footerBuffer}, + new long[]{0L}, footerBufferSize, codec, bufferSize); + this.footer = OrcProto.Footer.parseFrom(instream); + + footerBuffer.position(position); + this.inspector = OrcStruct.createObjectInspector(0, footer.getTypesList()); } + } - file.close(); + public FileMetaInfo getFileMetaInfo(){ + return new FileMetaInfo(compressionKind.toString(), bufferSize, metadataSize, footerByteBuffer); } + + @Override public RecordReader rows(boolean[] include) throws IOException { return rows(0, Long.MAX_VALUE, include, null, null); @@ -497,20 +612,6 @@ @Override public Metadata getMetadata() throws IOException { - // if metadata is not parsed already then read and parse it - if (metadata == null && metadataSize > 0) { - FSDataInputStream file = this.fileSystem.open(path); - file.seek(footerOffset - metadataSize); - ByteBuffer buffer = ByteBuffer.allocate(metadataSize); - file.readFully(buffer.array(), buffer.arrayOffset() + buffer.position(), - buffer.remaining()); - buffer.position(0); - buffer.limit(metadataSize); - InputStream instream = InStream.create("metadata", new ByteBuffer[] {buffer}, - new long[] {0L}, metadataSize, codec, bufferSize); - metadata = OrcProto.Metadata.parseFrom(instream); - file.close(); - } return new Metadata(metadata); } Index: ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java (revision 1555253) +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java (working copy) @@ -31,6 +31,7 @@ 
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; import org.apache.hadoop.hive.ql.io.InputFormatChecker; +import org.apache.hadoop.hive.ql.io.orc.Reader.FileMetaInfo; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.io.NullWritable; @@ -140,11 +141,30 @@ public RecordReader getRecordReader(InputSplit inputSplit, JobConf conf, Reporter reporter) throws IOException { - FileSplit fileSplit = (FileSplit) inputSplit; - Path path = fileSplit.getPath(); + FileSplit fSplit = (FileSplit)inputSplit; + reporter.setStatus(fSplit.toString()); + + Path path = fSplit.getPath(); FileSystem fs = path.getFileSystem(conf); - reporter.setStatus(fileSplit.toString()); - return new VectorizedOrcRecordReader(OrcFile.createReader(fs, path), conf, fileSplit); + + Reader reader = null; + + if(!(fSplit instanceof OrcSplit)){ + //If CombineHiveInputFormat is used, it works with FileSplit and not OrcSplit + reader = OrcFile.createReader(fs, path); + } else { + //We have OrcSplit, which may have footer metadata cached, so use the appropriate reader + //constructor + OrcSplit orcSplit = (OrcSplit) fSplit; + if (orcSplit.hasFooter()) { + FileMetaInfo fMetaInfo = orcSplit.getFileMetaInfo(); + reader = OrcFile.createReader(fs, path, fMetaInfo); + } else { + reader = OrcFile.createReader(fs, path); + } + } + + return new VectorizedOrcRecordReader(reader, conf, fSplit); } @Override Index: ql/src/java/org/apache/hadoop/hive/ql/lib/DefaultGraphWalker.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/lib/DefaultGraphWalker.java (revision 1555253) +++ ql/src/java/org/apache/hadoop/hive/ql/lib/DefaultGraphWalker.java (working copy) @@ -36,9 +36,9 @@ public class DefaultGraphWalker implements GraphWalker { protected Stack opStack; - private final List toWalk = new ArrayList(); - private final HashMap retMap = new HashMap(); - private final Dispatcher dispatcher; + protected final List toWalk = new ArrayList(); + protected final HashMap retMap = new HashMap(); + protected final Dispatcher dispatcher; /** * Constructor. 
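Both record-reader factories now share the OrcSplit-aware dispatch, and both ultimately rely on MetaInfoObjExtractor, which parses two protobuf sections out of a single buffer purely with position/limit arithmetic, so no bytes are copied. The window dance in isolation (a self-contained demo, not the Hive code):

    import java.nio.ByteBuffer;

    public class WindowDemo {
      public static void main(String[] args) {
        // Pretend layout: [ metadata (12 bytes) ][ footer (20 bytes) ]
        ByteBuffer buf = ByteBuffer.wrap(new byte[32]);
        int metadataSize = 12;
        int position = buf.position();
        int footerSize = buf.limit() - position - metadataSize;

        buf.limit(position + metadataSize);               // window 1: metadata only
        // ... parse metadata from buf ...

        buf.position(position + metadataSize);            // step past the metadata
        buf.limit(position + metadataSize + footerSize);  // window 2: footer only
        // ... parse footer from buf ...

        buf.position(position);                           // rewind so the buffer stays reusable
        System.out.println("windows: " + metadataSize + " + " + footerSize + " bytes");
      }
    }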
Index: ql/src/java/org/apache/hadoop/hive/ql/log/PerfLogger.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/log/PerfLogger.java (revision 1555253) +++ ql/src/java/org/apache/hadoop/hive/ql/log/PerfLogger.java (working copy) @@ -53,6 +53,8 @@ public static final String FAILURE_HOOK = "FailureHook."; public static final String DRIVER_RUN = "Driver.run"; public static final String TIME_TO_SUBMIT = "TimeToSubmit"; + public static final String LOAD_HASHTABLE = "LoadHashtable"; + public static final String ORC_GET_SPLITS = "OrcGetSplits"; protected static final ThreadLocal perfLogger = new ThreadLocal(); Index: ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java (revision 1555253) +++ ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java (working copy) @@ -2478,7 +2478,7 @@ try { return getMSC().updateTableColumnStatistics(statsObj); } catch (Exception e) { - LOG.error(StringUtils.stringifyException(e)); + LOG.debug(StringUtils.stringifyException(e)); throw new HiveException(e); } } @@ -2487,7 +2487,7 @@ try { return getMSC().updatePartitionColumnStatistics(statsObj); } catch (Exception e) { - LOG.error(StringUtils.stringifyException(e)); + LOG.debug(StringUtils.stringifyException(e)); throw new HiveException(e); } } @@ -2497,7 +2497,7 @@ try { return getMSC().getTableColumnStatistics(dbName, tableName, colName); } catch (Exception e) { - LOG.error(StringUtils.stringifyException(e)); + LOG.debug(StringUtils.stringifyException(e)); throw new HiveException(e); } @@ -2508,7 +2508,7 @@ try { return getMSC().getPartitionColumnStatistics(dbName, tableName, partName, colName); } catch (Exception e) { - LOG.error(StringUtils.stringifyException(e)); + LOG.debug(StringUtils.stringifyException(e)); throw new HiveException(e); } } @@ -2518,7 +2518,7 @@ try { return getMSC().deleteTableColumnStatistics(dbName, tableName, colName); } catch(Exception e) { - LOG.error(StringUtils.stringifyException(e)); + LOG.debug(StringUtils.stringifyException(e)); throw new HiveException(e); } } @@ -2528,7 +2528,7 @@ try { return getMSC().deletePartitionColumnStatistics(dbName, tableName, partName, colName); } catch(Exception e) { - LOG.error(StringUtils.stringifyException(e)); + LOG.debug(StringUtils.stringifyException(e)); throw new HiveException(e); } } Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractSMBJoinProc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractSMBJoinProc.java (revision 1555253) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractSMBJoinProc.java (working copy) @@ -473,7 +473,7 @@ JoinDesc joinDesc = joinOp.getConf(); JoinCondDesc[] joinCondns = joinDesc.getConds(); Set joinCandidates = MapJoinProcessor.getBigTableCandidates(joinCondns); - if (joinCandidates == null) { + if (joinCandidates.isEmpty()) { // This is a full outer join. This can never be a map-join // of any type. So return false. 
return false; @@ -527,6 +527,7 @@ SortBucketJoinProcCtx joinContext, ParseContext parseContext) throws SemanticException { MapJoinOperator mapJoinOp = MapJoinProcessor.convertMapJoin( + parseContext.getConf(), parseContext.getOpParseCtx(), joinOp, pGraphContext.getJoinContext().get(joinOp), Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java (revision 1555253) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java (working copy) @@ -19,10 +19,8 @@ package org.apache.hadoop.hive.ql.optimizer; import java.io.Serializable; -import java.net.URI; import java.util.ArrayList; import java.util.HashMap; -import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Stack; @@ -31,43 +29,18 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.conf.HiveConf.ConfVars; -import org.apache.hadoop.hive.ql.Context; -import org.apache.hadoop.hive.ql.exec.ColumnInfo; -import org.apache.hadoop.hive.ql.exec.ConditionalTask; -import org.apache.hadoop.hive.ql.exec.DependencyCollectionTask; import org.apache.hadoop.hive.ql.exec.FileSinkOperator; -import org.apache.hadoop.hive.ql.exec.MoveTask; import org.apache.hadoop.hive.ql.exec.Operator; -import org.apache.hadoop.hive.ql.exec.OperatorFactory; -import org.apache.hadoop.hive.ql.exec.RowSchema; import org.apache.hadoop.hive.ql.exec.Task; -import org.apache.hadoop.hive.ql.exec.TaskFactory; import org.apache.hadoop.hive.ql.exec.UnionOperator; -import org.apache.hadoop.hive.ql.exec.Utilities; -import org.apache.hadoop.hive.ql.io.RCFileInputFormat; -import org.apache.hadoop.hive.ql.io.rcfile.merge.MergeWork; import org.apache.hadoop.hive.ql.lib.Node; import org.apache.hadoop.hive.ql.lib.NodeProcessor; import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.SemanticException; -import org.apache.hadoop.hive.ql.plan.ConditionalResolverMergeFiles; -import org.apache.hadoop.hive.ql.plan.ConditionalResolverMergeFiles.ConditionalResolverMergeFilesCtx; -import org.apache.hadoop.hive.ql.plan.ConditionalWork; -import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx; import org.apache.hadoop.hive.ql.plan.FileSinkDesc; -import org.apache.hadoop.hive.ql.plan.LoadFileDesc; -import org.apache.hadoop.hive.ql.plan.MapWork; import org.apache.hadoop.hive.ql.plan.MapredWork; -import org.apache.hadoop.hive.ql.plan.MoveWork; import org.apache.hadoop.hive.ql.plan.OperatorDesc; -import org.apache.hadoop.hive.ql.plan.PartitionDesc; -import org.apache.hadoop.hive.ql.plan.StatsWork; -import org.apache.hadoop.hive.ql.plan.TableDesc; -import org.apache.hadoop.hive.ql.stats.StatsFactory; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; -import org.apache.hadoop.mapred.InputFormat; /** * Processor for the rule - table scan followed by reduce sink. 
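The hunk below collapses GenMRFileSink1's inline merge trigger into GenMapRedUtils.isMergeRequired. The deleted branch (guarded in the original by mvTask != null, !mvTask.isLocal() and fsOp.getConf().canBeMerged()) reduces to the following paraphrase, with variable names taken from the removed code:

    boolean chDir;
    if (fsOp.getConf().isLinkedFileSink()) {
      // Linked file sinks can inflate the file count sharply, so merge
      // aggressively if either merge flag is enabled.
      chDir = hconf.getBoolVar(ConfVars.HIVEMERGEMAPFILES)
          || hconf.getBoolVar(ConfVars.HIVEMERGEMAPREDFILES);
    } else {
      // Separate flags control merging for map-only and map-reduce jobs.
      MapredWork currWork = (MapredWork) currTask.getWork();
      boolean mergeMapOnly =
          hconf.getBoolVar(ConfVars.HIVEMERGEMAPFILES) && currWork.getReduceWork() == null;
      boolean mergeMapRed =
          hconf.getBoolVar(ConfVars.HIVEMERGEMAPREDFILES) && currWork.getReduceWork() != null;
      chDir = mergeMapOnly || mergeMapRed;
    }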
@@ -97,8 +70,7 @@ FileSinkOperator fsOp = (FileSinkOperator) nd; boolean isInsertTable = // is INSERT OVERWRITE TABLE - fsOp.getConf().getTableInfo().getTableName() != null && - parseCtx.getQB().getParseInfo().isInsertToTable(); + GenMapRedUtils.isInsertInto(parseCtx, fsOp); HiveConf hconf = parseCtx.getConf(); // Mark this task as a final map reduce task (ignoring the optional merge task) @@ -113,49 +85,12 @@ return true; } - // Has the user enabled merging of files for map-only jobs or for all jobs - if ((ctx.getMvTask() != null) && (!ctx.getMvTask().isEmpty())) { - List> mvTasks = ctx.getMvTask(); - - // In case of unions or map-joins, it is possible that the file has - // already been seen. - // So, no need to attempt to merge the files again. - if ((ctx.getSeenFileSinkOps() == null) - || (!ctx.getSeenFileSinkOps().contains(nd))) { - - // no need of merging if the move is to a local file system - MoveTask mvTask = (MoveTask) findMoveTask(mvTasks, fsOp); - - if (mvTask != null && isInsertTable && hconf.getBoolVar(ConfVars.HIVESTATSAUTOGATHER)) { - addStatsTask(fsOp, mvTask, currTask, parseCtx.getConf()); - } - - if ((mvTask != null) && !mvTask.isLocal() && fsOp.getConf().canBeMerged()) { - if (fsOp.getConf().isLinkedFileSink()) { - // If the user has HIVEMERGEMAPREDFILES set to false, the idea was the - // number of reducers are few, so the number of files anyway are small. - // However, with this optimization, we are increasing the number of files - // possibly by a big margin. So, merge aggresively. - if (hconf.getBoolVar(ConfVars.HIVEMERGEMAPFILES) || - hconf.getBoolVar(ConfVars.HIVEMERGEMAPREDFILES)) { - chDir = true; - } - } else { - // There are separate configuration parameters to control whether to - // merge for a map-only job - // or for a map-reduce job - MapredWork currWork = (MapredWork) currTask.getWork(); - boolean mergeMapOnly = - hconf.getBoolVar(ConfVars.HIVEMERGEMAPFILES) && currWork.getReduceWork() == null; - boolean mergeMapRed = - hconf.getBoolVar(ConfVars.HIVEMERGEMAPREDFILES) && - currWork.getReduceWork() != null; - if (mergeMapOnly || mergeMapRed) { - chDir = true; - } - } - } - } + // In case of unions or map-joins, it is possible that the file has + // already been seen. + // So, no need to attempt to merge the files again. + if ((ctx.getSeenFileSinkOps() == null) + || (!ctx.getSeenFileSinkOps().contains(nd))) { + chDir = GenMapRedUtils.isMergeRequired(ctx.getMvTask(), hconf, fsOp, currTask, isInsertTable); } Path finalName = processFS(fsOp, stack, opProcCtx, chDir); @@ -164,7 +99,9 @@ // Merge the files in the destination table/partitions by creating Map-only merge job // If underlying data is RCFile a RCFileBlockMerge task would be created. LOG.info("using CombineHiveInputformat for the merge job"); - createMRWorkForMergingFiles(fsOp, ctx, finalName); + GenMapRedUtils.createMRWorkForMergingFiles(fsOp, finalName, + ctx.getDependencyTaskForMultiInsert(), ctx.getMvTask(), + hconf, currTask); } FileSinkDesc fileSinkDesc = fsOp.getConf(); @@ -207,437 +144,6 @@ } /** - * Add the StatsTask as a dependent task of the MoveTask - * because StatsTask will change the Table/Partition metadata. For atomicity, we - * should not change it before the data is actually there done by MoveTask. - * - * @param nd - * the FileSinkOperator whose results are taken care of by the MoveTask. - * @param mvTask - * The MoveTask that moves the FileSinkOperator's results. - * @param currTask - * The MapRedTask that the FileSinkOperator belongs to. 
- * @param hconf - * HiveConf - */ - private void addStatsTask(FileSinkOperator nd, MoveTask mvTask, - Task currTask, HiveConf hconf) { - - MoveWork mvWork = mvTask.getWork(); - StatsWork statsWork = null; - if (mvWork.getLoadTableWork() != null) { - statsWork = new StatsWork(mvWork.getLoadTableWork()); - } else if (mvWork.getLoadFileWork() != null) { - statsWork = new StatsWork(mvWork.getLoadFileWork()); - } - assert statsWork != null : "Error when genereting StatsTask"; - - statsWork.setSourceTask(currTask); - statsWork.setStatsReliable(hconf.getBoolVar(ConfVars.HIVE_STATS_RELIABLE)); - MapredWork mrWork = (MapredWork) currTask.getWork(); - - // AggKey in StatsWork is used for stats aggregation while StatsAggPrefix - // in FileSinkDesc is used for stats publishing. They should be consistent. - statsWork.setAggKey(nd.getConf().getStatsAggPrefix()); - Task statsTask = TaskFactory.get(statsWork, hconf); - - // mark the MapredWork and FileSinkOperator for gathering stats - nd.getConf().setGatherStats(true); - mrWork.getMapWork().setGatheringStats(true); - if (mrWork.getReduceWork() != null) { - mrWork.getReduceWork().setGatheringStats(true); - } - nd.getConf().setStatsReliable(hconf.getBoolVar(ConfVars.HIVE_STATS_RELIABLE)); - nd.getConf().setMaxStatsKeyPrefixLength(StatsFactory.getMaxPrefixLength(hconf)); - // mrWork.addDestinationTable(nd.getConf().getTableInfo().getTableName()); - - // subscribe feeds from the MoveTask so that MoveTask can forward the list - // of dynamic partition list to the StatsTask - mvTask.addDependentTask(statsTask); - statsTask.subscribeFeed(mvTask); - } - - /** - * @param fsInput The FileSink operator. - * @param ctx The MR processing context. - * @param finalName the final destination path the merge job should output. - * @throws SemanticException - - * create a Map-only merge job using CombineHiveInputFormat for all partitions with - * following operators: - * MR job J0: - * ... - * | - * v - * FileSinkOperator_1 (fsInput) - * | - * v - * Merge job J1: - * | - * v - * TableScan (using CombineHiveInputFormat) (tsMerge) - * | - * v - * FileSinkOperator (fsMerge) - * - * Here the pathToPartitionInfo & pathToAlias will remain the same, which means the paths - * do - * not contain the dynamic partitions (their parent). So after the dynamic partitions are - * created (after the first job finished before the moveTask or ConditionalTask start), - * we need to change the pathToPartitionInfo & pathToAlias to include the dynamic - * partition - * directories. - * - */ - private void createMRWorkForMergingFiles (FileSinkOperator fsInput, GenMRProcContext ctx, - Path finalName) throws SemanticException { - - // - // 1. 
create the operator tree - // - HiveConf conf = ctx.getParseCtx().getConf(); - FileSinkDesc fsInputDesc = fsInput.getConf(); - - // Create a TableScan operator - RowSchema inputRS = fsInput.getSchema(); - Operator tsMerge = - GenMapRedUtils.createTemporaryTableScanOperator(inputRS); - - // Create a FileSink operator - TableDesc ts = (TableDesc) fsInputDesc.getTableInfo().clone(); - FileSinkDesc fsOutputDesc = new FileSinkDesc(finalName.toUri().toString(), ts, - conf.getBoolVar(ConfVars.COMPRESSRESULT)); - FileSinkOperator fsOutput = (FileSinkOperator) OperatorFactory.getAndMakeChild( - fsOutputDesc, inputRS, tsMerge); - - // If the input FileSinkOperator is a dynamic partition enabled, the tsMerge input schema - // needs to include the partition column, and the fsOutput should have - // a DynamicPartitionCtx to indicate that it needs to dynamically partitioned. - DynamicPartitionCtx dpCtx = fsInputDesc.getDynPartCtx(); - if (dpCtx != null && dpCtx.getNumDPCols() > 0) { - // adding DP ColumnInfo to the RowSchema signature - ArrayList signature = inputRS.getSignature(); - String tblAlias = fsInputDesc.getTableInfo().getTableName(); - LinkedHashMap colMap = new LinkedHashMap(); - StringBuilder partCols = new StringBuilder(); - for (String dpCol : dpCtx.getDPColNames()) { - ColumnInfo colInfo = new ColumnInfo(dpCol, - TypeInfoFactory.stringTypeInfo, // all partition column type should be string - tblAlias, true); // partition column is virtual column - signature.add(colInfo); - colMap.put(dpCol, dpCol); // input and output have the same column name - partCols.append(dpCol).append('/'); - } - partCols.setLength(partCols.length() - 1); // remove the last '/' - inputRS.setSignature(signature); - - // create another DynamicPartitionCtx, which has a different input-to-DP column mapping - DynamicPartitionCtx dpCtx2 = new DynamicPartitionCtx(dpCtx); - dpCtx2.setInputToDPCols(colMap); - fsOutputDesc.setDynPartCtx(dpCtx2); - - // update the FileSinkOperator to include partition columns - fsInputDesc.getTableInfo().getProperties().setProperty( - org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS, - partCols.toString()); // list of dynamic partition column names - } else { - // non-partitioned table - fsInputDesc.getTableInfo().getProperties().remove( - org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS); - } - - // - // 2. 
Constructing a conditional task consisting of a move task and a map reduce task - // - MoveWork dummyMv = new MoveWork(null, null, null, - new LoadFileDesc(new Path(fsInputDesc.getFinalDirName()), finalName, true, null, null), false); - MapWork cplan; - Serializable work; - - if (conf.getBoolVar(ConfVars.HIVEMERGERCFILEBLOCKLEVEL) && - fsInputDesc.getTableInfo().getInputFileFormatClass().equals(RCFileInputFormat.class)) { - - // Check if InputFormatClass is valid - String inputFormatClass = conf.getVar(ConfVars.HIVEMERGEINPUTFORMATBLOCKLEVEL); - try { - Class c = (Class) Class.forName(inputFormatClass); - - LOG.info("RCFile format- Using block level merge"); - cplan = createRCFileMergeTask(fsInputDesc, finalName, - dpCtx != null && dpCtx.getNumDPCols() > 0); - work = cplan; - } catch (ClassNotFoundException e) { - String msg = "Illegal input format class: " + inputFormatClass; - throw new SemanticException(msg); - } - - } else { - cplan = createMRWorkForMergingFiles(conf, tsMerge, fsInputDesc); - work = new MapredWork(); - ((MapredWork)work).setMapWork(cplan); - // use CombineHiveInputFormat for map-only merging - } - cplan.setInputformat("org.apache.hadoop.hive.ql.io.CombineHiveInputFormat"); - // NOTE: we should gather stats in MR1 rather than MR2 at merge job since we don't - // know if merge MR2 will be triggered at execution time - ConditionalTask cndTsk = createCondTask(conf, ctx.getCurrTask(), dummyMv, work, - fsInputDesc.getFinalDirName()); - - // keep the dynamic partition context in conditional task resolver context - ConditionalResolverMergeFilesCtx mrCtx = - (ConditionalResolverMergeFilesCtx) cndTsk.getResolverCtx(); - mrCtx.setDPCtx(fsInputDesc.getDynPartCtx()); - mrCtx.setLbCtx(fsInputDesc.getLbCtx()); - - // - // 3. add the moveTask as the children of the conditional task - // - linkMoveTask(ctx, fsOutput, cndTsk); - } - - /** - * Make the move task in the GenMRProcContext following the FileSinkOperator a dependent of all - * possible subtrees branching from the ConditionalTask. - * - * @param ctx - * @param newOutput - * @param cndTsk - */ - private void linkMoveTask(GenMRProcContext ctx, FileSinkOperator newOutput, - ConditionalTask cndTsk) { - - List> mvTasks = ctx.getMvTask(); - Task mvTask = findMoveTask(mvTasks, newOutput); - - for (Task tsk : cndTsk.getListTasks()) { - linkMoveTask(ctx, mvTask, tsk); - } - } - - /** - * Follows the task tree down from task and makes all leaves parents of mvTask - * - * @param ctx - * @param mvTask - * @param task - */ - private void linkMoveTask(GenMRProcContext ctx, Task mvTask, - Task task) { - - if (task.getDependentTasks() == null || task.getDependentTasks().isEmpty()) { - // If it's a leaf, add the move task as a child - addDependentMoveTasks(ctx, mvTask, task); - } else { - // Otherwise, for each child run this method recursively - for (Task childTask : task.getDependentTasks()) { - linkMoveTask(ctx, mvTask, childTask); - } - } - } - - /** - * Adds the dependencyTaskForMultiInsert in ctx as a dependent of parentTask. If mvTask is a - * load table, and HIVE_MULTI_INSERT_ATOMIC_OUTPUTS is set, adds mvTask as a dependent of - * dependencyTaskForMultiInsert in ctx, otherwise adds mvTask as a dependent of parentTask as - * well. 
- * - * @param ctx - * @param mvTask - * @param parentTask - */ - private void addDependentMoveTasks(GenMRProcContext ctx, Task mvTask, - Task parentTask) { - - if (mvTask != null) { - if (ctx.getConf().getBoolVar(ConfVars.HIVE_MULTI_INSERT_MOVE_TASKS_SHARE_DEPENDENCIES)) { - DependencyCollectionTask dependencyTask = ctx.getDependencyTaskForMultiInsert(); - parentTask.addDependentTask(dependencyTask); - if (mvTask.getWork().getLoadTableWork() != null) { - // Moving tables/partitions depend on the dependencyTask - dependencyTask.addDependentTask(mvTask); - } else { - // Moving files depends on the parentTask (we still want the dependencyTask to depend - // on the parentTask) - parentTask.addDependentTask(mvTask); - } - } else { - parentTask.addDependentTask(mvTask); - } - } - } - - /** - * Create a MapredWork based on input path, the top operator and the input - * table descriptor. - * - * @param conf - * @param topOp - * the table scan operator that is the root of the MapReduce task. - * @param fsDesc - * the file sink descriptor that serves as the input to this merge task. - * @param parentMR - * the parent MapReduce work - * @param parentFS - * the last FileSinkOperator in the parent MapReduce work - * @return the MapredWork - */ - private MapWork createMRWorkForMergingFiles (HiveConf conf, - Operator topOp, FileSinkDesc fsDesc) { - - ArrayList aliases = new ArrayList(); - String inputDir = fsDesc.getFinalDirName(); - TableDesc tblDesc = fsDesc.getTableInfo(); - aliases.add(inputDir); // dummy alias: just use the input path - - // constructing the default MapredWork - MapredWork cMrPlan = GenMapRedUtils.getMapRedWorkFromConf(conf); - MapWork cplan = cMrPlan.getMapWork(); - cplan.getPathToAliases().put(inputDir, aliases); - cplan.getPathToPartitionInfo().put(inputDir, new PartitionDesc(tblDesc, null)); - cplan.getAliasToWork().put(inputDir, topOp); - cplan.setMapperCannotSpanPartns(true); - - return cplan; - } - - /** - * Create a block level merge task for RCFiles. - * - * @param fsInputDesc - * @param finalName - * @return MergeWork if table is stored as RCFile, - * null otherwise - */ - private MapWork createRCFileMergeTask(FileSinkDesc fsInputDesc, - Path finalName, boolean hasDynamicPartitions) throws SemanticException { - - String inputDir = fsInputDesc.getFinalDirName(); - TableDesc tblDesc = fsInputDesc.getTableInfo(); - - if (tblDesc.getInputFileFormatClass().equals(RCFileInputFormat.class)) { - ArrayList inputDirs = new ArrayList(1); - ArrayList inputDirstr = new ArrayList(1); - if (!hasDynamicPartitions - && !isSkewedStoredAsDirs(fsInputDesc)) { - inputDirs.add(new Path(inputDir)); - inputDirstr.add(inputDir); - } - - MergeWork work = new MergeWork(inputDirs, finalName, - hasDynamicPartitions, fsInputDesc.getDynPartCtx()); - LinkedHashMap> pathToAliases = - new LinkedHashMap>(); - pathToAliases.put(inputDir, (ArrayList) inputDirstr.clone()); - work.setMapperCannotSpanPartns(true); - work.setPathToAliases(pathToAliases); - work.setAliasToWork( - new LinkedHashMap>()); - if (hasDynamicPartitions - || isSkewedStoredAsDirs(fsInputDesc)) { - work.getPathToPartitionInfo().put(inputDir, - new PartitionDesc(tblDesc, null)); - } - work.setListBucketingCtx(fsInputDesc.getLbCtx()); - - return work; - } - - throw new SemanticException("createRCFileMergeTask called on non-RCFile table"); - } - - /** - * check if it is skewed table and stored as dirs. 
- * - * @param fsInputDesc - * @return - */ - private boolean isSkewedStoredAsDirs(FileSinkDesc fsInputDesc) { - return (fsInputDesc.getLbCtx() == null) ? false : fsInputDesc.getLbCtx() - .isSkewedStoredAsDir(); - } - - /** - * Construct a conditional task given the current leaf task, the MoveWork and the MapredWork. - * - * @param conf - * HiveConf - * @param currTask - * current leaf task - * @param mvWork - * MoveWork for the move task - * @param mergeWork - * MapredWork for the merge task. - * @param inputPath - * the input directory of the merge/move task - * @return The conditional task - */ - private ConditionalTask createCondTask(HiveConf conf, - Task currTask, MoveWork mvWork, - Serializable mergeWork, String inputPath) { - - // There are 3 options for this ConditionalTask: - // 1) Merge the partitions - // 2) Move the partitions (i.e. don't merge the partitions) - // 3) Merge some partitions and move other partitions (i.e. merge some partitions and don't - // merge others) in this case the merge is done first followed by the move to prevent - // conflicts. - Task mergeOnlyMergeTask = TaskFactory.get(mergeWork, conf); - Task moveOnlyMoveTask = TaskFactory.get(mvWork, conf); - Task mergeAndMoveMergeTask = TaskFactory.get(mergeWork, conf); - Task mergeAndMoveMoveTask = TaskFactory.get(mvWork, conf); - - // NOTE! It is necessary merge task is the parent of the move task, and not - // the other way around, for the proper execution of the execute method of - // ConditionalTask - mergeAndMoveMergeTask.addDependentTask(mergeAndMoveMoveTask); - - List listWorks = new ArrayList(); - listWorks.add(mvWork); - listWorks.add(mergeWork); - - ConditionalWork cndWork = new ConditionalWork(listWorks); - - List> listTasks = new ArrayList>(); - listTasks.add(moveOnlyMoveTask); - listTasks.add(mergeOnlyMergeTask); - listTasks.add(mergeAndMoveMergeTask); - - ConditionalTask cndTsk = (ConditionalTask) TaskFactory.get(cndWork, conf); - cndTsk.setListTasks(listTasks); - - // create resolver - cndTsk.setResolver(new ConditionalResolverMergeFiles()); - ConditionalResolverMergeFilesCtx mrCtx = - new ConditionalResolverMergeFilesCtx(listTasks, inputPath); - cndTsk.setResolverCtx(mrCtx); - - // make the conditional task as the child of the current leaf task - currTask.addDependentTask(cndTsk); - - return cndTsk; - } - - private Task findMoveTask( - List> mvTasks, FileSinkOperator fsOp) { - // find the move task - for (Task mvTsk : mvTasks) { - MoveWork mvWork = mvTsk.getWork(); - Path srcDir = null; - if (mvWork.getLoadFileWork() != null) { - srcDir = mvWork.getLoadFileWork().getSourcePath(); - } else if (mvWork.getLoadTableWork() != null) { - srcDir = mvWork.getLoadTableWork().getSourcePath(); - } - - if ((srcDir != null) - && (srcDir.equals(new Path(fsOp.getConf().getFinalDirName())))) { - return mvTsk; - } - } - return null; - } - - /** * Process the FileSink operator to generate a MoveTask if necessary. 
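createCondTask, which this refactoring also relocates, always materializes three alternatives over the same two works and lets ConditionalResolverMergeFiles pick among them at run time. Its skeleton, condensed from the body removed above:

    // 1) merge everything  2) move only  3) merge some partitions, then move the rest
    Task<? extends Serializable> mergeOnlyMergeTask = TaskFactory.get(mergeWork, conf);
    Task<? extends Serializable> moveOnlyMoveTask = TaskFactory.get(mvWork, conf);
    Task<? extends Serializable> mergeAndMoveMergeTask = TaskFactory.get(mergeWork, conf);
    Task<? extends Serializable> mergeAndMoveMoveTask = TaskFactory.get(mvWork, conf);

    // The merge task must be the PARENT of the move task for option 3;
    // ConditionalTask.execute relies on this orientation.
    mergeAndMoveMergeTask.addDependentTask(mergeAndMoveMoveTask);

    List<Serializable> listWorks = new ArrayList<Serializable>();
    listWorks.add(mvWork);
    listWorks.add(mergeWork);

    List<Task<? extends Serializable>> listTasks = new ArrayList<Task<? extends Serializable>>();
    listTasks.add(moveOnlyMoveTask);
    listTasks.add(mergeOnlyMergeTask);
    listTasks.add(mergeAndMoveMergeTask);

    ConditionalTask cndTsk =
        (ConditionalTask) TaskFactory.get(new ConditionalWork(listWorks), conf);
    cndTsk.setListTasks(listTasks);
    cndTsk.setResolver(new ConditionalResolverMergeFiles());
    cndTsk.setResolverCtx(new ConditionalResolverMergeFilesCtx(listTasks, inputPath));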
* * @param fsOp @@ -655,6 +161,11 @@ NodeProcessorCtx opProcCtx, boolean chDir) throws SemanticException { GenMRProcContext ctx = (GenMRProcContext) opProcCtx; + Task currTask = ctx.getCurrTask(); + + // If the directory needs to be changed, send the new directory + Path dest = null; + List seenFSOps = ctx.getSeenFileSinkOps(); if (seenFSOps == null) { seenFSOps = new ArrayList(); @@ -664,49 +175,14 @@ } ctx.setSeenFileSinkOps(seenFSOps); - Task currTask = ctx.getCurrTask(); + dest = GenMapRedUtils.createMoveTask(ctx.getCurrTask(), chDir, fsOp, ctx.getParseCtx(), + ctx.getMvTask(), ctx.getConf(), ctx.getDependencyTaskForMultiInsert()); - // If the directory needs to be changed, send the new directory - Path dest = null; - - if (chDir) { - dest = new Path(fsOp.getConf().getFinalDirName()); - - // generate the temporary file - // it must be on the same file system as the current destination - ParseContext parseCtx = ctx.getParseCtx(); - Context baseCtx = parseCtx.getContext(); - String tmpDir = baseCtx.getExternalTmpFileURI(dest.toUri()); - - FileSinkDesc fileSinkDesc = fsOp.getConf(); - // Change all the linked file sink descriptors - if (fileSinkDesc.isLinkedFileSink()) { - for (FileSinkDesc fsConf:fileSinkDesc.getLinkedFileSinkDesc()) { - String fileName = Utilities.getFileNameFromDirName(fsConf.getDirName()); - fsConf.setParentDir(tmpDir); - fsConf.setDirName(tmpDir + Path.SEPARATOR + fileName); - } - } else { - fileSinkDesc.setDirName(tmpDir); - } - } - - Task mvTask = null; - - if (!chDir) { - mvTask = findMoveTask(ctx.getMvTask(), fsOp); - } - Operator currTopOp = ctx.getCurrTopOp(); String currAliasId = ctx.getCurrAliasId(); HashMap, Task> opTaskMap = ctx.getOpTaskMap(); - // Set the move task to be dependent on the current task - if (mvTask != null) { - addDependentMoveTasks(ctx, mvTask, currTask); - } - // In case of multi-table insert, the path to alias mapping is needed for // all the sources. 
Since there is no // reducer, treat it as a plan with null reducer Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRProcContext.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRProcContext.java (revision 1555253) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRProcContext.java (working copy) @@ -27,6 +27,7 @@ import java.util.Set; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.ql.exec.DependencyCollectionTask; import org.apache.hadoop.hive.ql.exec.FileSinkOperator; import org.apache.hadoop.hive.ql.exec.Operator; @@ -440,8 +441,10 @@ */ public DependencyCollectionTask getDependencyTaskForMultiInsert() { if (dependencyTaskForMultiInsert == null) { - dependencyTaskForMultiInsert = - (DependencyCollectionTask) TaskFactory.get(new DependencyCollectionWork(), conf); + if (conf.getBoolVar(ConfVars.HIVE_MULTI_INSERT_MOVE_TASKS_SHARE_DEPENDENCIES)) { + dependencyTaskForMultiInsert = + (DependencyCollectionTask) TaskFactory.get(new DependencyCollectionWork(), conf); + } } return dependencyTaskForMultiInsert; } Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java (revision 1555253) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java (working copy) @@ -33,11 +33,15 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.ql.Context; import org.apache.hadoop.hive.ql.exec.ColumnInfo; import org.apache.hadoop.hive.ql.exec.ConditionalTask; import org.apache.hadoop.hive.ql.exec.DemuxOperator; +import org.apache.hadoop.hive.ql.exec.DependencyCollectionTask; +import org.apache.hadoop.hive.ql.exec.FileSinkOperator; import org.apache.hadoop.hive.ql.exec.JoinOperator; +import org.apache.hadoop.hive.ql.exec.MoveTask; import org.apache.hadoop.hive.ql.exec.MapJoinOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.OperatorFactory; @@ -52,6 +56,8 @@ import org.apache.hadoop.hive.ql.exec.mr.ExecDriver; import org.apache.hadoop.hive.ql.exec.mr.MapRedTask; import org.apache.hadoop.hive.ql.hooks.ReadEntity; +import org.apache.hadoop.hive.ql.io.RCFileInputFormat; +import org.apache.hadoop.hive.ql.io.rcfile.merge.MergeWork; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.Partition; import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMRUnionCtx; @@ -64,20 +70,31 @@ import org.apache.hadoop.hive.ql.parse.QBJoinTree; import org.apache.hadoop.hive.ql.parse.RowResolver; import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.BaseWork; +import org.apache.hadoop.hive.ql.plan.ConditionalResolverMergeFiles; +import org.apache.hadoop.hive.ql.plan.ConditionalResolverMergeFiles.ConditionalResolverMergeFilesCtx; +import org.apache.hadoop.hive.ql.plan.ConditionalWork; +import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.FetchWork; import org.apache.hadoop.hive.ql.plan.FileSinkDesc; import org.apache.hadoop.hive.ql.plan.FilterDesc.sampleDesc; +import 
org.apache.hadoop.hive.ql.plan.LoadFileDesc; import org.apache.hadoop.hive.ql.plan.MapWork; import org.apache.hadoop.hive.ql.plan.MapredLocalWork; import org.apache.hadoop.hive.ql.plan.MapredWork; +import org.apache.hadoop.hive.ql.plan.MoveWork; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.PartitionDesc; import org.apache.hadoop.hive.ql.plan.PlanUtils; import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc; import org.apache.hadoop.hive.ql.plan.ReduceWork; +import org.apache.hadoop.hive.ql.plan.StatsWork; import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.ql.plan.TableScanDesc; +import org.apache.hadoop.hive.ql.stats.StatsFactory; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.mapred.InputFormat; /** * General utility common functions for the Processor to convert operator into @@ -90,7 +107,7 @@ LOG = LogFactory.getLog("org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils"); } - private static boolean needsTagging(ReduceWork rWork) { + public static boolean needsTagging(ReduceWork rWork) { return rWork != null && (rWork.getReducer().getClass() == JoinOperator.class || rWork.getReducer().getClass() == DemuxOperator.class); } @@ -444,10 +461,32 @@ public static void setTaskPlan(String alias_id, Operator topOp, Task task, boolean local, GenMRProcContext opProcCtx, PrunedPartitionList pList) throws SemanticException { - MapWork plan = ((MapredWork) task.getWork()).getMapWork(); - ParseContext parseCtx = opProcCtx.getParseCtx(); - Set inputs = opProcCtx.getInputs(); + setMapWork(((MapredWork) task.getWork()).getMapWork(), opProcCtx.getParseCtx(), + opProcCtx.getInputs(), pList, topOp, alias_id, opProcCtx.getConf(), local); + opProcCtx.addSeenOp(task, topOp); + } + /** + * initialize MapWork + * + * @param alias_id + * current alias + * @param topOp + * the top operator of the stack + * @param plan + * map work to initialize + * @param local + * whether you need to add to map-reduce or local work + * @param pList + * pruned partition list. If it is null it will be computed on-the-fly. + * @param inputs + * read entities for the map work + * @param conf + * current instance of hive conf + */ + public static void setMapWork(MapWork plan, ParseContext parseCtx, Set inputs, + PrunedPartitionList partsList, Operator topOp, String alias_id, + HiveConf conf, boolean local) throws SemanticException { ArrayList partDir = new ArrayList(); ArrayList partDesc = new ArrayList(); @@ -454,8 +493,6 @@ Path tblDir = null; TableDesc tblDesc = null; - PrunedPartitionList partsList = pList; - plan.setNameToSplitSample(parseCtx.getNameToSplitSample()); if (partsList == null) { @@ -701,7 +738,6 @@ } plan.setMapLocalWork(localPlan); } - opProcCtx.addSeenOp(task, topOp); } /** @@ -751,6 +787,21 @@ } /** + * Set key and value descriptor. + * @param work ReduceWork + * @param rs ReduceSinkOperator + */ + public static void setKeyAndValueDesc(ReduceWork work, ReduceSinkOperator rs) { + work.setKeyDesc(rs.getConf().getKeySerializeInfo()); + int tag = Math.max(0, rs.getConf().getTag()); + List tagToSchema = work.getTagToValueDesc(); + while (tag + 1 > tagToSchema.size()) { + tagToSchema.add(null); + } + tagToSchema.set(tag, rs.getConf().getValueSerializeInfo()); + } + + /** * set key and value descriptor.
* * @param plan @@ -766,13 +817,7 @@ if (topOp instanceof ReduceSinkOperator) { ReduceSinkOperator rs = (ReduceSinkOperator) topOp; - plan.setKeyDesc(rs.getConf().getKeySerializeInfo()); - int tag = Math.max(0, rs.getConf().getTag()); - List tagToSchema = plan.getTagToValueDesc(); - while (tag + 1 > tagToSchema.size()) { - tagToSchema.add(null); - } - tagToSchema.set(tag, rs.getConf().getValueSerializeInfo()); + setKeyAndValueDesc(plan, rs); } else { List> children = topOp.getChildOperators(); if (children != null) { @@ -1096,6 +1141,571 @@ } } + /** + * @param fsInput The FileSink operator. + * @param ctx The MR processing context. + * @param finalName the final destination path the merge job should output. + * @param dependencyTask + * @param mvTasks + * @param conf + * @param currTask + * @throws SemanticException + + * create a Map-only merge job using CombineHiveInputFormat for all partitions with the + * following operators: + * MR job J0: + * ... + * | + * v + * FileSinkOperator_1 (fsInput) + * | + * v + * Merge job J1: + * | + * v + * TableScan (using CombineHiveInputFormat) (tsMerge) + * | + * v + * FileSinkOperator (fsMerge) + * + * Here the pathToPartitionInfo & pathToAlias will remain the same, which means the paths + * do not yet contain the dynamic partition directories (only their parent). So after the + * dynamic partitions are created (after the first job finishes, before the moveTask or + * ConditionalTask starts), we need to change the pathToPartitionInfo & pathToAlias to + * include the dynamic partition directories. + * + */ + public static void createMRWorkForMergingFiles (FileSinkOperator fsInput, + Path finalName, DependencyCollectionTask dependencyTask, + List> mvTasks, HiveConf conf, + Task currTask) throws SemanticException { + + // + // 1. create the operator tree + // + FileSinkDesc fsInputDesc = fsInput.getConf(); + + // Create a TableScan operator + RowSchema inputRS = fsInput.getSchema(); + Operator tsMerge = + GenMapRedUtils.createTemporaryTableScanOperator(inputRS); + + // Create a FileSink operator + TableDesc ts = (TableDesc) fsInputDesc.getTableInfo().clone(); + FileSinkDesc fsOutputDesc = new FileSinkDesc(finalName.toUri().toString(), ts, + conf.getBoolVar(ConfVars.COMPRESSRESULT)); + FileSinkOperator fsOutput = (FileSinkOperator) OperatorFactory.getAndMakeChild( + fsOutputDesc, inputRS, tsMerge); + + // If the input FileSinkOperator has dynamic partitions enabled, the tsMerge input schema + // needs to include the partition column, and the fsOutput should have + // a DynamicPartitionCtx to indicate that it needs to be dynamically partitioned.
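As the method comment above notes, once job J0 has created the dynamic partitions, the merge work's pathToAlias/pathToPartitionInfo entries must be rewritten from the parent output directory to the individual partition directories. A standalone illustration of that rewrite (hypothetical helper and paths, not part of this patch):

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.LinkedHashMap;
    import java.util.List;
    import java.util.Map;

    public class DpPathFixupSketch {
      // Replaces the parent output directory's alias mapping with one entry per
      // dynamic-partition directory created by the first job (J0).
      static Map<String, List<String>> expandToDpDirs(String parentDir,
          List<String> dpDirs, List<String> aliases) {
        Map<String, List<String>> pathToAliases = new LinkedHashMap<String, List<String>>();
        for (String dpDir : dpDirs) {
          pathToAliases.put(parentDir + "/" + dpDir, new ArrayList<String>(aliases));
        }
        return pathToAliases;
      }

      public static void main(String[] args) {
        // prints {/tmp/hive-out/ds=2014-01-01=[merge_input], /tmp/hive-out/ds=2014-01-02=[merge_input]}
        System.out.println(expandToDpDirs("/tmp/hive-out",
            Arrays.asList("ds=2014-01-01", "ds=2014-01-02"),
            Arrays.asList("merge_input")));
      }
    }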
+ DynamicPartitionCtx dpCtx = fsInputDesc.getDynPartCtx(); + if (dpCtx != null && dpCtx.getNumDPCols() > 0) { + // adding DP ColumnInfo to the RowSchema signature + ArrayList signature = inputRS.getSignature(); + String tblAlias = fsInputDesc.getTableInfo().getTableName(); + LinkedHashMap colMap = new LinkedHashMap(); + StringBuilder partCols = new StringBuilder(); + for (String dpCol : dpCtx.getDPColNames()) { + ColumnInfo colInfo = new ColumnInfo(dpCol, + TypeInfoFactory.stringTypeInfo, // all partition column type should be string + tblAlias, true); // partition column is virtual column + signature.add(colInfo); + colMap.put(dpCol, dpCol); // input and output have the same column name + partCols.append(dpCol).append('/'); + } + partCols.setLength(partCols.length() - 1); // remove the last '/' + inputRS.setSignature(signature); + + // create another DynamicPartitionCtx, which has a different input-to-DP column mapping + DynamicPartitionCtx dpCtx2 = new DynamicPartitionCtx(dpCtx); + dpCtx2.setInputToDPCols(colMap); + fsOutputDesc.setDynPartCtx(dpCtx2); + + // update the FileSinkOperator to include partition columns + fsInputDesc.getTableInfo().getProperties().setProperty( + org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS, + partCols.toString()); // list of dynamic partition column names + } else { + // non-partitioned table + fsInputDesc.getTableInfo().getProperties().remove( + org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS); + } + + // + // 2. Constructing a conditional task consisting of a move task and a map reduce task + // + MoveWork dummyMv = new MoveWork(null, null, null, + new LoadFileDesc(new Path(fsInputDesc.getFinalDirName()), finalName, true, null, null), false); + MapWork cplan; + Serializable work; + + if (conf.getBoolVar(ConfVars.HIVEMERGERCFILEBLOCKLEVEL) && + fsInputDesc.getTableInfo().getInputFileFormatClass().equals(RCFileInputFormat.class)) { + + // Check if InputFormatClass is valid + String inputFormatClass = conf.getVar(ConfVars.HIVEMERGEINPUTFORMATBLOCKLEVEL); + try { + Class c = (Class) Class.forName(inputFormatClass); + + LOG.info("RCFile format- Using block level merge"); + cplan = GenMapRedUtils.createRCFileMergeTask(fsInputDesc, finalName, + dpCtx != null && dpCtx.getNumDPCols() > 0); + work = cplan; + } catch (ClassNotFoundException e) { + String msg = "Illegal input format class: " + inputFormatClass; + throw new SemanticException(msg); + } + + } else { + cplan = createMRWorkForMergingFiles(conf, tsMerge, fsInputDesc); + work = new MapredWork(); + ((MapredWork)work).setMapWork(cplan); + } + // use CombineHiveInputFormat for map-only merging + cplan.setInputformat("org.apache.hadoop.hive.ql.io.CombineHiveInputFormat"); + // NOTE: we should gather stats in MR1 rather than MR2 at merge job since we don't + // know if merge MR2 will be triggered at execution time + ConditionalTask cndTsk = GenMapRedUtils.createCondTask(conf, currTask, dummyMv, work, + fsInputDesc.getFinalDirName()); + + // keep the dynamic partition context in conditional task resolver context + ConditionalResolverMergeFilesCtx mrCtx = + (ConditionalResolverMergeFilesCtx) cndTsk.getResolverCtx(); + mrCtx.setDPCtx(fsInputDesc.getDynPartCtx()); + mrCtx.setLbCtx(fsInputDesc.getLbCtx()); + + // + // 3. 
add the moveTask as a child of the conditional task + // + linkMoveTask(fsOutput, cndTsk, mvTasks, conf, dependencyTask); + } + + /** + * Make the move task in the GenMRProcContext following the FileSinkOperator a dependent of all + * possible subtrees branching from the ConditionalTask. + * + * @param newOutput + * @param cndTsk + * @param mvTasks + * @param hconf + * @param dependencyTask + */ + public static void linkMoveTask(FileSinkOperator newOutput, + ConditionalTask cndTsk, List> mvTasks, HiveConf hconf, + DependencyCollectionTask dependencyTask) { + + Task mvTask = GenMapRedUtils.findMoveTask(mvTasks, newOutput); + + for (Task tsk : cndTsk.getListTasks()) { + linkMoveTask(mvTask, tsk, hconf, dependencyTask); + } + } + + /** + * Follows the task tree down from task and makes all leaves parents of mvTask + * + * @param mvTask + * @param task + * @param hconf + * @param dependencyTask + */ + public static void linkMoveTask(Task mvTask, + Task task, HiveConf hconf, + DependencyCollectionTask dependencyTask) { + + if (task.getDependentTasks() == null || task.getDependentTasks().isEmpty()) { + // If it's a leaf, add the move task as a child + addDependentMoveTasks(mvTask, hconf, task, dependencyTask); + } else { + // Otherwise, for each child run this method recursively + for (Task childTask : task.getDependentTasks()) { + linkMoveTask(mvTask, childTask, hconf, dependencyTask); + } + } + } + + /** + * Adds the dependencyTaskForMultiInsert in ctx as a dependent of parentTask. If mvTask is a + * load table, and HIVE_MULTI_INSERT_ATOMIC_OUTPUTS is set, adds mvTask as a dependent of + * dependencyTaskForMultiInsert in ctx, otherwise adds mvTask as a dependent of parentTask as + * well. + * + * @param mvTask + * @param hconf + * @param parentTask + * @param dependencyTask + */ + public static void addDependentMoveTasks(Task mvTask, HiveConf hconf, + Task parentTask, DependencyCollectionTask dependencyTask) { + + if (mvTask != null) { + if (dependencyTask != null) { + parentTask.addDependentTask(dependencyTask); + if (mvTask.getWork().getLoadTableWork() != null) { + // Moving tables/partitions depend on the dependencyTask + dependencyTask.addDependentTask(mvTask); + } else { + // Moving files depends on the parentTask (we still want the dependencyTask to depend + // on the parentTask) + parentTask.addDependentTask(mvTask); + } + } else { + parentTask.addDependentTask(mvTask); + } + } + } + + + /** + * Add the StatsTask as a dependent task of the MoveTask + * because StatsTask will change the Table/Partition metadata. For atomicity, we + * should not change it before the data is actually put in place by the MoveTask. + * + * @param nd + * the FileSinkOperator whose results are taken care of by the MoveTask. + * @param mvTask + * The MoveTask that moves the FileSinkOperator's results. + * @param currTask + * The MapRedTask that the FileSinkOperator belongs to.
+ * @param hconf + * HiveConf + */ + public static void addStatsTask(FileSinkOperator nd, MoveTask mvTask, + Task currTask, HiveConf hconf) { + + MoveWork mvWork = mvTask.getWork(); + StatsWork statsWork = null; + if (mvWork.getLoadTableWork() != null) { + statsWork = new StatsWork(mvWork.getLoadTableWork()); + } else if (mvWork.getLoadFileWork() != null) { + statsWork = new StatsWork(mvWork.getLoadFileWork()); + } + assert statsWork != null : "Error when generating StatsTask"; + + statsWork.setSourceTask(currTask); + statsWork.setStatsReliable(hconf.getBoolVar(ConfVars.HIVE_STATS_RELIABLE)); + + if (currTask.getWork() instanceof MapredWork) { + MapredWork mrWork = (MapredWork) currTask.getWork(); + mrWork.getMapWork().setGatheringStats(true); + if (mrWork.getReduceWork() != null) { + mrWork.getReduceWork().setGatheringStats(true); + } + } + + // AggKey in StatsWork is used for stats aggregation while StatsAggPrefix + // in FileSinkDesc is used for stats publishing. They should be consistent. + statsWork.setAggKey(nd.getConf().getStatsAggPrefix()); + Task statsTask = TaskFactory.get(statsWork, hconf); + + // mark the MapredWork and FileSinkOperator for gathering stats + nd.getConf().setGatherStats(true); + nd.getConf().setStatsReliable(hconf.getBoolVar(ConfVars.HIVE_STATS_RELIABLE)); + nd.getConf().setMaxStatsKeyPrefixLength(StatsFactory.getMaxPrefixLength(hconf)); + // mrWork.addDestinationTable(nd.getConf().getTableInfo().getTableName()); + + // subscribe feeds from the MoveTask so that MoveTask can forward the list + // of dynamic partitions to the StatsTask + mvTask.addDependentTask(statsTask); + statsTask.subscribeFeed(mvTask); + } + + /** + * Returns true iff the current query is an insert into for the given file sink + * + * @param parseCtx + * @param fsOp + * @return + */ + public static boolean isInsertInto(ParseContext parseCtx, FileSinkOperator fsOp) { + return fsOp.getConf().getTableInfo().getTableName() != null && + parseCtx.getQB().getParseInfo().isInsertToTable(); + } + + /** + * Create a MapredWork based on input path, the top operator and the input + * table descriptor. + * + * @param conf + * @param topOp + * the table scan operator that is the root of the MapReduce task. + * @param fsDesc + * the file sink descriptor that serves as the input to this merge task. + * @param parentMR + * the parent MapReduce work + * @param parentFS + * the last FileSinkOperator in the parent MapReduce work + * @return the MapredWork + */ + private static MapWork createMRWorkForMergingFiles (HiveConf conf, + Operator topOp, FileSinkDesc fsDesc) { + + ArrayList aliases = new ArrayList(); + String inputDir = fsDesc.getFinalDirName(); + TableDesc tblDesc = fsDesc.getTableInfo(); + aliases.add(inputDir); // dummy alias: just use the input path + + // constructing the default MapredWork + MapredWork cMrPlan = GenMapRedUtils.getMapRedWorkFromConf(conf); + MapWork cplan = cMrPlan.getMapWork(); + cplan.getPathToAliases().put(inputDir, aliases); + cplan.getPathToPartitionInfo().put(inputDir, new PartitionDesc(tblDesc, null)); + cplan.getAliasToWork().put(inputDir, topOp); + cplan.setMapperCannotSpanPartns(true); + + return cplan; + } + + /** + * Create a block level merge task for RCFiles.
+ * + * @param fsInputDesc + * @param finalName + * @return MergeWork if table is stored as RCFile, + * null otherwise + */ + public static MapWork createRCFileMergeTask(FileSinkDesc fsInputDesc, + Path finalName, boolean hasDynamicPartitions) throws SemanticException { + + String inputDir = fsInputDesc.getFinalDirName(); + TableDesc tblDesc = fsInputDesc.getTableInfo(); + + if (tblDesc.getInputFileFormatClass().equals(RCFileInputFormat.class)) { + ArrayList inputDirs = new ArrayList(1); + ArrayList inputDirstr = new ArrayList(1); + if (!hasDynamicPartitions + && !GenMapRedUtils.isSkewedStoredAsDirs(fsInputDesc)) { + inputDirs.add(new Path(inputDir)); + inputDirstr.add(inputDir); + } + + MergeWork work = new MergeWork(inputDirs, finalName, + hasDynamicPartitions, fsInputDesc.getDynPartCtx()); + LinkedHashMap> pathToAliases = + new LinkedHashMap>(); + pathToAliases.put(inputDir, (ArrayList) inputDirstr.clone()); + work.setMapperCannotSpanPartns(true); + work.setPathToAliases(pathToAliases); + work.setAliasToWork( + new LinkedHashMap>()); + if (hasDynamicPartitions + || GenMapRedUtils.isSkewedStoredAsDirs(fsInputDesc)) { + work.getPathToPartitionInfo().put(inputDir, + new PartitionDesc(tblDesc, null)); + } + work.setListBucketingCtx(fsInputDesc.getLbCtx()); + + return work; + } + + throw new SemanticException("createRCFileMergeTask called on non-RCFile table"); + } + + /** + * Construct a conditional task given the current leaf task, the MoveWork and the MapredWork. + * + * @param conf + * HiveConf + * @param currTask + * current leaf task + * @param mvWork + * MoveWork for the move task + * @param mergeWork + * MapredWork for the merge task. + * @param inputPath + * the input directory of the merge/move task + * @return The conditional task + */ + @SuppressWarnings("unchecked") + public static ConditionalTask createCondTask(HiveConf conf, + Task currTask, MoveWork mvWork, + Serializable mergeWork, String inputPath) { + + // There are 3 options for this ConditionalTask: + // 1) Merge the partitions + // 2) Move the partitions (i.e. don't merge the partitions) + // 3) Merge some partitions and move other partitions (i.e. merge some partitions and don't + // merge others) in this case the merge is done first followed by the move to prevent + // conflicts. + Task mergeOnlyMergeTask = TaskFactory.get(mergeWork, conf); + Task moveOnlyMoveTask = TaskFactory.get(mvWork, conf); + Task mergeAndMoveMergeTask = TaskFactory.get(mergeWork, conf); + Task mergeAndMoveMoveTask = TaskFactory.get(mvWork, conf); + + // NOTE! 
It is necessary that the merge task is the parent of the move task, and not + // the other way around, for the proper execution of the execute method of + // ConditionalTask + mergeAndMoveMergeTask.addDependentTask(mergeAndMoveMoveTask); + + List listWorks = new ArrayList(); + listWorks.add(mvWork); + listWorks.add(mergeWork); + + ConditionalWork cndWork = new ConditionalWork(listWorks); + + List> listTasks = new ArrayList>(); + listTasks.add(moveOnlyMoveTask); + listTasks.add(mergeOnlyMergeTask); + listTasks.add(mergeAndMoveMergeTask); + + ConditionalTask cndTsk = (ConditionalTask) TaskFactory.get(cndWork, conf); + cndTsk.setListTasks(listTasks); + + // create resolver + cndTsk.setResolver(new ConditionalResolverMergeFiles()); + ConditionalResolverMergeFilesCtx mrCtx = + new ConditionalResolverMergeFilesCtx(listTasks, inputPath); + cndTsk.setResolverCtx(mrCtx); + + // make the conditional task the child of the current leaf task + currTask.addDependentTask(cndTsk); + + return cndTsk; + } + + /** + * Check if it is a skewed table stored as directories. + * + * @param fsInputDesc + * @return + */ + public static boolean isSkewedStoredAsDirs(FileSinkDesc fsInputDesc) { + return (fsInputDesc.getLbCtx() == null) ? false : fsInputDesc.getLbCtx() + .isSkewedStoredAsDir(); + } + + public static Task findMoveTask( + List> mvTasks, FileSinkOperator fsOp) { + // find the move task + for (Task mvTsk : mvTasks) { + MoveWork mvWork = mvTsk.getWork(); + Path srcDir = null; + if (mvWork.getLoadFileWork() != null) { + srcDir = mvWork.getLoadFileWork().getSourcePath(); + } else if (mvWork.getLoadTableWork() != null) { + srcDir = mvWork.getLoadTableWork().getSourcePath(); + } + + if ((srcDir != null) + && (srcDir.equals(new Path(fsOp.getConf().getFinalDirName())))) { + return mvTsk; + } + } + return null; + } + + /** + * Returns true iff the fsOp requires a merge + * @param mvTasks + * @param hconf + * @param fsOp + * @param currTask + * @param isInsertTable + * @return + */ + public static boolean isMergeRequired(List> mvTasks, HiveConf hconf, FileSinkOperator fsOp, + Task currTask, boolean isInsertTable) { + + // Has the user enabled merging of files for map-only jobs or for all jobs + if ((mvTasks != null) && (!mvTasks.isEmpty())) { + + // no need for merging if the move is to a local file system + MoveTask mvTask = (MoveTask) GenMapRedUtils.findMoveTask(mvTasks, fsOp); + + if (mvTask != null && isInsertTable && hconf.getBoolVar(ConfVars.HIVESTATSAUTOGATHER)) { + GenMapRedUtils.addStatsTask(fsOp, mvTask, currTask, hconf); + } + + if ((mvTask != null) && !mvTask.isLocal() && fsOp.getConf().canBeMerged()) { + if (fsOp.getConf().isLinkedFileSink()) { + // If the user has HIVEMERGEMAPREDFILES set to false, the idea was that the + // number of reducers is small, so the number of files anyway is small. + // However, with this optimization, we are increasing the number of files + // possibly by a big margin. So, merge aggressively.
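The checks that follow key off two user-facing merge settings, which back the hive.merge.mapfiles and hive.merge.mapredfiles session options. A hedged sketch of driving them programmatically (standalone, not part of this patch; both ConfVars appear in the surrounding hunks):

    import org.apache.hadoop.hive.conf.HiveConf;
    import org.apache.hadoop.hive.conf.HiveConf.ConfVars;

    public class MergeFlagsSketch {
      public static void main(String[] args) {
        HiveConf conf = new HiveConf();
        conf.setBoolVar(ConfVars.HIVEMERGEMAPFILES, true);    // merge outputs of map-only jobs
        conf.setBoolVar(ConfVars.HIVEMERGEMAPREDFILES, true); // merge outputs of map-reduce jobs
        System.out.println(conf.getBoolVar(ConfVars.HIVEMERGEMAPREDFILES));
      }
    }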
+ if (hconf.getBoolVar(ConfVars.HIVEMERGEMAPFILES) || + hconf.getBoolVar(ConfVars.HIVEMERGEMAPREDFILES)) { + return true; + } + } else { + // There are separate configuration parameters to control whether to + // merge for a map-only job + // or for a map-reduce job + if (currTask.getWork() instanceof MapredWork) { + ReduceWork reduceWork = ((MapredWork) currTask.getWork()).getReduceWork(); + boolean mergeMapOnly = + hconf.getBoolVar(ConfVars.HIVEMERGEMAPFILES) && reduceWork == null; + boolean mergeMapRed = + hconf.getBoolVar(ConfVars.HIVEMERGEMAPREDFILES) && + reduceWork != null; + if (mergeMapOnly || mergeMapRed) { + return true; + } + } else { + return false; + } + } + } + } + return false; + } + + /** + * Create and add any dependent move tasks + * + * @param currTask + * @param chDir + * @param fsOp + * @param parseCtx + * @param mvTasks + * @param hconf + * @param dependencyTask + * @return + */ + public static Path createMoveTask(Task currTask, boolean chDir, + FileSinkOperator fsOp, ParseContext parseCtx, List> mvTasks, + HiveConf hconf, DependencyCollectionTask dependencyTask) { + + Path dest = null; + + if (chDir) { + dest = new Path(fsOp.getConf().getFinalDirName()); + + // generate the temporary file + // it must be on the same file system as the current destination + Context baseCtx = parseCtx.getContext(); + String tmpDir = baseCtx.getExternalTmpFileURI(dest.toUri()); + + FileSinkDesc fileSinkDesc = fsOp.getConf(); + // Change all the linked file sink descriptors + if (fileSinkDesc.isLinkedFileSink()) { + for (FileSinkDesc fsConf:fileSinkDesc.getLinkedFileSinkDesc()) { + String fileName = Utilities.getFileNameFromDirName(fsConf.getDirName()); + fsConf.setParentDir(tmpDir); + fsConf.setDirName(tmpDir + Path.SEPARATOR + fileName); + } + } else { + fileSinkDesc.setDirName(tmpDir); + } + } + + Task mvTask = null; + + if (!chDir) { + mvTask = GenMapRedUtils.findMoveTask(mvTasks, fsOp); + } + + // Set the move task to be dependent on the current task + if (mvTask != null) { + GenMapRedUtils.addDependentMoveTasks(mvTask, hconf, currTask, dependencyTask); + } + + return dest; + } + private GenMapRedUtils() { // prevent instantiation } Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java (revision 1555253) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java (working copy) @@ -286,7 +286,7 @@ currOp = currOp.getParentOperators().get(0); while (true) { - if (currOp.getParentOperators() == null) { + if ((currOp.getParentOperators() == null) || (currOp.getParentOperators().isEmpty())) { break; } Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java (revision 1555253) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java (working copy) @@ -236,13 +236,14 @@ * @return the alias to the big table * @throws SemanticException */ - public static String genMapJoinOpAndLocalWork(MapredWork newWork, JoinOperator op, int mapJoinPos) + public static String genMapJoinOpAndLocalWork(HiveConf conf, MapredWork newWork, + JoinOperator op, int mapJoinPos) throws SemanticException { LinkedHashMap, OpParseContext> opParseCtxMap = newWork.getMapWork().getOpParseCtxMap(); QBJoinTree newJoinTree = newWork.getMapWork().getJoinTree(); // generate 
the map join operator; already checked the map join - MapJoinOperator newMapJoinOp = MapJoinProcessor.convertMapJoin(opParseCtxMap, op, + MapJoinOperator newMapJoinOp = MapJoinProcessor.convertMapJoin(conf, opParseCtxMap, op, newJoinTree, mapJoinPos, true, false); return genLocalWorkForMapJoin(newWork, newMapJoinOp, mapJoinPos); } @@ -315,7 +316,7 @@ * are cached in memory * @param noCheckOuterJoin */ - public static MapJoinOperator convertMapJoin( + public static MapJoinOperator convertMapJoin(HiveConf conf, LinkedHashMap, OpParseContext> opParseCtxMap, JoinOperator op, QBJoinTree joinTree, int mapJoinPos, boolean noCheckOuterJoin, boolean validateMapJoinTree) @@ -372,21 +373,90 @@ pos++; } + // create the map-join operator + MapJoinOperator mapJoinOp = convertJoinOpMapJoinOp(conf, opParseCtxMap, + op, joinTree, mapJoinPos, noCheckOuterJoin); + + + // remove old parents + for (pos = 0; pos < newParentOps.size(); pos++) { + newParentOps.get(pos).removeChild(oldReduceSinkParentOps.get(pos)); + newParentOps.get(pos).getChildOperators().add(mapJoinOp); + } + + + mapJoinOp.getParentOperators().removeAll(oldReduceSinkParentOps); + mapJoinOp.setParentOperators(newParentOps); + + // make sure only map-joins can be performed. + if (validateMapJoinTree) { + validateMapJoinTypes(mapJoinOp); + } + + // change the children of the original join operator to point to the map + // join operator + + return mapJoinOp; + } + + public static MapJoinOperator convertJoinOpMapJoinOp(HiveConf hconf, + LinkedHashMap, OpParseContext> opParseCtxMap, + JoinOperator op, QBJoinTree joinTree, int mapJoinPos, boolean noCheckOuterJoin) + throws SemanticException { + + JoinDesc desc = op.getConf(); + JoinCondDesc[] condns = desc.getConds(); + Byte[] tagOrder = desc.getTagOrder(); + + // outer join cannot be performed on a table which is being cached + if (!noCheckOuterJoin) { + if (checkMapJoin(mapJoinPos, condns) < 0) { + throw new SemanticException(ErrorMsg.NO_OUTER_MAPJOIN.getMsg()); + } + } + + Map> keyExprMap = new HashMap>(); + + // Walk over all the sources (which are guaranteed to be reduce sink + // operators). + // The join outputs a concatenation of all the inputs. 
+ QBJoinTree leftSrc = joinTree.getJoinSrc(); + List> oldReduceSinkParentOps = + new ArrayList>(); + if (leftSrc != null) { + // assert mapJoinPos == 0; + Operator parentOp = op.getParentOperators().get(0); + assert parentOp.getParentOperators().size() == 1; + oldReduceSinkParentOps.add(parentOp); + } + + + byte pos = 0; + for (String src : joinTree.getBaseSrc()) { + if (src != null) { + Operator parentOp = op.getParentOperators().get(pos); + assert parentOp.getParentOperators().size() == 1; + oldReduceSinkParentOps.add(parentOp); + } + pos++; + } + // get the join keys from old parent ReduceSink operators - for (pos = 0; pos < newParentOps.size(); pos++) { - ReduceSinkOperator oldPar = (ReduceSinkOperator) oldReduceSinkParentOps.get(pos); - ReduceSinkDesc rsconf = oldPar.getConf(); + for (pos = 0; pos < op.getParentOperators().size(); pos++) { + ReduceSinkOperator parent = (ReduceSinkOperator) oldReduceSinkParentOps.get(pos); + ReduceSinkDesc rsconf = parent.getConf(); List keys = rsconf.getKeyCols(); keyExprMap.put(pos, keys); } - // removing RS, only ExprNodeDesc is changed (key/value/filter exprs and colExprMap) - // others (output column-name, RR, schema) remain intact + List keyCols = keyExprMap.get(Byte.valueOf((byte) 0)); + StringBuilder keyOrder = new StringBuilder(); + for (int i = 0; i < keyCols.size(); i++) { + keyOrder.append("+"); + } + Map colExprMap = op.getColumnExprMap(); - List outputColumnNames = op.getConf().getOutputColumnNames(); - List schema = new ArrayList(op.getSchema().getSignature()); - Map> valueExprs = op.getConf().getExprs(); Map> newValueExprs = new HashMap>(); for (Map.Entry> entry : valueExprs.entrySet()) { @@ -410,45 +480,12 @@ } } - Map> filters = desc.getFilters(); - Map> newFilters = new HashMap>(); - for (Map.Entry> entry : filters.entrySet()) { - byte srcTag = entry.getKey(); - List filter = entry.getValue(); - - Operator terminal = oldReduceSinkParentOps.get(srcTag); - newFilters.put(srcTag, ExprNodeDescUtils.backtrack(filter, op, terminal)); - } - desc.setFilters(filters = newFilters); - - // remove old parents - for (pos = 0; pos < newParentOps.size(); pos++) { - newParentOps.get(pos).removeChild(oldReduceSinkParentOps.get(pos)); - } - - JoinCondDesc[] joinCondns = op.getConf().getConds(); - - Operator[] newPar = new Operator[newParentOps.size()]; - pos = 0; - for (Operator o : newParentOps) { - newPar[pos++] = o; - } - - List keyCols = keyExprMap.get(Byte.valueOf((byte) 0)); - StringBuilder keyOrder = new StringBuilder(); - for (int i = 0; i < keyCols.size(); i++) { - keyOrder.append("+"); - } - - TableDesc keyTableDesc = PlanUtils.getMapJoinKeyTableDesc(PlanUtils - .getFieldSchemasFromColumnList(keyCols, MAPJOINKEY_FIELDPREFIX)); - + // construct valueTableDescs and valueFilteredTableDescs List valueTableDescs = new ArrayList(); List valueFiltedTableDescs = new ArrayList(); - int[][] filterMap = desc.getFilterMap(); - for (pos = 0; pos < newParentOps.size(); pos++) { - List valueCols = newValueExprs.get(pos); + for (pos = 0; pos < op.getParentOperators().size(); pos++) { + List valueCols = newValueExprs.get(Byte.valueOf((byte) pos)); int length = valueCols.size(); List valueFilteredCols = new ArrayList(length); // deep copy expr node desc @@ -475,6 +512,19 @@ valueTableDescs.add(valueTableDesc); valueFiltedTableDescs.add(valueFilteredTableDesc); } + + Map> filters = desc.getFilters(); + Map> newFilters = new HashMap>(); + for (Map.Entry> entry : filters.entrySet()) { + byte srcTag = entry.getKey(); + List filter = entry.getValue(); + + Operator 
terminal = op.getParentOperators().get(srcTag); + newFilters.put(srcTag, ExprNodeDescUtils.backtrack(filter, op, terminal)); + } + desc.setFilters(filters = newFilters); + + // create dumpfile prefix needed to create descriptor String dumpFilePrefix = ""; if( joinTree.getMapAliases() != null ) { for(String mapAlias : joinTree.getMapAliases()) { @@ -484,15 +534,24 @@ } else { dumpFilePrefix = "mapfile"+PlanUtils.getCountForMapJoinDumpFilePrefix(); } + + List outputColumnNames = op.getConf().getOutputColumnNames(); + TableDesc keyTableDesc = PlanUtils.getMapJoinKeyTableDesc(hconf, + PlanUtils.getFieldSchemasFromColumnList(keyCols, MAPJOINKEY_FIELDPREFIX)); + JoinCondDesc[] joinCondns = op.getConf().getConds(); MapJoinDesc mapJoinDescriptor = new MapJoinDesc(keyExprMap, keyTableDesc, newValueExprs, valueTableDescs, valueFiltedTableDescs, outputColumnNames, mapJoinPos, joinCondns, filters, op.getConf().getNoOuterJoin(), dumpFilePrefix); + mapJoinDescriptor.setStatistics(op.getConf().getStatistics()); mapJoinDescriptor.setTagOrder(tagOrder); mapJoinDescriptor.setNullSafes(desc.getNullSafes()); mapJoinDescriptor.setFilterMap(desc.getFilterMap()); + // reduce sink row resolver used to generate map join op + RowResolver outputRS = opParseCtxMap.get(op).getRowResolver(); + MapJoinOperator mapJoinOp = (MapJoinOperator) OperatorFactory.getAndMakeChild( - mapJoinDescriptor, new RowSchema(outputRS.getColumnInfos()), newPar); + mapJoinDescriptor, new RowSchema(outputRS.getColumnInfos()), op.getParentOperators()); OpParseContext ctx = new OpParseContext(outputRS); opParseCtxMap.put(mapJoinOp, ctx); @@ -500,8 +559,6 @@ mapJoinOp.getConf().setReversedExprs(op.getConf().getReversedExprs()); mapJoinOp.setColumnExprMap(colExprMap); - // change the children of the original join operator to point to the map - // join operator List> childOps = op.getChildOperators(); for (Operator childOp : childOps) { childOp.replaceParent(op, mapJoinOp); @@ -508,16 +565,11 @@ } mapJoinOp.setChildOperators(childOps); - mapJoinOp.setParentOperators(newParentOps); op.setChildOperators(null); op.setParentOperators(null); - // make sure only map-joins can be performed. 
- if (validateMapJoinTree) { - validateMapJoinTypes(mapJoinOp); - } + return mapJoinOp; - return mapJoinOp; } /** @@ -533,7 +585,7 @@ * are cached in memory * @param noCheckOuterJoin */ - public static MapJoinOperator convertSMBJoinToMapJoin( + public static MapJoinOperator convertSMBJoinToMapJoin(HiveConf hconf, Map, OpParseContext> opParseCtxMap, SMBMapJoinOperator smbJoinOp, QBJoinTree joinTree, int bigTablePos, boolean noCheckOuterJoin) throws SemanticException { @@ -540,7 +592,7 @@ // Create a new map join operator SMBJoinDesc smbJoinDesc = smbJoinOp.getConf(); List keyCols = smbJoinDesc.getKeys().get(Byte.valueOf((byte) 0)); - TableDesc keyTableDesc = PlanUtils.getMapJoinKeyTableDesc(PlanUtils + TableDesc keyTableDesc = PlanUtils.getMapJoinKeyTableDesc(hconf, PlanUtils .getFieldSchemasFromColumnList(keyCols, MAPJOINKEY_FIELDPREFIX)); MapJoinDesc mapJoinDesc = new MapJoinDesc(smbJoinDesc.getKeys(), keyTableDesc, smbJoinDesc.getExprs(), @@ -549,6 +601,8 @@ bigTablePos, smbJoinDesc.getConds(), smbJoinDesc.getFilters(), smbJoinDesc.isNoOuterJoin(), smbJoinDesc.getDumpFilePrefix()); + mapJoinDesc.setStatistics(smbJoinDesc.getStatistics()); + RowResolver joinRS = opParseCtxMap.get(smbJoinOp).getRowResolver(); // The mapjoin has the same schema as the join operator MapJoinOperator mapJoinOp = (MapJoinOperator) OperatorFactory.getAndMakeChild( @@ -588,8 +642,8 @@ LinkedHashMap, OpParseContext> opParseCtxMap = pctx .getOpParseCtx(); - MapJoinOperator mapJoinOp = convertMapJoin(opParseCtxMap, op, joinTree, mapJoinPos, - noCheckOuterJoin, true); + MapJoinOperator mapJoinOp = convertMapJoin(pctx.getConf(), opParseCtxMap, op, + joinTree, mapJoinPos, noCheckOuterJoin, true); // create a dummy select to select all columns genSelectPlan(pctx, mapJoinOp); return mapJoinOp; @@ -609,7 +663,7 @@ * If see a right outer join, set lastSeenRightOuterJoin to true, clear the * bigTableCandidates, and add right side to the bigTableCandidates, it means * the right side of a right outer join always win. If see a full outer join, - * return null immediately (no one can be the big table, can not do a + * return empty set immediately (no one can be the big table, can not do a * mapjoin). * * @@ -635,7 +689,8 @@ // changed in future, these 2 are not missing. 
seenOuterJoin = true; lastSeenRightOuterJoin = false; - return null; + // empty set - cannot convert + return new HashSet(); } else if (joinType == JoinDesc.LEFT_OUTER_JOIN || joinType == JoinDesc.LEFT_SEMI_JOIN) { seenOuterJoin = true; Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/SkewJoinOptimizer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/SkewJoinOptimizer.java (revision 1555253) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/SkewJoinOptimizer.java (working copy) @@ -398,7 +398,8 @@ return parseContext.getTopToTable().get(tsOp); } } - if ((op.getParentOperators() == null) || (op.getParentOperators().size() > 1)) { + if ((op.getParentOperators() == null) || (op.getParentOperators().isEmpty()) || + (op.getParentOperators().size() > 1)) { return null; } op = op.getParentOperators().get(0); Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationOptimizer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationOptimizer.java (revision 1555253) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationOptimizer.java (working copy) @@ -168,7 +168,7 @@ int numAliases = order.length; Set bigTableCandidates = MapJoinProcessor.getBigTableCandidates(joinDesc.getConds()); - if (bigTableCandidates == null) { + if (bigTableCandidates.isEmpty()) { continue; } @@ -346,7 +346,7 @@ "involved in this operator"); return correlatedReduceSinkOperators; } - if (current.getParentOperators() == null) { + if ((current.getParentOperators() == null) || (current.getParentOperators().isEmpty())) { return correlatedReduceSinkOperators; } if (current instanceof PTFOperator) { Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinTaskDispatcher.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinTaskDispatcher.java (revision 1555253) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinTaskDispatcher.java (working copy) @@ -189,7 +189,8 @@ // optimize this newWork given the big table position String bigTableAlias = - MapJoinProcessor.genMapJoinOpAndLocalWork(newWork, newJoinOp, bigTablePosition); + MapJoinProcessor.genMapJoinOpAndLocalWork(physicalContext.getParseContext().getConf(), + newWork, newJoinOp, bigTablePosition); return new ObjectPair(newTask, bigTableAlias); } @@ -434,7 +435,7 @@ .getConds()); // no table could be the big table; there is no need to convert - if (bigTableCandidates == null) { + if (bigTableCandidates.isEmpty()) { return null; } Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SortMergeJoinTaskDispatcher.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SortMergeJoinTaskDispatcher.java (revision 1555253) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SortMergeJoinTaskDispatcher.java (working copy) @@ -205,7 +205,7 @@ Operator currOp = originalSMBJoinOp; while (true) { - if (currOp.getChildOperators() == null) { + if ((currOp.getChildOperators() == null) || (currOp.getChildOperators().isEmpty())) { if (currOp instanceof FileSinkOperator) { FileSinkOperator fsOp = (FileSinkOperator)currOp; // The query has enforced that a sort-merge join should be performed. 
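Together with the CorrelationOptimizer and CommonJoinTaskDispatcher hunks above, the getBigTableCandidates change replaces a null return with an empty set, so callers test isEmpty() rather than null-checking. A minimal sketch of the new caller pattern (hypothetical wrapper, not part of this patch):

    import java.util.Set;
    import org.apache.hadoop.hive.ql.optimizer.MapJoinProcessor;
    import org.apache.hadoop.hive.ql.plan.JoinCondDesc;

    public class BigTableCheckSketch {
      // True when at least one input can serve as the streamed (big) table.
      static boolean canConvertToMapJoin(JoinCondDesc[] conds) {
        Set<Integer> bigTableCandidates = MapJoinProcessor.getBigTableCandidates(conds);
        // an empty set now means "cannot convert"; no null check required
        return !bigTableCandidates.isEmpty();
      }
    }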
@@ -433,7 +433,8 @@ opParseContextMap.put(newSMBJoinOp, opParseContextMap.get(oldSMBJoinOp)); // generate the map join operator - return MapJoinProcessor.convertSMBJoinToMapJoin(opParseContextMap, newSMBJoinOp, + return MapJoinProcessor.convertSMBJoinToMapJoin(physicalContext.getConf(), + opParseContextMap, newSMBJoinOp, joinTree, mapJoinPos, true); } } Index: ql/src/java/org/apache/hadoop/hive/ql/parse/MapReduceCompiler.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/MapReduceCompiler.java (revision 1555253) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/MapReduceCompiler.java (working copy) @@ -22,10 +22,10 @@ import java.io.Serializable; import java.util.ArrayList; import java.util.HashMap; -import java.util.HashSet; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.Set; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -33,21 +33,14 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathFilter; import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.Warehouse; -import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.ql.Context; -import org.apache.hadoop.hive.ql.ErrorMsg; -import org.apache.hadoop.hive.ql.exec.ColumnStatsTask; import org.apache.hadoop.hive.ql.exec.ConditionalTask; -import org.apache.hadoop.hive.ql.exec.FetchTask; import org.apache.hadoop.hive.ql.exec.FileSinkOperator; import org.apache.hadoop.hive.ql.exec.MapJoinOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; -import org.apache.hadoop.hive.ql.exec.StatsTask; import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.exec.Task; -import org.apache.hadoop.hive.ql.exec.TaskFactory; import org.apache.hadoop.hive.ql.exec.UnionOperator; import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.exec.mr.ExecDriver; @@ -61,9 +54,6 @@ import org.apache.hadoop.hive.ql.lib.NodeProcessor; import org.apache.hadoop.hive.ql.lib.Rule; import org.apache.hadoop.hive.ql.lib.RuleRegExp; -import org.apache.hadoop.hive.ql.metadata.Hive; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.optimizer.GenMRFileSink1; import org.apache.hadoop.hive.ql.optimizer.GenMROperator; import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext; @@ -73,302 +63,25 @@ import org.apache.hadoop.hive.ql.optimizer.GenMRRedSink3; import org.apache.hadoop.hive.ql.optimizer.GenMRTableScan1; import org.apache.hadoop.hive.ql.optimizer.GenMRUnion1; -import org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils; import org.apache.hadoop.hive.ql.optimizer.MapJoinFactory; import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalContext; import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalOptimizer; -import org.apache.hadoop.hive.ql.plan.ColumnStatsDesc; -import org.apache.hadoop.hive.ql.plan.ColumnStatsWork; -import org.apache.hadoop.hive.ql.plan.CreateTableDesc; -import org.apache.hadoop.hive.ql.plan.DDLWork; -import org.apache.hadoop.hive.ql.plan.FetchWork; -import org.apache.hadoop.hive.ql.plan.LoadFileDesc; -import org.apache.hadoop.hive.ql.plan.LoadTableDesc; import org.apache.hadoop.hive.ql.plan.MapWork; import org.apache.hadoop.hive.ql.plan.MapredWork; import org.apache.hadoop.hive.ql.plan.MoveWork; import 
org.apache.hadoop.hive.ql.plan.OperatorDesc; -import org.apache.hadoop.hive.ql.plan.PlanUtils; -import org.apache.hadoop.hive.ql.plan.TableDesc; -import org.apache.hadoop.hive.ql.session.SessionState.LogHelper; import org.apache.hadoop.hive.shims.ShimLoader; -public class MapReduceCompiler { +public class MapReduceCompiler extends TaskCompiler { protected final Log LOG = LogFactory.getLog(MapReduceCompiler.class); - private Hive db; - protected LogHelper console; - private HiveConf conf; - public MapReduceCompiler() { } - public void init(HiveConf conf, LogHelper console, Hive db) { - this.conf = conf; - this.db = db; - this.console = console; - } - - @SuppressWarnings({"nls", "unchecked"}) - public void compile(final ParseContext pCtx, final List> rootTasks, - final HashSet inputs, final HashSet outputs) throws SemanticException { - - Context ctx = pCtx.getContext(); - GlobalLimitCtx globalLimitCtx = pCtx.getGlobalLimitCtx(); - QB qb = pCtx.getQB(); - List> mvTask = new ArrayList>(); - - List loadTableWork = pCtx.getLoadTableWork(); - List loadFileWork = pCtx.getLoadFileWork(); - - boolean isCStats = qb.isAnalyzeRewrite(); - - if (pCtx.getFetchTask() != null) { - return; - } - - /* - * In case of a select, use a fetch task instead of a move task. - * If the select is from analyze table column rewrite, don't create a fetch task. Instead create - * a column stats task later. - */ - if (pCtx.getQB().getIsQuery() && !isCStats) { - if ((!loadTableWork.isEmpty()) || (loadFileWork.size() != 1)) { - throw new SemanticException(ErrorMsg.GENERIC_ERROR.getMsg()); - } - - LoadFileDesc loadFileDesc = loadFileWork.get(0); - - String cols = loadFileDesc.getColumns(); - String colTypes = loadFileDesc.getColumnTypes(); - - TableDesc resultTab = pCtx.getFetchTabledesc(); - if (resultTab == null) { - String resFileFormat = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYRESULTFILEFORMAT); - resultTab = PlanUtils.getDefaultQueryOutputTableDesc(cols, colTypes, resFileFormat); - } - - FetchWork fetch = new FetchWork(loadFileDesc.getSourcePath(), - resultTab, qb.getParseInfo().getOuterQueryLimit()); - fetch.setSource(pCtx.getFetchSource()); - fetch.setSink(pCtx.getFetchSink()); - - pCtx.setFetchTask((FetchTask) TaskFactory.get(fetch, conf)); - - // For the FetchTask, the limit optimization requires we fetch all the rows - // in memory and count how many rows we get. It's not practical if the - // limit factor is too big - int fetchLimit = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVELIMITOPTMAXFETCH); - if (globalLimitCtx.isEnable() && globalLimitCtx.getGlobalLimit() > fetchLimit) { - LOG.info("For FetchTask, LIMIT " + globalLimitCtx.getGlobalLimit() + " > " + fetchLimit - + ". 
Doesn't qualify limit optimiztion."); - globalLimitCtx.disableOpt(); - } - } else if (!isCStats) { - for (LoadTableDesc ltd : loadTableWork) { - Task tsk = TaskFactory.get(new MoveWork(null, null, ltd, null, false), conf); - mvTask.add(tsk); - // Check to see if we are stale'ing any indexes and auto-update them if we want - if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEINDEXAUTOUPDATE)) { - IndexUpdater indexUpdater = new IndexUpdater(loadTableWork, inputs, conf); - try { - List> indexUpdateTasks = indexUpdater - .generateUpdateTasks(); - for (Task updateTask : indexUpdateTasks) { - tsk.addDependentTask(updateTask); - } - } catch (HiveException e) { - console - .printInfo("WARNING: could not auto-update stale indexes, which are not in sync"); - } - } - } - - boolean oneLoadFile = true; - for (LoadFileDesc lfd : loadFileWork) { - if (qb.isCTAS()) { - assert (oneLoadFile); // should not have more than 1 load file for - // CTAS - // make the movetask's destination directory the table's destination. - Path location; - String loc = qb.getTableDesc().getLocation(); - if (loc == null) { - // get the table's default location - Table dumpTable; - Path targetPath; - try { - dumpTable = db.newTable(qb.getTableDesc().getTableName()); - if (!db.databaseExists(dumpTable.getDbName())) { - throw new SemanticException("ERROR: The database " + dumpTable.getDbName() - + " does not exist."); - } - Warehouse wh = new Warehouse(conf); - targetPath = wh.getTablePath(db.getDatabase(dumpTable.getDbName()), dumpTable - .getTableName()); - } catch (HiveException e) { - throw new SemanticException(e); - } catch (MetaException e) { - throw new SemanticException(e); - } - - location = targetPath; - } else { - location = new Path(loc); - } - lfd.setTargetDir(location); - - oneLoadFile = false; - } - mvTask.add(TaskFactory.get(new MoveWork(null, null, null, lfd, false), conf)); - } - } - - // generate map reduce plans - ParseContext tempParseContext = getParseContext(pCtx, rootTasks); - GenMRProcContext procCtx = new GenMRProcContext( - conf, - new HashMap, Task>(), - tempParseContext, mvTask, rootTasks, - new LinkedHashMap, GenMapRedCtx>(), - inputs, outputs); - - // create a walker which walks the tree in a DFS manner while maintaining - // the operator stack. 
- // The dispatcher generates the plan from the operator tree - Map opRules = new LinkedHashMap(); - opRules.put(new RuleRegExp(new String("R1"), - TableScanOperator.getOperatorName() + "%"), - new GenMRTableScan1()); - opRules.put(new RuleRegExp(new String("R2"), - TableScanOperator.getOperatorName() + "%.*" + ReduceSinkOperator.getOperatorName() + "%"), - new GenMRRedSink1()); - opRules.put(new RuleRegExp(new String("R3"), - ReduceSinkOperator.getOperatorName() + "%.*" + ReduceSinkOperator.getOperatorName() + "%"), - new GenMRRedSink2()); - opRules.put(new RuleRegExp(new String("R4"), - FileSinkOperator.getOperatorName() + "%"), - new GenMRFileSink1()); - opRules.put(new RuleRegExp(new String("R5"), - UnionOperator.getOperatorName() + "%"), - new GenMRUnion1()); - opRules.put(new RuleRegExp(new String("R6"), - UnionOperator.getOperatorName() + "%.*" + ReduceSinkOperator.getOperatorName() + "%"), - new GenMRRedSink3()); - opRules.put(new RuleRegExp(new String("R7"), - MapJoinOperator.getOperatorName() + "%"), - MapJoinFactory.getTableScanMapJoin()); - - // The dispatcher fires the processor corresponding to the closest matching - // rule and passes the context along - Dispatcher disp = new DefaultRuleDispatcher(new GenMROperator(), opRules, - procCtx); - - GraphWalker ogw = new GenMapRedWalker(disp); - ArrayList topNodes = new ArrayList(); - topNodes.addAll(pCtx.getTopOps().values()); - ogw.startWalking(topNodes, null); - - /* - * If the query was the result of analyze table column compute statistics rewrite, create - * a column stats task instead of a fetch task to persist stats to the metastore. - */ - if (isCStats) { - genColumnStatsTask(qb, loadTableWork, loadFileWork, rootTasks); - } - - // reduce sink does not have any kids - since the plan by now has been - // broken up into multiple - // tasks, iterate over all tasks. 
- // For each task, go over all operators recursively - for (Task rootTask : rootTasks) { - breakTaskTree(rootTask); - } - - // For each task, set the key descriptor for the reducer - for (Task rootTask : rootTasks) { - GenMapRedUtils.setKeyAndValueDescForTaskTree(rootTask); - } - - // If a task contains an operator which instructs bucketizedhiveinputformat - // to be used, please do so - for (Task rootTask : rootTasks) { - setInputFormat(rootTask); - } - - PhysicalContext physicalContext = new PhysicalContext(conf, - getParseContext(pCtx, rootTasks), ctx, rootTasks, pCtx.getFetchTask()); - PhysicalOptimizer physicalOptimizer = new PhysicalOptimizer( - physicalContext, conf); - physicalOptimizer.optimize(); - - decideExecMode(rootTasks, ctx, globalLimitCtx); - - if (qb.isCTAS()) { - // generate a DDL task and make it a dependent task of the leaf - CreateTableDesc crtTblDesc = qb.getTableDesc(); - - crtTblDesc.validate(); - - // Clear the output for CTAS since we don't need the output from the - // mapredWork, the - // DDLWork at the tail of the chain will have the output - outputs.clear(); - - Task crtTblTask = TaskFactory.get(new DDLWork( - inputs, outputs, crtTblDesc), conf); - - // find all leaf tasks and make the DDLTask as a dependent task of all of - // them - HashSet> leaves = new HashSet>(); - getLeafTasks(rootTasks, leaves); - assert (leaves.size() > 0); - for (Task task : leaves) { - if (task instanceof StatsTask) { - // StatsTask require table to already exist - for (Task parentOfStatsTask : task.getParentTasks()) { - parentOfStatsTask.addDependentTask(crtTblTask); - } - for (Task parentOfCrtTblTask : crtTblTask.getParentTasks()) { - parentOfCrtTblTask.removeDependentTask(task); - } - crtTblTask.addDependentTask(task); - } else { - task.addDependentTask(crtTblTask); - } - } - } - - if (globalLimitCtx.isEnable() && pCtx.getFetchTask() != null) { - LOG.info("set least row check for FetchTask: " + globalLimitCtx.getGlobalLimit()); - pCtx.getFetchTask().getWork().setLeastNumRows(globalLimitCtx.getGlobalLimit()); - } - - if (globalLimitCtx.isEnable() && globalLimitCtx.getLastReduceLimitDesc() != null) { - LOG.info("set least row check for LimitDesc: " + globalLimitCtx.getGlobalLimit()); - globalLimitCtx.getLastReduceLimitDesc().setLeastRows(globalLimitCtx.getGlobalLimit()); - List mrTasks = Utilities.getMRTasks(rootTasks); - for (ExecDriver tsk : mrTasks) { - tsk.setRetryCmdWhenFail(true); - } - } - } - - private void setInputFormat(MapWork work, Operator op) { - if (op.isUseBucketizedHiveInputFormat()) { - work.setUseBucketizedHiveInputFormat(true); - return; - } - - if (op.getChildOperators() != null) { - for (Operator childOp : op.getChildOperators()) { - setInputFormat(work, childOp); - } - } - } - // loop over all the tasks recursively - private void setInputFormat(Task task) { + @Override + protected void setInputFormat(Task task) { if (task instanceof ExecDriver) { MapWork work = ((MapredWork) task.getWork()).getMapWork(); HashMap> opMap = work.getAliasToWork(); @@ -392,6 +105,20 @@ } } + private void setInputFormat(MapWork work, Operator op) { + if (op.isUseBucketizedHiveInputFormat()) { + work.setUseBucketizedHiveInputFormat(true); + return; + } + + if (op.getChildOperators() != null) { + for (Operator childOp : op.getChildOperators()) { + setInputFormat(work, childOp); + } + } + } + + @Override public ParseContext getParseContext(ParseContext pCtx, List> rootTasks) { return new ParseContext(conf, pCtx.getQB(), pCtx.getParseTree(), pCtx.getOpToPartPruner(), 
pCtx.getOpToPartList(), pCtx.getTopOps(), @@ -452,67 +179,6 @@ } /** - * A helper function to generate a column stats task on top of map-red task. The column stats - * task fetches from the output of the map-red task, constructs the column stats object and - * persists it to the metastore. - * - * This method generates a plan with a column stats task on top of map-red task and sets up the - * appropriate metadata to be used during execution. - * - * @param qb - */ - @SuppressWarnings("unchecked") - private void genColumnStatsTask(QB qb, List loadTableWork, - List loadFileWork, List> rootTasks) { - QBParseInfo qbParseInfo = qb.getParseInfo(); - ColumnStatsTask cStatsTask = null; - ColumnStatsWork cStatsWork = null; - FetchWork fetch = null; - String tableName = qbParseInfo.getTableName(); - String partName = qbParseInfo.getPartName(); - List colName = qbParseInfo.getColName(); - List colType = qbParseInfo.getColType(); - boolean isTblLevel = qbParseInfo.isTblLvl(); - - String cols = loadFileWork.get(0).getColumns(); - String colTypes = loadFileWork.get(0).getColumnTypes(); - - String resFileFormat = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYRESULTFILEFORMAT); - TableDesc resultTab = PlanUtils.getDefaultQueryOutputTableDesc(cols, colTypes, resFileFormat); - - fetch = new FetchWork(loadFileWork.get(0).getSourcePath(), - resultTab, qb.getParseInfo().getOuterQueryLimit()); - - ColumnStatsDesc cStatsDesc = new ColumnStatsDesc(tableName, partName, - colName, colType, isTblLevel); - cStatsWork = new ColumnStatsWork(fetch, cStatsDesc); - cStatsTask = (ColumnStatsTask) TaskFactory.get(cStatsWork, conf); - rootTasks.add(cStatsTask); - } - - /** - * Find all leaf tasks of the list of root tasks. - */ - private void getLeafTasks(List> rootTasks, - HashSet> leaves) { - - for (Task root : rootTasks) { - getLeafTasks(root, leaves); - } - } - - private void getLeafTasks(Task task, - HashSet> leaves) { - if (task.getDependentTasks() == null) { - if (!leaves.contains(task)) { - leaves.add(task); - } - } else { - getLeafTasks(task.getDependentTasks(), leaves); - } - } - - /** * Make a best guess at trying to find the number of reducers */ private static int getNumberOfReducers(MapredWork mrwork, HiveConf conf) { @@ -527,7 +193,8 @@ return conf.getIntVar(HiveConf.ConfVars.HADOOPNUMREDUCERS); } - private void decideExecMode(List> rootTasks, Context ctx, + @Override + protected void decideExecMode(List> rootTasks, Context ctx, GlobalLimitCtx globalLimitCtx) throws SemanticException { @@ -603,4 +270,74 @@ console.printInfo("Automatically selecting local only mode for query"); } } + + @Override + protected void optimizeTaskPlan(List> rootTasks, + ParseContext pCtx, Context ctx) throws SemanticException { + // reduce sink does not have any kids - since the plan by now has been + // broken up into multiple + // tasks, iterate over all tasks. 
+ // For each task, go over all operators recursively + for (Task rootTask : rootTasks) { + breakTaskTree(rootTask); + } + + + PhysicalContext physicalContext = new PhysicalContext(conf, + getParseContext(pCtx, rootTasks), ctx, rootTasks, pCtx.getFetchTask()); + PhysicalOptimizer physicalOptimizer = new PhysicalOptimizer( + physicalContext, conf); + physicalOptimizer.optimize(); + + } + + @Override + protected void generateTaskTree(List> rootTasks, ParseContext pCtx, + List> mvTask, Set inputs, Set outputs) throws SemanticException { + + // generate map reduce plans + ParseContext tempParseContext = getParseContext(pCtx, rootTasks); + GenMRProcContext procCtx = new GenMRProcContext( + conf, + new HashMap, Task>(), + tempParseContext, mvTask, rootTasks, + new LinkedHashMap, GenMapRedCtx>(), + inputs, outputs); + + // create a walker which walks the tree in a DFS manner while maintaining + // the operator stack. + // The dispatcher generates the plan from the operator tree + Map opRules = new LinkedHashMap(); + opRules.put(new RuleRegExp(new String("R1"), + TableScanOperator.getOperatorName() + "%"), + new GenMRTableScan1()); + opRules.put(new RuleRegExp(new String("R2"), + TableScanOperator.getOperatorName() + "%.*" + ReduceSinkOperator.getOperatorName() + "%"), + new GenMRRedSink1()); + opRules.put(new RuleRegExp(new String("R3"), + ReduceSinkOperator.getOperatorName() + "%.*" + ReduceSinkOperator.getOperatorName() + "%"), + new GenMRRedSink2()); + opRules.put(new RuleRegExp(new String("R4"), + FileSinkOperator.getOperatorName() + "%"), + new GenMRFileSink1()); + opRules.put(new RuleRegExp(new String("R5"), + UnionOperator.getOperatorName() + "%"), + new GenMRUnion1()); + opRules.put(new RuleRegExp(new String("R6"), + UnionOperator.getOperatorName() + "%.*" + ReduceSinkOperator.getOperatorName() + "%"), + new GenMRRedSink3()); + opRules.put(new RuleRegExp(new String("R7"), + MapJoinOperator.getOperatorName() + "%"), + MapJoinFactory.getTableScanMapJoin()); + + // The dispatcher fires the processor corresponding to the closest matching + // rule and passes the context along + Dispatcher disp = new DefaultRuleDispatcher(new GenMROperator(), opRules, + procCtx); + + GraphWalker ogw = new GenMapRedWalker(disp); + ArrayList topNodes = new ArrayList(); + topNodes.addAll(pCtx.getTopOps().values()); + ogw.startWalking(topNodes, null); + } } Index: ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (revision 1555253) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (working copy) @@ -8958,7 +8958,7 @@ if (!ctx.getExplainLogical()) { // At this point we have the complete operator tree // from which we want to create the map-reduce plan - MapReduceCompiler compiler = new MapReduceCompiler(); + TaskCompiler compiler = TaskCompilerFactory.getCompiler(conf, pCtx); compiler.init(conf, console, db); compiler.compile(pCtx, rootTasks, inputs, outputs); fetchTask = pCtx.getFetchTask(); Index: ql/src/java/org/apache/hadoop/hive/ql/parse/TableAccessAnalyzer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/TableAccessAnalyzer.java (revision 1555253) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/TableAccessAnalyzer.java (working copy) @@ -239,7 +239,7 @@ // and filters. 
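The hunk below applies the same null-or-empty guard already seen in the GroupByOptimizer, SkewJoinOptimizer, and CorrelationOptimizer hunks above. Extracted from those call sites, the hardened upstream walk looks like this (a sketch, not part of this patch):

    import java.util.List;
    import org.apache.hadoop.hive.ql.exec.Operator;
    import org.apache.hadoop.hive.ql.plan.OperatorDesc;

    public class RootFinderSketch {
      // Walks single-parent chains upward, treating a null or empty parent list as the root.
      static Operator<? extends OperatorDesc> findRoot(Operator<? extends OperatorDesc> op) {
        while (true) {
          List<Operator<? extends OperatorDesc>> parents = op.getParentOperators();
          if (parents == null || parents.isEmpty()) {
            return op;
          }
          op = parents.get(0);
        }
      }
    }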
while (true) { parentOps = currOp.getParentOperators(); - if (parentOps == null) { + if ((parentOps == null) || (parentOps.isEmpty())) { return (TableScanOperator) currOp; } Index: ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java (working copy) @@ -0,0 +1,381 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.parse; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Set; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.Warehouse; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.ql.Context; +import org.apache.hadoop.hive.ql.ErrorMsg; +import org.apache.hadoop.hive.ql.exec.ColumnStatsTask; +import org.apache.hadoop.hive.ql.exec.FetchTask; +import org.apache.hadoop.hive.ql.exec.StatsTask; +import org.apache.hadoop.hive.ql.exec.Task; +import org.apache.hadoop.hive.ql.exec.TaskFactory; +import org.apache.hadoop.hive.ql.exec.Utilities; +import org.apache.hadoop.hive.ql.exec.mr.ExecDriver; +import org.apache.hadoop.hive.ql.hooks.ReadEntity; +import org.apache.hadoop.hive.ql.hooks.WriteEntity; +import org.apache.hadoop.hive.ql.metadata.Hive; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils; +import org.apache.hadoop.hive.ql.plan.ColumnStatsDesc; +import org.apache.hadoop.hive.ql.plan.ColumnStatsWork; +import org.apache.hadoop.hive.ql.plan.CreateTableDesc; +import org.apache.hadoop.hive.ql.plan.DDLWork; +import org.apache.hadoop.hive.ql.plan.FetchWork; +import org.apache.hadoop.hive.ql.plan.LoadFileDesc; +import org.apache.hadoop.hive.ql.plan.LoadTableDesc; +import org.apache.hadoop.hive.ql.plan.MoveWork; +import org.apache.hadoop.hive.ql.plan.PlanUtils; +import org.apache.hadoop.hive.ql.plan.TableDesc; +import org.apache.hadoop.hive.ql.session.SessionState.LogHelper; + +/** + * TaskCompiler is the base class for classes that compile + * operator pipelines into tasks.
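+ *
+ * Typical driver usage mirrors the call sequence this patch adds to
+ * SemanticAnalyzer (an illustrative sketch; the variable names are
+ * placeholders):
+ *
+ * <pre>
+ *   TaskCompiler compiler = TaskCompilerFactory.getCompiler(conf, pCtx);
+ *   compiler.init(conf, console, db);
+ *   compiler.compile(pCtx, rootTasks, inputs, outputs);
+ *   FetchTask fetchTask = pCtx.getFetchTask();
+ * </pre>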
+ */ +public abstract class TaskCompiler { + + protected final Log LOG = LogFactory.getLog(TaskCompiler.class); + + protected Hive db; + protected LogHelper console; + protected HiveConf conf; + + public void init(HiveConf conf, LogHelper console, Hive db) { + this.conf = conf; + this.db = db; + this.console = console; + } + + @SuppressWarnings({"nls", "unchecked"}) + public void compile(final ParseContext pCtx, final List> rootTasks, + final HashSet inputs, final HashSet outputs) throws SemanticException { + + Context ctx = pCtx.getContext(); + GlobalLimitCtx globalLimitCtx = pCtx.getGlobalLimitCtx(); + QB qb = pCtx.getQB(); + List> mvTask = new ArrayList>(); + + List loadTableWork = pCtx.getLoadTableWork(); + List loadFileWork = pCtx.getLoadFileWork(); + + boolean isCStats = qb.isAnalyzeRewrite(); + + if (pCtx.getFetchTask() != null) { + return; + } + + optimizeOperatorPlan(pCtx, inputs, outputs); + + /* + * In case of a select, use a fetch task instead of a move task. + * If the select is from analyze table column rewrite, don't create a fetch task. Instead create + * a column stats task later. + */ + if (pCtx.getQB().getIsQuery() && !isCStats) { + if ((!loadTableWork.isEmpty()) || (loadFileWork.size() != 1)) { + throw new SemanticException(ErrorMsg.GENERIC_ERROR.getMsg()); + } + + LoadFileDesc loadFileDesc = loadFileWork.get(0); + + String cols = loadFileDesc.getColumns(); + String colTypes = loadFileDesc.getColumnTypes(); + + TableDesc resultTab = pCtx.getFetchTabledesc(); + if (resultTab == null) { + String resFileFormat = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYRESULTFILEFORMAT); + resultTab = PlanUtils.getDefaultQueryOutputTableDesc(cols, colTypes, resFileFormat); + } + + FetchWork fetch = new FetchWork(loadFileDesc.getSourcePath(), + resultTab, qb.getParseInfo().getOuterQueryLimit()); + fetch.setSource(pCtx.getFetchSource()); + fetch.setSink(pCtx.getFetchSink()); + + pCtx.setFetchTask((FetchTask) TaskFactory.get(fetch, conf)); + + // For the FetchTask, the limit optimization requires we fetch all the rows + // in memory and count how many rows we get. It's not practical if the + // limit factor is too big + int fetchLimit = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVELIMITOPTMAXFETCH); + if (globalLimitCtx.isEnable() && globalLimitCtx.getGlobalLimit() > fetchLimit) { + LOG.info("For FetchTask, LIMIT " + globalLimitCtx.getGlobalLimit() + " > " + fetchLimit + + ". Doesn't qualify limit optimization."); + globalLimitCtx.disableOpt(); + } + } else if (!isCStats) { + for (LoadTableDesc ltd : loadTableWork) { + Task tsk = TaskFactory.get(new MoveWork(null, null, ltd, null, false), conf); + mvTask.add(tsk); + // Check to see if we are making any indexes stale and auto-update them if we want + if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEINDEXAUTOUPDATE)) { + IndexUpdater indexUpdater = new IndexUpdater(loadTableWork, inputs, conf); + try { + List> indexUpdateTasks = indexUpdater + .generateUpdateTasks(); + for (Task updateTask : indexUpdateTasks) { + tsk.addDependentTask(updateTask); + } + } catch (HiveException e) { + console + .printInfo("WARNING: could not auto-update stale indexes, which are not in sync"); + } + } + } + + boolean oneLoadFile = true; + for (LoadFileDesc lfd : loadFileWork) { + if (qb.isCTAS()) { + assert (oneLoadFile); // should not have more than 1 load file for + // CTAS + // make the move task's destination directory the table's destination.
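+ // As an illustration (hypothetical table name; the actual URI depends on
+ // the warehouse configuration): CREATE TABLE t AS SELECT ... without a
+ // LOCATION clause takes the branch below that computes the default via
+ //   Warehouse wh = new Warehouse(conf);
+ //   Path target = wh.getTablePath(db.getDatabase("default"), "t");
+ // while an explicit LOCATION is used verbatim through new Path(loc).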
+ Path location; + String loc = qb.getTableDesc().getLocation(); + if (loc == null) { + // get the table's default location + Table dumpTable; + Path targetPath; + try { + dumpTable = db.newTable(qb.getTableDesc().getTableName()); + if (!db.databaseExists(dumpTable.getDbName())) { + throw new SemanticException("ERROR: The database " + dumpTable.getDbName() + + " does not exist."); + } + Warehouse wh = new Warehouse(conf); + targetPath = wh.getTablePath(db.getDatabase(dumpTable.getDbName()), dumpTable + .getTableName()); + } catch (HiveException e) { + throw new SemanticException(e); + } catch (MetaException e) { + throw new SemanticException(e); + } + + location = targetPath; + } else { + location = new Path(loc); + } + lfd.setTargetDir(location); + + oneLoadFile = false; + } + mvTask.add(TaskFactory.get(new MoveWork(null, null, null, lfd, false), conf)); + } + } + + generateTaskTree(rootTasks, pCtx, mvTask, inputs, outputs); + + /* + * If the query was the result of analyze table column compute statistics rewrite, create + * a column stats task instead of a fetch task to persist stats to the metastore. + */ + if (isCStats) { + genColumnStatsTask(qb, loadTableWork, loadFileWork, rootTasks); + } + + // For each task, set the key descriptor for the reducer + for (Task rootTask : rootTasks) { + GenMapRedUtils.setKeyAndValueDescForTaskTree(rootTask); + } + + // If a task contains an operator which instructs bucketizedhiveinputformat + // to be used, set that input format on the task + for (Task rootTask : rootTasks) { + setInputFormat(rootTask); + } + + optimizeTaskPlan(rootTasks, pCtx, ctx); + + decideExecMode(rootTasks, ctx, globalLimitCtx); + + if (qb.isCTAS()) { + // generate a DDL task and make it a dependent task of the leaf + CreateTableDesc crtTblDesc = qb.getTableDesc(); + + crtTblDesc.validate(); + + // Clear the output for CTAS since we don't need the output from the + // mapredWork; the + // DDLWork at the tail of the chain will have the output + outputs.clear(); + + Task crtTblTask = TaskFactory.get(new DDLWork( + inputs, outputs, crtTblDesc), conf); + + // find all leaf tasks and make the DDLTask a dependent task of all of + // them + HashSet> leaves = new LinkedHashSet>(); + getLeafTasks(rootTasks, leaves); + assert (leaves.size() > 0); + for (Task task : leaves) { + if (task instanceof StatsTask) { + // StatsTask requires the table to already exist + for (Task parentOfStatsTask : task.getParentTasks()) { + parentOfStatsTask.addDependentTask(crtTblTask); + } + for (Task parentOfCrtTblTask : crtTblTask.getParentTasks()) { + parentOfCrtTblTask.removeDependentTask(task); + } + crtTblTask.addDependentTask(task); + } else { + task.addDependentTask(crtTblTask); + } + } + } + + if (globalLimitCtx.isEnable() && pCtx.getFetchTask() != null) { + LOG.info("set least row check for FetchTask: " + globalLimitCtx.getGlobalLimit()); + pCtx.getFetchTask().getWork().setLeastNumRows(globalLimitCtx.getGlobalLimit()); + } + + if (globalLimitCtx.isEnable() && globalLimitCtx.getLastReduceLimitDesc() != null) { + LOG.info("set least row check for LimitDesc: " + globalLimitCtx.getGlobalLimit()); + globalLimitCtx.getLastReduceLimitDesc().setLeastRows(globalLimitCtx.getGlobalLimit()); + List mrTasks = Utilities.getMRTasks(rootTasks); + for (ExecDriver tsk : mrTasks) { + tsk.setRetryCmdWhenFail(true); + } + } + } + + + /** + * A helper function to generate a column stats task on top of the map-red task.
The column stats + * task fetches from the output of the map-red task, constructs the column stats object and + * persists it to the metastore. + * + * This method generates a plan with a column stats task on top of the map-red task and sets up the + * appropriate metadata to be used during execution. + * + * @param qb + */ + @SuppressWarnings("unchecked") + protected void genColumnStatsTask(QB qb, List loadTableWork, + List loadFileWork, List> rootTasks) { + QBParseInfo qbParseInfo = qb.getParseInfo(); + ColumnStatsTask cStatsTask = null; + ColumnStatsWork cStatsWork = null; + FetchWork fetch = null; + String tableName = qbParseInfo.getTableName(); + String partName = qbParseInfo.getPartName(); + List colName = qbParseInfo.getColName(); + List colType = qbParseInfo.getColType(); + boolean isTblLevel = qbParseInfo.isTblLvl(); + + String cols = loadFileWork.get(0).getColumns(); + String colTypes = loadFileWork.get(0).getColumnTypes(); + + String resFileFormat = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYRESULTFILEFORMAT); + TableDesc resultTab = PlanUtils.getDefaultQueryOutputTableDesc(cols, colTypes, resFileFormat); + + fetch = new FetchWork(loadFileWork.get(0).getSourcePath(), + resultTab, qb.getParseInfo().getOuterQueryLimit()); + + ColumnStatsDesc cStatsDesc = new ColumnStatsDesc(tableName, partName, + colName, colType, isTblLevel); + cStatsWork = new ColumnStatsWork(fetch, cStatsDesc); + cStatsTask = (ColumnStatsTask) TaskFactory.get(cStatsWork, conf); + rootTasks.add(cStatsTask); + } + + + /** + * Find all leaf tasks of the list of root tasks. + */ + protected void getLeafTasks(List> rootTasks, + HashSet> leaves) { + + for (Task root : rootTasks) { + getLeafTasks(root, leaves); + } + } + + private void getLeafTasks(Task task, + HashSet> leaves) { + if (task.getDependentTasks() == null) { + if (!leaves.contains(task)) { + leaves.add(task); + } + } else { + getLeafTasks(task.getDependentTasks(), leaves); + } + } + + /* + * Called to transform tasks into local tasks where possible/desirable + */ + protected abstract void decideExecMode(List> rootTasks, Context ctx, + GlobalLimitCtx globalLimitCtx) throws SemanticException; + + /* + * Called at the beginning of the compile phase to have another chance to optimize the operator plan + */ + protected void optimizeOperatorPlan(ParseContext pCtxSet, Set inputs, + Set outputs) throws SemanticException { + } + + /* + * Called after the tasks have been generated to run another round of optimization + */ + protected abstract void optimizeTaskPlan(List> rootTasks, + ParseContext pCtx, Context ctx) throws SemanticException; + + /* + * Called to set the appropriate input format for tasks + */ + protected abstract void setInputFormat(Task rootTask); + + /* + * Called to generate the task tree from the parse context/operator tree + */ + protected abstract void generateTaskTree(List> rootTasks, ParseContext pCtx, + List> mvTask, Set inputs, Set outputs) throws SemanticException; + + /** + * Create a clone of the parse context + */ + public ParseContext getParseContext(ParseContext pCtx, List> rootTasks) { + return new ParseContext(conf, pCtx.getQB(), pCtx.getParseTree(), + pCtx.getOpToPartPruner(), pCtx.getOpToPartList(), pCtx.getTopOps(), + pCtx.getTopSelOps(), pCtx.getOpParseCtx(), pCtx.getJoinContext(), + pCtx.getSmbMapJoinContext(), pCtx.getTopToTable(), pCtx.getTopToProps(), + pCtx.getFsopToTable(), + pCtx.getLoadTableWork(), pCtx.getLoadFileWork(), pCtx.getContext(), + pCtx.getIdToTableNameMap(), pCtx.getDestTableId(), pCtx.getUCtx(), +
pCtx.getListMapJoinOpsNoReducer(), pCtx.getGroupOpToInputTables(), + pCtx.getPrunedPartitions(), pCtx.getOpToSamplePruner(), pCtx.getGlobalLimitCtx(), + pCtx.getNameToSplitSample(), pCtx.getSemanticInputs(), rootTasks, + pCtx.getOpToPartToSkewedPruner(), pCtx.getViewAliasToInput(), + pCtx.getReduceSinkOperatorsAddedByEnforceBucketingSorting(), + pCtx.getQueryProperties()); + } +} Index: ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompilerFactory.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompilerFactory.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompilerFactory.java (working copy) @@ -0,0 +1,40 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.parse; + +import org.apache.hadoop.hive.conf.HiveConf; + +/** + * TaskCompilerFactory is a factory class to choose the appropriate + * TaskCompiler. + */ +public class TaskCompilerFactory { + + private TaskCompilerFactory() { + // avoid instantiation + } + + /** + * Returns the appropriate compiler to translate the operator tree + * into executable units. + */ + public static TaskCompiler getCompiler(HiveConf conf, ParseContext parseContext) { + return new MapReduceCompiler(); + } +} Index: ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java (revision 1555253) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java (working copy) @@ -19,10 +19,10 @@ package org.apache.hadoop.hive.ql.plan; import java.util.ArrayList; -import java.util.HashMap; +import java.util.LinkedList; import java.util.List; -import java.util.Map; +import org.apache.hadoop.hive.ql.exec.HashTableDummyOperator; import org.apache.hadoop.hive.ql.exec.Operator; /** @@ -32,8 +32,22 @@ @SuppressWarnings({"serial", "deprecation"}) public abstract class BaseWork extends AbstractOperatorDesc { + // dummyOps is a reference to all the HashTableDummy operators in the + // plan. These have to be separately initialized when we set up a task. + // Their function is mainly as root ops to give the mapjoin the correct + // schema info.
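+ //
+ // Illustrative usage (a sketch only; "work" stands for any concrete
+ // BaseWork, and the dummy operator is created elsewhere by the planner
+ // for the small-table side of a map join):
+ //
+ //   HashTableDummyOperator dummy = ...; // small-table root from the planner
+ //   work.addDummyOp(dummy);             // lazily allocates the list below
+ //   List<HashTableDummyOperator> roots = work.getDummyOps();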
+ List dummyOps; + + public BaseWork() {} + + public BaseWork(String name) { + setName(name); + } + private boolean gatheringStats; + private String name; + public void setGatheringStats(boolean gatherStats) { this.gatheringStats = gatherStats; } @@ -42,6 +56,29 @@ return this.gatheringStats; } + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } + + public List getDummyOps() { + return dummyOps; + } + + public void setDummyOps(List dummyOps) { + this.dummyOps = dummyOps; + } + + public void addDummyOp(HashTableDummyOperator dummyOp) { + if (dummyOps == null) { + dummyOps = new LinkedList(); + } + dummyOps.add(dummyOp); + } + protected abstract List> getAllRootOperators(); public List> getAllOperators() { Index: ql/src/java/org/apache/hadoop/hive/ql/plan/JoinDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/JoinDesc.java (revision 1555253) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/JoinDesc.java (working copy) @@ -187,6 +187,7 @@ this.tagOrder = clone.tagOrder; this.filters = clone.filters; this.filterMap = clone.filterMap; + this.statistics = clone.statistics; } public Map> getExprs() { Index: ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java (revision 1555253) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java (working copy) @@ -47,6 +47,9 @@ private transient String bigTableAlias; + // for tez. used to remember which position maps to which logical input + private Map parentToInput = new HashMap(); + // table alias (small) --> input file name (big) --> target file names (small) private Map>> aliasBucketFileNameMapping; private Map bigTableBucketNumMapping; @@ -74,6 +77,7 @@ this.bigTableBucketNumMapping = clone.bigTableBucketNumMapping; this.bigTablePartSpecToFileMapping = clone.bigTablePartSpecToFileMapping; this.dumpFilePrefix = clone.dumpFilePrefix; + this.parentToInput = clone.parentToInput; } public MapJoinDesc(final Map> keys, @@ -106,6 +110,14 @@ } } + public Map getParentToInput() { + return parentToInput; + } + + public void setParentToInput(Map parentToInput) { + this.parentToInput = parentToInput; + } + public Map> getRetainList() { return retainList; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java (revision 1555253) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java (working copy) @@ -116,8 +116,10 @@ private Map> scratchColumnMap = null; private boolean vectorMode = false; + public MapWork() {} - public MapWork() { + public MapWork(String name) { + super(name); } @Explain(displayName = "Path -> Alias", normalExplain = false) Index: ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java (revision 1555253) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java (working copy) @@ -29,7 +29,9 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.metastore.MetaStoreUtils; import 
org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.ql.exec.ColumnInfo; @@ -386,7 +388,8 @@ /** * Generate the table descriptor for Map-side join key. */ - public static TableDesc getMapJoinKeyTableDesc(List fieldSchemas) { + public static TableDesc getMapJoinKeyTableDesc(Configuration conf, + List fieldSchemas) { return new TableDesc(SequenceFileInputFormat.class, SequenceFileOutputFormat.class, Utilities.makeProperties("columns", MetaStoreUtils.getColumnNamesFromFieldSchema(fieldSchemas), Index: ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java (revision 1555253) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java (working copy) @@ -60,6 +60,12 @@ private int numDistributionKeys; /** + * Used in tez. Holds the name of the output + * that this reduce sink is writing to. + */ + private String outputName; + + /** * The partition columns (CLUSTER BY or DISTRIBUTE BY in Hive language). * Partition columns decide the reducer that the current row goes to. * Partition columns are not passed to reducer. @@ -273,4 +279,12 @@ List> distinctColumnIndices) { this.distinctColumnIndices = distinctColumnIndices; } + + public String getOutputName() { + return outputName; + } + + public void setOutputName(String outputName) { + this.outputName = outputName; + } } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java (revision 1555253) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java (working copy) @@ -19,7 +19,9 @@ package org.apache.hadoop.hive.ql.plan; import java.util.ArrayList; +import java.util.HashMap; import java.util.List; +import java.util.Map; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -44,6 +46,12 @@ @SuppressWarnings({"serial", "deprecation"}) public class ReduceWork extends BaseWork { + public ReduceWork() {} + + public ReduceWork(String name) { + super(name); + } + private static transient final Log LOG = LogFactory.getLog(ReduceWork.class); // schema of the map-reduce 'key' object - this is homogeneous @@ -63,6 +71,8 @@ // not (e.g.: group by) private boolean needsTagging; + private Map tagToInput = new HashMap(); + /** * If the plan has a reducer and correspondingly a reduce-sink, then store the TableDesc pointing * to keySerializeInfo of the ReduceSink @@ -103,6 +113,14 @@ this.needsTagging = needsTagging; } + public void setTagToInput(final Map tagToInput) { + this.tagToInput = tagToInput; + } + + public Map getTagToInput() { + return tagToInput; + } + @Override protected List> getAllRootOperators() { ArrayList> opList = new ArrayList>(); Index: ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java =================================================================== --- ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java (revision 1555253) +++ ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java (working copy) @@ -29,6 +29,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.io.IOContext; import org.apache.hadoop.hive.ql.parse.TypeCheckProcFactory; import org.apache.hadoop.hive.ql.plan.CollectDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; @@ -313,6 
+314,7 @@ Configuration hconf = new JobConf(TestOperators.class); HiveConf.setVar(hconf, HiveConf.ConfVars.HADOOPMAPFILENAME, "hdfs:///testDir/testFile"); + IOContext.get().setInputPath(new Path("hdfs:///testDir/testFile")); // initialize pathToAliases ArrayList aliases = new ArrayList(); Index: ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java =================================================================== --- ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java (revision 1555253) +++ ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java (working copy) @@ -19,6 +19,7 @@ import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; import java.io.DataInput; import java.io.DataOutput; @@ -425,8 +426,8 @@ OrcInputFormat.Context context = new OrcInputFormat.Context(conf); OrcInputFormat.SplitGenerator splitter = new OrcInputFormat.SplitGenerator(context, fs, - fs.getFileStatus(new Path("/a/file"))); - splitter.createSplit(0, 200); + fs.getFileStatus(new Path("/a/file")), null); + splitter.createSplit(0, 200, null); FileSplit result = context.getResult(-1); assertEquals(0, result.getStart()); assertEquals(200, result.getLength()); @@ -436,7 +437,7 @@ assertEquals("host1-1", locs[0]); assertEquals("host1-2", locs[1]); assertEquals("host1-3", locs[2]); - splitter.createSplit(500, 600); + splitter.createSplit(500, 600, null); result = context.getResult(-1); locs = result.getLocations(); assertEquals(3, locs.length); @@ -443,7 +444,7 @@ assertEquals("host2-1", locs[0]); assertEquals("host0", locs[1]); assertEquals("host2-3", locs[2]); - splitter.createSplit(0, 2500); + splitter.createSplit(0, 2500, null); result = context.getResult(-1); locs = result.getLocations(); assertEquals(1, locs.length); @@ -468,7 +469,7 @@ OrcInputFormat.Context context = new OrcInputFormat.Context(conf); OrcInputFormat.SplitGenerator splitter = new OrcInputFormat.SplitGenerator(context, fs, - fs.getFileStatus(new Path("/a/file"))); + fs.getFileStatus(new Path("/a/file")), null); splitter.run(); if (context.getErrors().size() > 0) { for(Throwable th: context.getErrors()) { @@ -496,7 +497,7 @@ conf.setInt(OrcInputFormat.MAX_SPLIT_SIZE, 0); context = new OrcInputFormat.Context(conf); splitter = new OrcInputFormat.SplitGenerator(context, fs, - fs.getFileStatus(new Path("/a/file"))); + fs.getFileStatus(new Path("/a/file")), null); splitter.run(); if (context.getErrors().size() > 0) { for(Throwable th: context.getErrors()) { @@ -562,7 +563,6 @@ IntObjectInspector intInspector = (IntObjectInspector) fields.get(0).getFieldObjectInspector(); assertEquals(0.0, reader.getProgress(), 0.00001); - assertEquals(3, reader.getPos()); while (reader.next(key, value)) { assertEquals(++rowNum, intInspector.get(inspector. 
getStructFieldData(serde.deserialize(value), fields.get(0)))); @@ -697,7 +697,7 @@ InputFormat in = new OrcInputFormat(); FileInputFormat.setInputPaths(conf, testFilePath.toString()); InputSplit[] splits = in.getSplits(conf, 1); - assertEquals(0, splits.length); + assertTrue(1 == splits.length); assertEquals(null, serde.getSerDeStats()); } Index: ql/src/test/results/clientpositive/auto_sortmerge_join_11.q.out =================================================================== --- ql/src/test/results/clientpositive/auto_sortmerge_join_11.q.out (revision 1555253) +++ ql/src/test/results/clientpositive/auto_sortmerge_join_11.q.out (working copy) @@ -174,6 +174,8 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 1 + Statistics: + numRows: 127 dataSize: 12786 basicStatsState: COMPLETE colStatsState: NONE Select Operator Statistics: numRows: 127 dataSize: 12786 basicStatsState: COMPLETE colStatsState: NONE @@ -498,6 +500,8 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 1 + Statistics: + numRows: 127 dataSize: 12786 basicStatsState: COMPLETE colStatsState: NONE Select Operator Statistics: numRows: 127 dataSize: 12786 basicStatsState: COMPLETE colStatsState: NONE Index: ql/src/test/results/clientpositive/auto_sortmerge_join_12.q.out =================================================================== --- ql/src/test/results/clientpositive/auto_sortmerge_join_12.q.out (revision 1555253) +++ ql/src/test/results/clientpositive/auto_sortmerge_join_12.q.out (working copy) @@ -334,6 +334,8 @@ 1 [Column[key]] 2 [Column[key]] Position of Big Table: 2 + Statistics: + numRows: 255 dataSize: 25572 basicStatsState: COMPLETE colStatsState: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -345,6 +347,8 @@ 0 [] 1 [] Position of Big Table: 0 + Statistics: + numRows: 280 dataSize: 28129 basicStatsState: COMPLETE colStatsState: NONE Select Operator Statistics: numRows: 280 dataSize: 28129 basicStatsState: COMPLETE colStatsState: NONE Index: ql/src/test/results/clientpositive/join32.q.out =================================================================== --- ql/src/test/results/clientpositive/join32.q.out (revision 1555253) +++ ql/src/test/results/clientpositive/join32.q.out (working copy) @@ -133,6 +133,8 @@ 1 [Column[key]] outputColumnNames: _col0, _col1, _col5 Position of Big Table: 1 + Statistics: + numRows: 31 dataSize: 6393 basicStatsState: COMPLETE colStatsState: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -145,6 +147,8 @@ 1 [Column[value]] outputColumnNames: _col1, _col4, _col9 Position of Big Table: 0 + Statistics: + numRows: 63 dataSize: 6393 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: _col4 Index: ql/src/test/results/clientpositive/join32_lessSize.q.out =================================================================== --- ql/src/test/results/clientpositive/join32_lessSize.q.out (revision 1555253) +++ ql/src/test/results/clientpositive/join32_lessSize.q.out (working copy) @@ -80,6 +80,8 @@ 1 [Column[key]] outputColumnNames: _col0, _col1, _col5 Position of Big Table: 1 + Statistics: + numRows: 31 dataSize: 6393 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 @@ -272,6 +274,8 @@ 1 [Column[value]] outputColumnNames: _col1, _col4, _col9 Position of Big Table: 0 + Statistics: + numRows: 63 dataSize: 6393 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: _col4 @@ -602,6 +606,8 @@ 1 [Column[value]] outputColumnNames: _col4 Position of Big Table: 0 
+ Statistics: + numRows: 63 dataSize: 6393 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 @@ -1805,6 +1811,8 @@ 1 [Column[key]] outputColumnNames: _col0, _col1 Position of Big Table: 1 + Statistics: + numRows: 63 dataSize: 6393 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: _col0 @@ -2006,6 +2014,8 @@ 1 [Column[value]] outputColumnNames: _col0, _col1, _col3 Position of Big Table: 0 + Statistics: + numRows: 69 dataSize: 7032 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: _col0 @@ -2346,6 +2356,8 @@ 1 [Column[key]] outputColumnNames: _col0, _col1 Position of Big Table: 0 + Statistics: + numRows: 63 dataSize: 6393 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: _col0 @@ -2547,6 +2559,8 @@ 1 [Column[value]] outputColumnNames: _col0, _col1, _col3 Position of Big Table: 0 + Statistics: + numRows: 69 dataSize: 7032 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: _col0 Index: ql/src/test/results/clientpositive/join33.q.out =================================================================== --- ql/src/test/results/clientpositive/join33.q.out (revision 1555253) +++ ql/src/test/results/clientpositive/join33.q.out (working copy) @@ -133,6 +133,8 @@ 1 [Column[key]] outputColumnNames: _col0, _col1, _col5 Position of Big Table: 1 + Statistics: + numRows: 31 dataSize: 6393 basicStatsState: COMPLETE colStatsState: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -145,6 +147,8 @@ 1 [Column[value]] outputColumnNames: _col1, _col4, _col9 Position of Big Table: 0 + Statistics: + numRows: 63 dataSize: 6393 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: _col4 Index: ql/src/test/results/clientpositive/join34.q.out =================================================================== --- ql/src/test/results/clientpositive/join34.q.out (revision 1555253) +++ ql/src/test/results/clientpositive/join34.q.out (working copy) @@ -100,6 +100,8 @@ 1 [Column[key]] outputColumnNames: _col1, _col2, _col3 Position of Big Table: 0 + Statistics: + numRows: 19 dataSize: 3966 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: _col2 @@ -174,6 +176,8 @@ 1 [Column[key]] outputColumnNames: _col1, _col2, _col3 Position of Big Table: 0 + Statistics: + numRows: 19 dataSize: 3966 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: _col2 Index: ql/src/test/results/clientpositive/multi_insert_move_tasks_share_dependencies.q.out =================================================================== --- ql/src/test/results/clientpositive/multi_insert_move_tasks_share_dependencies.q.out (revision 1555253) +++ ql/src/test/results/clientpositive/multi_insert_move_tasks_share_dependencies.q.out (working copy) @@ -184,16 +184,16 @@ STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-9 depends on stages: Stage-5, Stage-4, Stage-7, Stage-12, Stage-11, Stage-14 - Stage-0 depends on stages: Stage-9 + Stage-9 depends on stages: Stage-2 , consists of Stage-6, Stage-5, Stage-7 + Stage-6 + Stage-4 depends on stages: Stage-6, Stage-5, Stage-8, Stage-12, Stage-11, Stage-14 + Stage-0 depends on stages: Stage-4 Stage-3 depends on stages: Stage-0 - Stage-1 depends on stages: Stage-9 + Stage-1 depends on stages: Stage-4 Stage-10 depends on stages: Stage-1 - Stage-4 - Stage-6 - Stage-7 depends 
on stages: Stage-6 + Stage-5 + Stage-7 + Stage-8 depends on stages: Stage-7 Stage-15 depends on stages: Stage-2 , consists of Stage-12, Stage-11, Stage-13 Stage-12 Stage-11 @@ -246,16 +246,16 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 - Stage: Stage-8 + Stage: Stage-9 Conditional Operator - Stage: Stage-5 + Stage: Stage-6 Move Operator files: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-9 + Stage: Stage-4 Dependency Collection Stage: Stage-0 @@ -284,7 +284,7 @@ Stage: Stage-10 Stats-Aggr Operator - Stage: Stage-4 + Stage: Stage-5 Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### @@ -298,7 +298,7 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 - Stage: Stage-6 + Stage: Stage-7 Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### @@ -312,7 +312,7 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 - Stage: Stage-7 + Stage: Stage-8 Move Operator files: hdfs directory: true @@ -650,16 +650,16 @@ STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-9 depends on stages: Stage-5, Stage-4, Stage-7, Stage-12, Stage-11, Stage-14 - Stage-0 depends on stages: Stage-9 + Stage-9 depends on stages: Stage-2 , consists of Stage-6, Stage-5, Stage-7 + Stage-6 + Stage-4 depends on stages: Stage-6, Stage-5, Stage-8, Stage-12, Stage-11, Stage-14 + Stage-0 depends on stages: Stage-4 Stage-3 depends on stages: Stage-0 - Stage-1 depends on stages: Stage-9 + Stage-1 depends on stages: Stage-4 Stage-10 depends on stages: Stage-1 - Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 + Stage-5 + Stage-7 + Stage-8 depends on stages: Stage-7 Stage-15 depends on stages: Stage-2 , consists of Stage-12, Stage-11, Stage-13 Stage-12 Stage-11 @@ -712,16 +712,16 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 - Stage: Stage-8 + Stage: Stage-9 Conditional Operator - Stage: Stage-5 + Stage: Stage-6 Move Operator files: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-9 + Stage: Stage-4 Dependency Collection Stage: Stage-0 @@ -750,7 +750,7 @@ Stage: Stage-10 Stats-Aggr Operator - Stage: Stage-4 + Stage: Stage-5 Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### @@ -764,7 +764,7 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 - Stage: Stage-6 + Stage: Stage-7 Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### @@ -778,7 +778,7 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 - Stage: Stage-7 + Stage: Stage-8 Move Operator files: hdfs directory: true @@ -1201,16 +1201,16 @@ STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-9 depends on stages: Stage-5, Stage-4, Stage-7, Stage-12, Stage-11, Stage-14 - Stage-0 depends on stages: Stage-9 + Stage-9 depends on stages: Stage-2 , consists of Stage-6, Stage-5, Stage-7 + Stage-6 + Stage-4 depends on stages: Stage-6, Stage-5, Stage-8, Stage-12, Stage-11, Stage-14 + Stage-0 depends on stages: Stage-4 Stage-3 depends on stages: Stage-0 - Stage-1 depends on stages: Stage-9 + Stage-1 depends on stages: Stage-4 Stage-10 depends on stages: Stage-1 - Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 + Stage-5 + Stage-7 + Stage-8 depends on stages: 
Stage-7 Stage-15 depends on stages: Stage-2 , consists of Stage-12, Stage-11, Stage-13 Stage-12 Stage-11 @@ -1307,16 +1307,16 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 - Stage: Stage-8 + Stage: Stage-9 Conditional Operator - Stage: Stage-5 + Stage: Stage-6 Move Operator files: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-9 + Stage: Stage-4 Dependency Collection Stage: Stage-0 @@ -1345,7 +1345,7 @@ Stage: Stage-10 Stats-Aggr Operator - Stage: Stage-4 + Stage: Stage-5 Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### @@ -1359,7 +1359,7 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 - Stage: Stage-6 + Stage: Stage-7 Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### @@ -1373,7 +1373,7 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 - Stage: Stage-7 + Stage: Stage-8 Move Operator files: hdfs directory: true @@ -1789,16 +1789,16 @@ STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-9 depends on stages: Stage-5, Stage-4, Stage-7, Stage-12, Stage-11, Stage-14 - Stage-0 depends on stages: Stage-9 + Stage-9 depends on stages: Stage-2 , consists of Stage-6, Stage-5, Stage-7 + Stage-6 + Stage-4 depends on stages: Stage-6, Stage-5, Stage-8, Stage-12, Stage-11, Stage-14 + Stage-0 depends on stages: Stage-4 Stage-3 depends on stages: Stage-0 - Stage-1 depends on stages: Stage-9 + Stage-1 depends on stages: Stage-4 Stage-10 depends on stages: Stage-1 - Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 + Stage-5 + Stage-7 + Stage-8 depends on stages: Stage-7 Stage-15 depends on stages: Stage-2 , consists of Stage-12, Stage-11, Stage-13 Stage-12 Stage-11 @@ -1895,16 +1895,16 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 - Stage: Stage-8 + Stage: Stage-9 Conditional Operator - Stage: Stage-5 + Stage: Stage-6 Move Operator files: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-9 + Stage: Stage-4 Dependency Collection Stage: Stage-0 @@ -1933,7 +1933,7 @@ Stage: Stage-10 Stats-Aggr Operator - Stage: Stage-4 + Stage: Stage-5 Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### @@ -1947,7 +1947,7 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 - Stage: Stage-6 + Stage: Stage-7 Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### @@ -1961,7 +1961,7 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 - Stage: Stage-7 + Stage: Stage-8 Move Operator files: hdfs directory: true @@ -2432,16 +2432,16 @@ STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-9 depends on stages: Stage-5, Stage-4, Stage-7, Stage-12, Stage-11, Stage-14 - Stage-0 depends on stages: Stage-9 + Stage-9 depends on stages: Stage-2 , consists of Stage-6, Stage-5, Stage-7 + Stage-6 + Stage-4 depends on stages: Stage-6, Stage-5, Stage-8, Stage-12, Stage-11, Stage-14 + Stage-0 depends on stages: Stage-4 Stage-3 depends on stages: Stage-0 - Stage-1 depends on stages: Stage-9 + Stage-1 depends on stages: Stage-4 Stage-10 depends on stages: Stage-1 - Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 + Stage-5 + Stage-7 + Stage-8 depends on stages: Stage-7 Stage-15 depends on stages: Stage-2 , consists 
of Stage-12, Stage-11, Stage-13 Stage-12 Stage-11 @@ -2551,16 +2551,16 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 - Stage: Stage-8 + Stage: Stage-9 Conditional Operator - Stage: Stage-5 + Stage: Stage-6 Move Operator files: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-9 + Stage: Stage-4 Dependency Collection Stage: Stage-0 @@ -2589,7 +2589,7 @@ Stage: Stage-10 Stats-Aggr Operator - Stage: Stage-4 + Stage: Stage-5 Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### @@ -2603,7 +2603,7 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 - Stage: Stage-6 + Stage: Stage-7 Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### @@ -2617,7 +2617,7 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 - Stage: Stage-7 + Stage: Stage-8 Move Operator files: hdfs directory: true @@ -3178,16 +3178,16 @@ STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-9 depends on stages: Stage-5, Stage-4, Stage-7, Stage-12, Stage-11, Stage-14 - Stage-0 depends on stages: Stage-9 + Stage-9 depends on stages: Stage-2 , consists of Stage-6, Stage-5, Stage-7 + Stage-6 + Stage-4 depends on stages: Stage-6, Stage-5, Stage-8, Stage-12, Stage-11, Stage-14 + Stage-0 depends on stages: Stage-4 Stage-3 depends on stages: Stage-0 - Stage-1 depends on stages: Stage-9 + Stage-1 depends on stages: Stage-4 Stage-10 depends on stages: Stage-1 - Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 + Stage-5 + Stage-7 + Stage-8 depends on stages: Stage-7 Stage-15 depends on stages: Stage-2 , consists of Stage-12, Stage-11, Stage-13 Stage-12 Stage-11 @@ -3297,16 +3297,16 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 - Stage: Stage-8 + Stage: Stage-9 Conditional Operator - Stage: Stage-5 + Stage: Stage-6 Move Operator files: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-9 + Stage: Stage-4 Dependency Collection Stage: Stage-0 @@ -3335,7 +3335,7 @@ Stage: Stage-10 Stats-Aggr Operator - Stage: Stage-4 + Stage: Stage-5 Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### @@ -3349,7 +3349,7 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 - Stage: Stage-6 + Stage: Stage-7 Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### @@ -3363,7 +3363,7 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 - Stage: Stage-7 + Stage: Stage-8 Move Operator files: hdfs directory: true @@ -6276,16 +6276,16 @@ STAGE DEPENDENCIES: Stage-4 is a root stage - Stage-10 depends on stages: Stage-4 , consists of Stage-7, Stage-6, Stage-8 - Stage-7 - Stage-11 depends on stages: Stage-7, Stage-6, Stage-9, Stage-14, Stage-13, Stage-16, Stage-18, Stage-19 - Stage-0 depends on stages: Stage-11 + Stage-11 depends on stages: Stage-4 , consists of Stage-8, Stage-7, Stage-9 + Stage-8 + Stage-6 depends on stages: Stage-8, Stage-7, Stage-10, Stage-14, Stage-13, Stage-16, Stage-18, Stage-19 + Stage-0 depends on stages: Stage-6 Stage-5 depends on stages: Stage-0 - Stage-1 depends on stages: Stage-11 + Stage-1 depends on stages: Stage-6 Stage-12 depends on stages: Stage-1 - Stage-6 - Stage-8 - Stage-9 depends on stages: Stage-8 + Stage-7 + Stage-9 + Stage-10 depends on stages: Stage-9 Stage-17 depends on stages: Stage-4 , consists of 
Stage-14, Stage-13, Stage-15 Stage-14 Stage-13 @@ -6422,16 +6422,16 @@ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-10 + Stage: Stage-11 Conditional Operator - Stage: Stage-7 + Stage: Stage-8 Move Operator files: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-11 + Stage: Stage-6 Dependency Collection Stage: Stage-0 @@ -6460,7 +6460,7 @@ Stage: Stage-12 Stats-Aggr Operator - Stage: Stage-6 + Stage: Stage-7 Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### @@ -6474,7 +6474,7 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 - Stage: Stage-8 + Stage: Stage-9 Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### @@ -6488,7 +6488,7 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 - Stage: Stage-9 + Stage: Stage-10 Move Operator files: hdfs directory: true @@ -6855,16 +6855,16 @@ STAGE DEPENDENCIES: Stage-4 is a root stage - Stage-10 depends on stages: Stage-4 , consists of Stage-7, Stage-6, Stage-8 - Stage-7 - Stage-11 depends on stages: Stage-7, Stage-6, Stage-9, Stage-14, Stage-13, Stage-16, Stage-18, Stage-19 - Stage-0 depends on stages: Stage-11 + Stage-11 depends on stages: Stage-4 , consists of Stage-8, Stage-7, Stage-9 + Stage-8 + Stage-6 depends on stages: Stage-8, Stage-7, Stage-10, Stage-14, Stage-13, Stage-16, Stage-18, Stage-19 + Stage-0 depends on stages: Stage-6 Stage-5 depends on stages: Stage-0 - Stage-1 depends on stages: Stage-11 + Stage-1 depends on stages: Stage-6 Stage-12 depends on stages: Stage-1 - Stage-6 - Stage-8 - Stage-9 depends on stages: Stage-8 + Stage-7 + Stage-9 + Stage-10 depends on stages: Stage-9 Stage-17 depends on stages: Stage-4 , consists of Stage-14, Stage-13, Stage-15 Stage-14 Stage-13 @@ -7001,16 +7001,16 @@ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-10 + Stage: Stage-11 Conditional Operator - Stage: Stage-7 + Stage: Stage-8 Move Operator files: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-11 + Stage: Stage-6 Dependency Collection Stage: Stage-0 @@ -7039,7 +7039,7 @@ Stage: Stage-12 Stats-Aggr Operator - Stage: Stage-6 + Stage: Stage-7 Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### @@ -7053,7 +7053,7 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 - Stage: Stage-8 + Stage: Stage-9 Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### @@ -7067,7 +7067,7 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 - Stage: Stage-9 + Stage: Stage-10 Move Operator files: hdfs directory: true Index: ql/src/test/results/clientpositive/vector_left_outer_join.q.out =================================================================== --- ql/src/test/results/clientpositive/vector_left_outer_join.q.out (revision 1555253) +++ ql/src/test/results/clientpositive/vector_left_outer_join.q.out (working copy) @@ -152,4 +152,4 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -192735557 +225951785 Index: ql/src/test/results/clientpositive/vectorized_mapjoin.q.out =================================================================== --- ql/src/test/results/clientpositive/vectorized_mapjoin.q.out (revision 1555253) +++ 
ql/src/test/results/clientpositive/vectorized_mapjoin.q.out (working copy) @@ -131,4 +131,4 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -3149925 1073680599 -1073051226 9.381482540406644E8 +3152013 1073680599 -1073279343 9.375396162525452E8 Index: ql/src/test/results/compiler/plan/case_sensitivity.q.xml =================================================================== --- ql/src/test/results/compiler/plan/case_sensitivity.q.xml (revision 1555253) +++ ql/src/test/results/compiler/plan/case_sensitivity.q.xml (working copy) @@ -79,137 +79,133 @@ #### A masked pattern was here #### - - - - - - - #### A masked pattern was here #### - - - 1 - - - - - org.apache.hadoop.mapred.TextInputFormat - - - org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - - - - name - default.dest1 - - - columns.types - string:string - - - serialization.ddl - struct dest1 { string key, string value} - - - serialization.format - 1 - - - columns - key,value - - - bucket_count - -1 - - - serialization.lib - org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - - file.inputformat - org.apache.hadoop.mapred.TextInputFormat - - - location - #### A masked pattern was here #### - - - file.outputformat - org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - - transient_lastDdlTime - #### A masked pattern was here #### - - - - - - - 1 - - + + + + + + #### A masked pattern was here #### + + + 1 + + + + + org.apache.hadoop.mapred.TextInputFormat + + + org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + + + + name + default.dest1 + + + columns.types + string:string + + + serialization.ddl + struct dest1 { string key, string value} + + + serialization.format + 1 + + + columns + key,value + + + bucket_count + -1 + + + serialization.lib + org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + + file.inputformat + org.apache.hadoop.mapred.TextInputFormat + + + location + #### A masked pattern was here #### + + + file.outputformat + org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + + transient_lastDdlTime + #### A masked pattern was here #### + + + + + + + 1 + + + + + FS_6 + + + + - - FS_6 - - - - - - - - - - - - - - - - key - - - - - - - - string - - - - - string - - - - - - - value - - - - - - - - - string - - - - - - - - - - + + + + + + + + + key + + + + + + + + string + + + + + string + + + + + + + value + + + + + + + + + string + + + + + + + + + @@ -646,173 +642,114 @@ src_thrift - - - - - - - - - + + + + + + + + + + + + 1 + + + #### A masked pattern was here #### + + + true + + + + + + 150 + + + 1 + + + #### A masked pattern was here #### + + + true + + + + + + 1 + + + + + FS_3 + + - - - - - 1 - - - #### A masked pattern was here #### - - - true - - - - - - 150 - - - 1 - - - #### A masked pattern was here #### - - - true - - - - - - 1 - - - - - FS_3 - - - - - - - - - - - - + - - - - - - _col1 - - - - - - - - - lintstring - - - src_thrift - - - - - - - - - myint - - - mystring - - - underscore_int - - - - - - - - - int - - - - - - - - - - - - - - - - - - - - - - - - 0 - - - - - - - - - - - - - - - MYSTRING - - - false - - - - - - - - _col0 - + + + + + + + + + + + _col1 + + + - lint + lintstring src_thrift - + - + + + + + myint + + + mystring + + + underscore_int + + + + + + + + + int + + + + + + + + + + + + @@ -824,7 +761,7 @@ - 1 + 0 @@ -834,206 +771,253 @@ - + - - - - - - - - - - - - - + + MYSTRING - - - - _col0 - - - _col1 - - + + false - - - - SEL_2 - - - - - + + - - - + + _col0 + + - - - _col0 + + + lint - - + + src_thrift - - 
int + + + + + + - - - _col1 + + + - - + + 1 - - string - + + + + + + - - - - - - - + + + - - - - - - - lint - - - src_thrift - - - - - - - - - - - - - 0 - - - - - - - - - - - - + - - - - - - 0 - - + - - - - - - - boolean + + + + _col0 + + _col1 + - - - - FIL_4 - - - - - + + SEL_2 - - - - - - - - - - lint + + + + + + + + + + + + + _col0 + + + + + + int + + - - src_thrift + + + + _col1 + + + + + + string + + - - - - - array<int> - - - - - lintstring - - - src_thrift - - - - - - array<struct<myint:int,mystring:string,underscore_int:int>> - - - - - - + + + + + + + + + + + + + + + lint + + + src_thrift + + + + + + + + + + + + + 0 + + + + + + + + + + + + + + + + + + + + 0 + + + + + + + + + + + + boolean + + + + + + + + + FIL_4 + + + + + + + + + + + + + + lint + + + src_thrift + + + + + + array<int> + + + + + + + lintstring + + + src_thrift + + + + + + array<struct<myint:int,mystring:string,underscore_int:int>> + + + + + + + + + Index: ql/src/test/results/compiler/plan/cast1.q.xml =================================================================== --- ql/src/test/results/compiler/plan/cast1.q.xml (revision 1555253) +++ ql/src/test/results/compiler/plan/cast1.q.xml (working copy) @@ -153,754 +153,742 @@ src - - - - - - - - - + + + + + + + + + + + + #### A masked pattern was here #### + + + 1 + + + #### A masked pattern was here #### + + + true + + + + + org.apache.hadoop.mapred.TextInputFormat + + + org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + + + + hive.serialization.extend.nesting.levels + true + + + columns + _col0,_col1,_col2,_col3,_col4,_col5,_col6 + + + serialization.lib + org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + + serialization.format + 1 + + + columns.types + int:double:double:double:int:boolean:int + + + escape.delim + \ + + + + + + + 1 + + + + + FS_3 + + - - - - - #### A masked pattern was here #### + + + + + + + + + + + _col0 + + + + + + + + int + + + + + int + + + + + + + _col1 + + + + + + + + double + + + + + double + + + + + + + _col2 + + + + + + + + + double + + + + + + + _col3 + + + + + + + + + double + + + + + + + _col4 + + + + + + + + + int + + + + + + + _col5 + + + + + + + + boolean + + + + + boolean + + + + + + + _col6 + + + + + + + + + int + + + + + + + + + + + + + + _col6 + + + + + + + - - 1 - - - #### A masked pattern was here #### - - + true - - - - org.apache.hadoop.mapred.TextInputFormat - - - org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - - - - hive.serialization.extend.nesting.levels - true - - - columns - _col0,_col1,_col2,_col3,_col4,_col5,_col6 - - - serialization.lib - org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - - serialization.format - 1 - - - columns.types - int:double:double:double:int:boolean:int - - - escape.delim - \ - - - - + + + + + + + + org.apache.hadoop.hive.ql.udf.UDFToInteger + + + UDFToInteger + + + + + + + + + + _col5 + + + + + + + - + 1 - - FS_3 + + + + + + org.apache.hadoop.hive.ql.udf.UDFToBoolean - - - - + + UDFToBoolean + + + + + + + + + + _col4 + + + + + + + + + 3 + - - - + + + - - - _col0 - - - - - - - - int - - - - - int - - - - - - - _col1 - - - - - - - - double - - - - - double - - - - - - - _col2 - - - - - + + - - double + + 2.0 - - - - _col3 - - - - - - - - - double - - + + + + + + org.apache.hadoop.hive.ql.udf.UDFToInteger - - - - _col4 - - - - - - - - - int - - + + UDFToInteger - - - - _col5 - - - - - - - - boolean - - - - - boolean - - - - - - - _col6 - - - - - - - - - int - - - + + + + + + + + + - - - - _col6 - - - - - - - - - - true - - + + _col3 + + + + + + + + + 3.0 + - - - 
[The remainder of this patch updates auto-generated query-plan goldens under ql/src/test/results/compiler/plan/. The serialized XML bodies of these hunks did not survive extraction, so each hunk below is reduced to its header plus a one-line summary of the plan elements still legible in the residue.]

  [tail of the preceding golden's hunk: SEL_2/FIL_4 column metadata, _col0.._col6 with UDFToInteger/UDFToBoolean casts, output aliases _c0.._c6 (int/double/boolean), and a filter predicate over src.key and the constant 86]

Index: ql/src/test/results/compiler/plan/groupby1.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/groupby1.q.xml (revision 1555253)
+++ ql/src/test/results/compiler/plan/groupby1.q.xml (working copy)
@@ -297,297 +297,289 @@
  [plan XML: reduce sink RS_3 (key _col0, string, via BinarySortableSerDe; value _col1, double, via LazyBinarySerDe) with partial sum(substr(value, 5)) (GenericUDAFSum, mode PARTIAL1)]
@@ -594,266 +586,262 @@
  [plan XML: hash group-by GBY_2 (HASH mode, memory thresholds 0.5/0.9, outputs _col0/_col1) and select SEL_1 over src.key/src.value plus virtual columns]
@@ -1136,207 +1124,199 @@
  [plan XML: file sink FS_6 and select SEL_5 emitting _col0 (string) and _col1 (double)]
@@ -1425,11 +1405,9 @@
  [plan XML: trailing elements of group-by GBY_4]
Index: ql/src/test/results/compiler/plan/groupby2.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/groupby2.q.xml (revision 1555253)
+++ ql/src/test/results/compiler/plan/groupby2.q.xml (working copy)
@@ -153,529 +153,521 @@
  [plan XML: reduce sink RS_3 keyed on substr(key, 1, 1) with key type uniontype<struct<_col0:string>>, distinct count (GenericUDAFCount) and sum (GenericUDAFSum) over substr(value, 5), mode PARTIAL1]
@@ -682,306 +674,302 @@
  [plan XML: hash group-by GBY_2 (thresholds 0.5/0.9, outputs _col0.._col3: string/string/bigint/double) and select SEL_1]
@@ -1258,312 +1246,304 @@
  [plan XML: text file sink FS_6 (columns _col0.._col2, types string:bigint:string, LazySimpleSerDe) and select SEL_5 (_c0.._c2)]
@@ -1690,11 +1670,9 @@
  [plan XML: trailing elements of group-by GBY_4]
Index: ql/src/test/results/compiler/plan/groupby3.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/groupby3.q.xml (revision 1555253)
+++ ql/src/test/results/compiler/plan/groupby3.q.xml (working copy)
@@ -153,366 +153,543 @@
  [plan XML: reduce sink RS_3 carrying partial aggregation state (a double, two struct<count:bigint,sum:double> columns, and two strings) for sum/avg aggregates over substr(value, 5)]
@@ -519,371 +696,186 @@
  [plan XML: aggregator descriptors: distinct avg (GenericUDAFAverage), max (GenericUDAFMax), and min (GenericUDAFMin), all over substr(value, 5)]
@@ -890,164 +882,162 @@
  [plan XML: hash group-by GBY_2 (thresholds 0.5/0.9, outputs _col0.._col5)]
@@ -1056,140 +1046,138 @@
  [plan XML: select SEL_1 over src.value plus virtual columns]
@@ -1473,389 +1461,381 @@
  [plan XML: text file sink FS_6 (columns _col0.._col4, types double:double:double:string:string) and select SEL_5 (_c0.._c4)]
@@ -2053,11 +2033,9 @@
  [plan XML: trailing elements of group-by GBY_4]
Index: ql/src/test/results/compiler/plan/groupby4.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/groupby4.q.xml (revision 1555253)
+++ ql/src/test/results/compiler/plan/groupby4.q.xml (working copy)
@@ -153,441 +153,296 @@
  [plan XML: reduce sink RS_3 keyed on substr(key, 1, 1) (string key, empty value schema) and hash group-by GBY_2 (thresholds 0.5/0.9)]
@@ -594,9 +449,142 @@
  [plan XML: select SEL_1 over src.key plus virtual columns]
@@ -880,100 +868,94 @@
  [plan XML: text file sink FS_6 (single string column _col0)]
@@ -982,79 +964,77 @@
  [plan XML: select SEL_5 emitting _c0 (string)]
@@ -1110,11 +1090,9 @@
  [plan XML: trailing elements of group-by GBY_4]
Index: ql/src/test/results/compiler/plan/groupby5.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/groupby5.q.xml (revision 1555253)
+++ ql/src/test/results/compiler/plan/groupby5.q.xml (working copy)
@@ -153,297 +153,289 @@
  [plan XML: reduce sink RS_3 keyed on src.key with partial sum(substr(value, 5)) (GenericUDAFSum, mode PARTIAL1)]
@@ -450,266 +442,262 @@
  [plan XML: hash group-by GBY_2 (thresholds 0.5/0.9, outputs _col0/_col1) and select SEL_1]
@@ -986,116 +974,110 @@
  [plan XML: text file sink FS_6 (columns _col0,_col1, types string:double)]
@@ -1104,118 +1086,116 @@
  [plan XML: select SEL_5 emitting key as _col0 (string) and _c1 as _col1 (double)]
@@ -1304,11 +1284,9 @@
  [plan XML: trailing elements of group-by GBY_4]
Index: ql/src/test/results/compiler/plan/groupby6.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/groupby6.q.xml (revision 1555253)
+++ ql/src/test/results/compiler/plan/groupby6.q.xml (working copy)
@@ -153,441 +153,296 @@
  [plan XML: reduce sink RS_3 keyed on substr(value, 5, 1) and hash group-by GBY_2 (thresholds 0.5/0.9)]
@@ -594,9 +449,142 @@
  [plan XML: select SEL_1 over src.value plus virtual columns]
@@ -880,100 +868,94 @@
  [plan XML: text file sink FS_6 (single string column _col0)]
@@ -982,79 +964,77 @@
  [plan XML: select SEL_5 emitting _c0 (string)]
@@ -1110,11 +1090,9 @@
  [plan XML: trailing elements of group-by GBY_4]
Index: ql/src/test/results/compiler/plan/input1.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/input1.q.xml (revision 1555253)
+++ ql/src/test/results/compiler/plan/input1.q.xml (working copy)
@@ -79,137 +79,133 @@
  [plan XML: table sink FS_6 into default.dest1 (struct dest1 { string key, string value }, LazySimpleSerDe)]
@@ -638,280 +634,268 @@
  [plan XML: file sink FS_3, select SEL_2 (_col0 = key, _col1 = value), and filter FIL_4 comparing src.key with the constant 100]
Index: ql/src/test/results/compiler/plan/input2.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/input2.q.xml (revision 1555253)
+++ ql/src/test/results/compiler/plan/input2.q.xml (working copy)
@@ -79,137 +79,133 @@
  [plan XML: table sink FS_11 into default.dest1]
@@ -560,133 +556,129 @@
  [plan XML: table sink FS_13 into default.dest2]
@@ -1037,137 +1029,133 @@
  [plan XML: table sink FS_15 into default.dest3, partitioned by ds/hr]
@@ -1605,804 +1593,772 @@
  [plan XML: three sink branches: FS_3/SEL_2/FIL_1 filtering src.key against 100, FS_6/SEL_5/FIL_4 against 100 and 200, and FS_9/SEL_8/FIL_7 against 200 with static partition ds=2008-04-08/hr=12]
Index: ql/src/test/results/compiler/plan/input20.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/input20.q.xml (revision 1555253)
+++ ql/src/test/results/compiler/plan/input20.q.xml (working copy)
@@ -153,530 +153,518 @@
  [plan XML: reduce sink RS_3 (key reducesinkkey0, string), script operator SCR_2 running "cat" through TextRecordReader/TextRecordWriter, and select SEL_1 computing two expressions over src.key with the constants 5 and 2]
@@ -977,288 +965,278 @@
  [plan XML: text file sink FS_7 (columns _col0,_col1, string:string) and script operator SCR_6 running uniq -c | sed "s@^ *@@" | sed "s@\t@_@" | sed "s@ @\t@"]
@@ -1267,109 +1245,107 @@
  [plan XML: select SEL_5 over tmap._col0/_col1]
@@ -1392,11 +1368,9 @@
  [plan XML: trailing elements of extract operator EX_4]
Index: ql/src/test/results/compiler/plan/input3.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/input3.q.xml (revision 1555253)
+++ ql/src/test/results/compiler/plan/input3.q.xml (working copy)
@@ -79,137 +79,133 @@
  [plan XML: table sink FS_14 into default.dest1]
@@ -560,133 +556,129 @@
  [plan XML: table sink FS_16 into default.dest2]
@@ -1037,137 +1029,133 @@
  [plan XML: start of the default.dest3 sink descriptor; the source text is truncated at this point]
- struct dest3 { string key, string value} - - - serialization.format - 1 - - - columns - key,value - - - partition_columns - ds/hr - - - bucket_count - -1 - - - serialization.lib - org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - - file.inputformat - org.apache.hadoop.mapred.TextInputFormat - - - location - #### A masked pattern was here #### - - - file.outputformat - org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - - transient_lastDdlTime - #### A masked pattern was here #### - - - - - - - 1 - - + + + + + + #### A masked pattern was here #### + + + 1 + + + + + org.apache.hadoop.mapred.TextInputFormat + + + org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + + + + name + default.dest3 + + + columns.types + string:string + + + serialization.ddl + struct dest3 { string key, string value} + + + serialization.format + 1 + + + columns + key,value + + + partition_columns + ds/hr + + + bucket_count + -1 + + + serialization.lib + org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + + file.inputformat + org.apache.hadoop.mapred.TextInputFormat + + + location + #### A masked pattern was here #### + + + file.outputformat + org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + + transient_lastDdlTime + #### A masked pattern was here #### + + + + + + + 1 + + + + + FS_18 + + + + - - FS_18 - - - - - - - - - - - - - - - - key - - - - - - - - - string - - - - - - - value - - - - - - - - - string - - - - - - - - - - + + + + + + + + + key + + + + + + + + + string + + + + + + + value + + + + + + + + + string + + + + + + + + + @@ -1492,89 +1480,85 @@ #### A masked pattern was here #### - - - - - - - #### A masked pattern was here #### - - - 1 - - - - - org.apache.hadoop.mapred.TextInputFormat - - - org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - - - - columns - _col0 - - - serialization.lib - org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - - serialization.format - 1 - - - columns.types - string - - - - - - - 1 - - + + + + + + #### A masked pattern was here #### + + + 1 + + + + + org.apache.hadoop.mapred.TextInputFormat + + + org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + + + + columns + _col0 + + + serialization.lib + org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + + serialization.format + 1 + + + columns.types + string + + + + + + + 1 + + + + + FS_20 + + + + - - FS_20 - - - - - - - - - - - - - - - - _col0 - - - - - - - - - string - - - - - - - - - - + + + + + + + + + _col0 + + + + + + + + + string + + + + + + + + + @@ -1947,1051 +1931,1009 @@ src - - - - - - - - - + + + + + + + + + + + + 1 + + + #### A masked pattern was here #### + + + true + + + + + + 150 + + + 1 + + + #### A masked pattern was here #### + + + true + + + + + + 1 + + + + + FS_3 + + - - - - - 1 - - - #### A masked pattern was here #### - - - true - - - - - - 150 - - - 1 - - - #### A masked pattern was here #### - - - true - - - - - - 1 - - - - - FS_3 - - - - - - - - - - - - + - - - - - - _col1 - - - value - - - src - - - - - + + + + + + + + + + + _col1 + + + value - - _col0 - - - key - - - src - - - - - + + src + + + - - - - - - - - - - - + + _col0 + + + key - - - - _col0 - - - _col1 - - + + src - - true + + - - SEL_2 - - + + + + + - + + + + - - - - - - - - _col0 - - - src - - - - - - string - - - - - - - _col1 - - - src - - - - - - string - - - - + + + + _col0 + + _col1 + + + true + - - - - - - - + + SEL_2 + + + + + + + + + - - - key + + + _col0 src - + + + string + - - - - - int - - + + + _col1 - - 100 + + src + + + + + string + - - - - - - - boolean - - 
- - - FIL_1 + + + + + + + + + + + key + + + src + + + + + + + + + + + + int + + + + + 100 + + + + + + + + + + + + boolean + + + + + + + + + FIL_1 + + + + - - - - + + + + + + + + + key + + + src + + + + + + string + + + + + + + value + + + src + + + + + + string + + + + + + + + + + + + + + + + + + + + + 2 + + + #### A masked pattern was here #### + + + true + + + + + + 150 + + + 1 + + + #### A masked pattern was here #### + + + true + + + + + + 1 + + + + + FS_6 + + + + + + + + + + + - - - - - - - - - - key + + + + _col1 + + + value src - + - - string - - - - - value + + _col0 + + + key src - + - - string - - - - - - - - - - - - + + + - - - - - 2 - - - #### A masked pattern was here #### - - - true - - - - - - 150 - - - 1 - - - #### A masked pattern was here #### - - - true - - - - - - 1 - - - - - FS_6 - - - - - - - - - - - - + - - - - - - _col1 - - - value - - - src - - - - - + + - - _col0 - - - key - - - src - - - - - - - - - - - - - - - - - - - - - - _col0 - - - _col1 - - - - - - - SEL_5 - - + - + _col0 - - - - - - - - - - _col0 - - - src - - - - - - string - - - - - - - _col1 - - - src - - - - - - string - - - - + + _col1 - - - - - - - + + SEL_5 + + + + + + + + + - - - - - - - key - - - src - - - - - - - - - - - - - 100 - - - - + + + _col0 - - + + src - - + + + + string + - - - - - - - key - - - src - - - - - - - - - - - - - 200 - - - - + + + _col1 - - + + src - - + + + + string + - - - - - - - - FIL_4 + + + + + + + + + + + + + + + key + + + src + + + + + + + + + + + + + 100 + + + + + + + + + + + + + + + + + + + + + key + + + src + + + + + + + + + + + + + 200 + + + + + + + + + + + + + + + + + + + + + + + + + + + FIL_4 + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 3 + + + #### A masked pattern was here #### + + + true + + + + + + 150 + + + 1 + + + ds=2008-04-08/hr=12/ + + + #### A masked pattern was here #### + + + true + + + + + + 1 + + + + + FS_9 + + - - - - - 3 - - - #### A masked pattern was here #### - - - true - - - - - - 150 - - - 1 - - - ds=2008-04-08/hr=12/ - - - #### A masked pattern was here #### - - - true - - - - - - 1 - - - - - FS_9 - - - - - - - - - - - - + - - - - - - _col1 - - - - - - 2 - - + + + + + + + + + + + _col1 + + + - - _col0 - - - key - - - src - - - - - + + 2 - - - - - - - - - - - + + _col0 + + + key - - - - _col0 - - - _col1 - - + + src + + + - - SEL_8 - - + + + + + - + + + + - - - - - - - - _col0 - - - src - - - - - - string - - - - - - - _col1 - - - - - - int - - - - + + + + _col0 + + _col1 + - - - - - - - + + SEL_8 + + + + + + + + + - - - - - - - key - - - src - - - - - - - - - - - - - 200 - - - - + + + _col0 - - + + src - - + + + + string + - - - - - - - key - - - src - - - - - - - - - - - - - 300 - - - - + + + _col1 - - + + - - + + int - - - - - - - - FIL_7 + + + + + + + + + + + + + + + key + + + src + + + + + + + + + + + + + 200 + + + + + + + + + + + + + + + + + + + + + key + + + src + + + + + + + + + + + + + 300 + + + + + + + + + + + + + + + + + + + + + + + + + + + FIL_7 + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + 4 + + + #### A masked pattern was here #### + + + 1 + + + #### A masked pattern was here #### + + + true + + + + + + 1 + + + + + FS_12 + + - - - - - 4 - - - #### A masked pattern was here #### - - - 1 - - - #### A masked pattern was here #### - - - true - - - - - - 1 - - - - - FS_12 - - - - - - - - - - - - + - - - - - - _col0 - - - value - - - src - - - - - + + + + + + + + + + + _col0 + + + 
value - - - - - - - - - - + + src - - - - _col0 - - + + - - SEL_11 - - + + + + + - + - - - - - - - - value - - - _col0 - - - src - - - - - - string - - - - + + + + _col0 - - - - - - - + + SEL_11 + + + + + + + + + - - - key + + + value + + _col0 + src - + - - - - - - + + string - - 300 - - - - - - - - - FIL_10 + + + + + + + + + + + key + + + src + + + + + + + + + + + + + 300 + + + + + + + + + + + + + + + + + FIL_10 + + + + - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + Index: ql/src/test/results/compiler/plan/input4.q.xml =================================================================== --- ql/src/test/results/compiler/plan/input4.q.xml (revision 1555253) +++ ql/src/test/results/compiler/plan/input4.q.xml (working copy) @@ -297,269 +297,172 @@ tmap:src - - - - - - - - - + + + + + + + + + - - - - - - - - - _col1 - - - _col1 - - - - - string - - - - + + + + + _col1 + + + _col1 + + + + + string - + + + + + + _col0 + + + _col0 + + + + + + + + + + + + + + + + + + _col0 - - - _col0 - - - - - + + + - - - - + + + + + + org.apache.hadoop.mapred.SequenceFileInputFormat + + + org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + + + + columns + reducesinkkey0 - - - - - - _col0 - - - - - - - + + serialization.lib + org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe - - - - org.apache.hadoop.mapred.SequenceFileInputFormat - - - org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - - - - - columns - reducesinkkey0 - - - serialization.lib - org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe - - - serialization.sort.order - + - - - columns.types - string - - - - + + serialization.sort.order + + - - 1 + + columns.types + string - - -1 - - - - - reducesinkkey0 - - - - - - - _col0 - - - _col1 - - - - - - - - - _col0 - - - - - - - - - - -1 - - - - - - - - - - - - - - - org.apache.hadoop.mapred.SequenceFileInputFormat - - - org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - - - - - columns - _col0,_col1 - - - serialization.lib - org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - - columns.types - string,string - - - escape.delim - \ - - - - - - - RS_3 + + + + 1 + + + -1 + + + + + reducesinkkey0 - - - - - - + + + + + + _col0 - - - - - - - - tkey - - - _col0 - - - - - - string - - - - - - - tvalue - - - _col1 - - - - - - string - - - - - - + + _col1 - - - - - - - - - - + + + + + + _col0 - - - - - - int - - - - - 100 - - + + - - + + + + -1 + + + + + - - - - boolean + + + + + + + + + org.apache.hadoop.mapred.SequenceFileInputFormat + + + org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + + + + columns + _col0,_col1 + + serialization.lib + org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + + columns.types + string,string + + + escape.delim + \ + @@ -567,264 +470,345 @@ - FIL_8 + RS_3 - - - - - + + + - + + + + + tkey + + + _col0 + + + + + + string + + + + + + + tvalue + + + _col1 + + + + + + string + + + + - + + + + + + + + + + + + + + + + int + + + + + 100 + + + + + + + + + + + + boolean + + + + + + + + + FIL_8 + + + + + + + + + + + + + + + + + + + + org.apache.hadoop.hive.ql.exec.TextRecordReader - - - - org.apache.hadoop.hive.ql.exec.TextRecordReader + + org.apache.hadoop.hive.ql.exec.TextRecordWriter + + + org.apache.hadoop.hive.ql.exec.TextRecordReader + + + /bin/cat + + + + + org.apache.hadoop.mapred.TextInputFormat - - org.apache.hadoop.hive.ql.exec.TextRecordWriter + + org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - org.apache.hadoop.hive.ql.exec.TextRecordReader - - - /bin/cat 
- - - - - org.apache.hadoop.mapred.TextInputFormat + + + + field.delim + 9 - - org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + columns + KEY - - - - field.delim - 9 - - - columns - KEY - - - serialization.lib - org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - - serialization.format - 9 - - + + serialization.lib + org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + serialization.format + 9 + - - - - org.apache.hadoop.mapred.TextInputFormat + + + + + + org.apache.hadoop.mapred.TextInputFormat + + + org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + + + + field.delim + 9 - - org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + columns + _col0,_col1 - - - - field.delim - 9 - - - columns - _col0,_col1 - - - serialization.lib - org.apache.hadoop.hive.serde2.DelimitedJSONSerDe - - - serialization.format - 9 - - - columns.types - string,string - - + + serialization.lib + org.apache.hadoop.hive.serde2.DelimitedJSONSerDe - - - - - - org.apache.hadoop.mapred.TextInputFormat + + serialization.format + 9 - - org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + columns.types + string,string - - - - field.delim - 9 - - - columns - _col0,_col1 - - - serialization.lib - org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - - serialization.format - 9 - - - columns.types - string,string - - - - - SCR_2 - - - - - + + + + org.apache.hadoop.mapred.TextInputFormat - - - - - - + + org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + + + field.delim + 9 + + + columns + _col0,_col1 + + + serialization.lib + org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + + serialization.format + 9 + + + columns.types + string,string + + + - - - - - - _col1 - - - value - - - src - - - - - + + SCR_2 - - _col0 - - - key - - - src - - - - - + + + + - - - - - - - - + + + + - - - - - - - _col0 - - - _col1 - - - - - SEL_1 + + + + + _col1 + + + value + + + src + + + + + + + + _col0 + + + key + + + src + + + + + + + + + + + + + + + + + + + + + + + + _col0 + + + _col1 + + + + + + + SEL_1 + + + + - - - - - - - - - - - - - - - _col0 - - - - - - string - - - - - - - _col1 - - - - - - string - - - - - - - - - - + + + + + + + + + _col0 + + + + + + string + + + + + + + _col1 + + + + + + string + + + + + + + + + @@ -1137,210 +1121,202 @@ - - - - - - - - - - - 1 - - - #### A masked pattern was here #### - - - true - - - - - - 150 - - - 1 - - - #### A masked pattern was here #### - - - true - - - - - - 1 - - + + + + + + + + + 1 - - FS_7 + + #### A masked pattern was here #### - - - - - - + + true - - - - - - - - key - - - - - - - - - string - - - - - - - value - - - - - - - - - string - - - - - - + + - - - - - - - - _col1 - - - _col1 + + 150 - - tmap + + 1 - - + + #### A masked pattern was here #### - - - - _col0 - - - _col0 + + true - - tmap + + - - + + 1 - - - - - - - - - - - - - + + FS_7 - - - - _col0 - - - _col1 - - + + + + - - - - SEL_6 - - - - - - - - - - - - - - - - _col0 + + + + + + + + key + + + + + + + + + string + + - - tmap + + + + value + + + + + + + + + string + + - - - - - string - - - - - _col1 - - - tmap - - - - - - string - - - - - - + + + + + _col1 + + + _col1 + + + tmap + + + + + + + + _col0 + + + _col0 + + + tmap + + + + + + + + + + + + + + + + + + + + + + + + _col0 + + + _col1 + + + + + + + SEL_6 + + + + + + + + + + + + + + _col0 + + + tmap + + + + + + string + + + + + + + _col1 + + + tmap + + + + + + string + + + + + + + + + @@ -1363,11 +1339,9 @@ EX_4 - - - - - + + + Index: ql/src/test/results/compiler/plan/input5.q.xml 
=================================================================== --- ql/src/test/results/compiler/plan/input5.q.xml (revision 1555253) +++ ql/src/test/results/compiler/plan/input5.q.xml (working copy) @@ -305,412 +305,334 @@ tmap:src_thrift - - - - - - - - - + + + + + + + + + + + + _col1 + + + _col1 + + + + + string + + + + + + + _col0 + + + _col0 + + + + + + + + + + + + + + + + + + + _col0 + + + + + + + + + + + + org.apache.hadoop.mapred.SequenceFileInputFormat + + + org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + + + + columns + reducesinkkey0 + + + serialization.lib + org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe + + + serialization.sort.order + + + + + columns.types + string + + + + + + + 1 + + + -1 + + + + + reducesinkkey0 + + + + + + + _col0 + + + _col1 + + + + + + + + + _col0 + + + + + + + + + + -1 + + + + + + + + + + + + + + + org.apache.hadoop.mapred.SequenceFileInputFormat + + + org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + + + + columns + _col0,_col1 + + + serialization.lib + org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + + columns.types + string,string + + + escape.delim + \ + + + + + + + + + RS_3 + + - - - - - _col1 - - - _col1 - - - - - string - - - - - - - _col0 - - - _col0 - - - - - - - + + + + + + + + + + + tkey + + + _col0 + + + + + + string + + + + + + + tvalue + + + _col1 + + + + + + string + + + + + + + + + + + + + + org.apache.hadoop.hive.ql.exec.TextRecordReader + + + org.apache.hadoop.hive.ql.exec.TextRecordWriter + + + org.apache.hadoop.hive.ql.exec.TextRecordReader + + + /bin/cat + + + + + org.apache.hadoop.mapred.TextInputFormat + + + org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + + + + field.delim + 9 - - - - - - - - - - - _col0 - - - - - - - - - - - - org.apache.hadoop.mapred.SequenceFileInputFormat - - - org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - - - - - columns - reducesinkkey0 - - - serialization.lib - org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe - - - serialization.sort.order - + - - - columns.types - string - - - - - - - 1 - - - -1 - - - - - reducesinkkey0 - - - - - - - _col0 - - - _col1 - - - - - - - - - _col0 - - - - - - - - - - -1 - - - - - - - - - - - - - - - org.apache.hadoop.mapred.SequenceFileInputFormat - - - org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - - - - - columns - _col0,_col1 - - - serialization.lib - org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - - columns.types - string,string - - - escape.delim - \ - - - - - - + + columns + KEY - - RS_3 + + serialization.lib + org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - - - - - + + serialization.format + 9 - - - - - - - - tkey - - - _col0 - - - - - - string - - - - - - - tvalue - - - _col1 - - - - - - string - - - - - - - - - - - org.apache.hadoop.hive.ql.exec.TextRecordReader + + + + org.apache.hadoop.mapred.TextInputFormat - - org.apache.hadoop.hive.ql.exec.TextRecordWriter + + org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - org.apache.hadoop.hive.ql.exec.TextRecordReader - - - /bin/cat - - - - - org.apache.hadoop.mapred.TextInputFormat + + + + field.delim + 9 - - org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + columns + _col0,_col1 - - - - field.delim - 9 - - - columns - KEY - - - serialization.lib - org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - - serialization.format - 9 - - + + serialization.lib + org.apache.hadoop.hive.serde2.DelimitedJSONSerDe - - - - - - org.apache.hadoop.mapred.TextInputFormat + + 
serialization.format + 9 - - org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + columns.types + array<int>,array<struct<myint:int,mystring:string,underscore_int:int>> - - - - field.delim - 9 - - - columns - _col0,_col1 - - - serialization.lib - org.apache.hadoop.hive.serde2.DelimitedJSONSerDe - - - serialization.format - 9 - - - columns.types - array<int>,array<struct<myint:int,mystring:string,underscore_int:int>> - - - - - - - org.apache.hadoop.mapred.TextInputFormat - - - org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - - - - field.delim - 9 - - - columns - _col0,_col1 - - - serialization.lib - org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - - serialization.format - 9 - - - columns.types - string,string - - - - - - - SCR_2 - - - - - + + + + org.apache.hadoop.mapred.TextInputFormat - - - - - - + + org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - - - - - - - - - _col1 - - - lintstring - - - src_thrift - - - - - - - - - myint - - - mystring - - - underscore_int - - + + + + field.delim + 9 - - - - - - int - - - - - - - - - - + + columns + _col0,_col1 + + serialization.lib + org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + + serialization.format + 9 + + + columns.types + string,string + @@ -717,97 +639,163 @@ - - _col0 - - - lint - - - src_thrift - - - - - - - - - + + SCR_2 - - - - - - - - - - - - - + + + + - - - - _col0 + + + + - - _col1 - - - SEL_1 + + + + + _col1 + + + lintstring + + + src_thrift + + + + + + + + + myint + + + mystring + + + underscore_int + + + + + + + + + int + + + + + + + + + + + + + + + + + + + _col0 + + + lint + + + src_thrift + + + + + + + + + + + + + + + + + + + + + + + + + + + + _col0 + + + _col1 + + + + + + + SEL_1 + + + + - - - - - - - - - - - - - - - _col0 - - - - - - array<int> - - - - - - - _col1 - - - - - - array<struct<myint:int,mystring:string,underscore_int:int>> - - - - - - - - - - + + + + + + + + + _col0 + + + + + + array<int> + + + + + + + _col1 + + + + + + array<struct<myint:int,mystring:string,underscore_int:int>> + + + + + + + + + @@ -1203,210 +1191,202 @@ - - - - - - - - - - - 1 - - - #### A masked pattern was here #### - - - true - - - - - - 150 - - - 1 - - - #### A masked pattern was here #### - - - true - - - - - - 1 - - + + + + + + + + + 1 - - FS_6 + + #### A masked pattern was here #### - - - - - - + + true - - - - - - - - key - - - - - - - - - string - - - - - - - value - - - - - - - - - string - - - - - - + + - - - - - - - - _col1 - - - _col1 + + 150 - - tmap + + 1 - - + + #### A masked pattern was here #### - - - - _col0 - - - _col0 + + true - - tmap + + - - + + 1 - - - - - - - - - - - - - + + FS_6 - - - - _col0 - - - _col1 - - + + + + - - - - SEL_5 - - - - - - - - - - - - - - - - _col0 + + + + + + + + key + + + + + + + + + string + + - - tmap + + + + value + + + + + + + + + string + + - - - - - string - - - - - _col1 - - - tmap - - - - - - string - - - - - - + + + + + _col1 + + + _col1 + + + tmap + + + + + + + + _col0 + + + _col0 + + + tmap + + + + + + + + + + + + + + + + + + + + + + + + _col0 + + + _col1 + + + + + + + SEL_5 + + + + + + + + + + + + + + _col0 + + + tmap + + + + + + string + + + + + + + _col1 + + + tmap + + + + + + string + + + + + + + + + @@ -1429,11 +1409,9 @@ EX_4 - - - - - + + + Index: ql/src/test/results/compiler/plan/input6.q.xml =================================================================== --- ql/src/test/results/compiler/plan/input6.q.xml (revision 1555253) +++ ql/src/test/results/compiler/plan/input6.q.xml (working copy) @@ -79,137 +79,133 @@ #### A masked 
pattern was here #### - - - - - - - #### A masked pattern was here #### - - - 1 - - - - - org.apache.hadoop.mapred.TextInputFormat - - - org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - - - - name - default.dest1 - - - columns.types - string:string - - - serialization.ddl - struct dest1 { string key, string value} - - - serialization.format - 1 - - - columns - key,value - - - bucket_count - -1 - - - serialization.lib - org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - - file.inputformat - org.apache.hadoop.mapred.TextInputFormat - - - location - #### A masked pattern was here #### - - - file.outputformat - org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - - transient_lastDdlTime - #### A masked pattern was here #### - - - - - - - 1 - - + + + + + + #### A masked pattern was here #### + + + 1 + + + + + org.apache.hadoop.mapred.TextInputFormat + + + org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + + + + name + default.dest1 + + + columns.types + string:string + + + serialization.ddl + struct dest1 { string key, string value} + + + serialization.format + 1 + + + columns + key,value + + + bucket_count + -1 + + + serialization.lib + org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + + file.inputformat + org.apache.hadoop.mapred.TextInputFormat + + + location + #### A masked pattern was here #### + + + file.outputformat + org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + + transient_lastDdlTime + #### A masked pattern was here #### + + + + + + + 1 + + + + + FS_6 + + + + - - FS_6 - - - - - - - - - - - - - - - - key - - - - - - - - string - - - - - string - - - - - - - value - - - - - - - - - string - - - - - - - - - - + + + + + + + + + key + + + + + + + + string + + + + + string + + + + + + + value + + + + + + + + + string + + + + + + + + + @@ -638,206 +634,161 @@ src1 - - - - - - - - - + + + + + + + + + + + + 1 + + + #### A masked pattern was here #### + + + true + + + + + + 150 + + + 1 + + + #### A masked pattern was here #### + + + true + + + + + + 1 + + + + + FS_3 + + - - - - - 1 - - - #### A masked pattern was here #### - - - true - - - - - - 150 - - - 1 - - - #### A masked pattern was here #### - - - true - - - - - - 1 - - - - - FS_3 - - - - - - - - - - - - + - - - - - - _col1 - - - value - - - src1 - - - - - + + + + + + + + + + + _col1 + + + value - - _col0 - - - key - - - src1 - - - - - + + src1 + + + - - - - - - - - - - - + + _col0 + + + key - - - - _col0 - - - _col1 - - + + src1 + + + - - SEL_2 - - + + + + + - + + + + - - - - - - - - _col0 - - - src1 - - - - - - string - - - - - - - _col1 - - - src1 - - - - - - string - - - - + + + + _col0 + + _col1 + - - - - - - - + + SEL_2 + + + + + + + + + - - - key + + + _col0 src1 - + + + string + - - - - - - - - - boolean + + + + _col1 + + + src1 + + + + + + string + + @@ -845,59 +796,92 @@ - - FIL_4 + + + + + + + + + + + key + + + src1 + + + + + + + + + + + + + + + boolean + + + + + + + + + FIL_4 + + + + - - - - - - - - - - - - - - - key - - - src1 - - - - - - string - - - - - - - value - - - src1 - - - - - - string - - - - - - - - - - + + + + + + + + + key + + + src1 + + + + + + string + + + + + + + value + + + src1 + + + + + + string + + + + + + + + + Index: ql/src/test/results/compiler/plan/input7.q.xml =================================================================== --- ql/src/test/results/compiler/plan/input7.q.xml (revision 1555253) +++ ql/src/test/results/compiler/plan/input7.q.xml (working copy) @@ -79,137 +79,133 @@ #### A masked pattern was here #### - - - - - - - #### A 
masked pattern was here #### - - - 1 - - - - - org.apache.hadoop.mapred.TextInputFormat - - - org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - - - - name - default.dest1 - - - columns.types - string:string - - - serialization.ddl - struct dest1 { string key, string value} - - - serialization.format - 1 - - - columns - key,value - - - bucket_count - -1 - - - serialization.lib - org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - - file.inputformat - org.apache.hadoop.mapred.TextInputFormat - - - location - #### A masked pattern was here #### - - - file.outputformat - org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - - transient_lastDdlTime - #### A masked pattern was here #### - - - - - - - 1 - - + + + + + + #### A masked pattern was here #### + + + 1 + + + + + org.apache.hadoop.mapred.TextInputFormat + + + org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + + + + name + default.dest1 + + + columns.types + string:string + + + serialization.ddl + struct dest1 { string key, string value} + + + serialization.format + 1 + + + columns + key,value + + + bucket_count + -1 + + + serialization.lib + org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + + file.inputformat + org.apache.hadoop.mapred.TextInputFormat + + + location + #### A masked pattern was here #### + + + file.outputformat + org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + + transient_lastDdlTime + #### A masked pattern was here #### + + + + + + + 1 + + + + + FS_4 + + + + - - FS_4 - - - - - - - - - - - - - - - - key - - - - - - - - string - - - - - string - - - - - - - value - - - - - - - - - string - - - - - - - - - - + + + + + + + + + key + + + + + + + + string + + + + + string + + + + + + + value + + + + + + + + + string + + + + + + + + + @@ -638,168 +634,160 @@ src1 - - - - - - - - - - - 1 - - - #### A masked pattern was here #### - - - true - - - - - - 150 - - - 1 - - - #### A masked pattern was here #### - - - true - - - - - - 1 - - + + + + + + + + + 1 - - FS_2 + + #### A masked pattern was here #### - - - - - - + + true - - + + - - - - - - - - _col1 - - - key + + 150 - - src1 + + 1 - - + + #### A masked pattern was here #### - - - - _col0 - - - + + true - - - - - - - - - - + + - - + + 1 - - - - _col0 - - - _col1 - - + + FS_2 - - - - SEL_1 - - - - - + + + + - - - - - - - - - - _col0 - - - - - void - - - - - void - - - - - - - _col1 - - - src1 - - - - - - string - - - - + + - - - + + + + + _col1 + + + key + + + src1 + + + + + + + + _col0 + + + + + + + + + + + + + + + + + + + + + + + + _col0 + + + _col1 + + + + + + + SEL_1 + + + + + + + + + + + + + + _col0 + + + + + void + + + + + void + + + + + + + _col1 + + + src1 + + + + + + string + + + + + + + + + Index: ql/src/test/results/compiler/plan/input8.q.xml =================================================================== --- ql/src/test/results/compiler/plan/input8.q.xml (revision 1555253) +++ ql/src/test/results/compiler/plan/input8.q.xml (working copy) @@ -153,335 +153,327 @@ src1 - - - - - - - - - - - #### A masked pattern was here #### + + + + + + + + + #### A masked pattern was here #### + + + 1 + + + #### A masked pattern was here #### + + + true + + + + + org.apache.hadoop.mapred.TextInputFormat - - 1 + + org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - #### A masked pattern was here #### - - - true - - - - - org.apache.hadoop.mapred.TextInputFormat + + + + hive.serialization.extend.nesting.levels + true - - org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + columns + _col0,_col1,_col2 - - - 
- hive.serialization.extend.nesting.levels - true - - - columns - _col0,_col1,_col2 - - - serialization.lib - org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - - serialization.format - 1 - - - columns.types - double:double:double - - - escape.delim - \ - - + + serialization.lib + org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + serialization.format + 1 + + + columns.types + double:double:double + + + escape.delim + \ + - - 1 - - - FS_2 + + 1 - + + + + FS_2 + + + + + + + + + - - - - - - - - - - - - _col0 - - - - - - - - double - - - - - double - - + + + _col0 - - - - _col1 - - - - - - - - - double - - + + - - - - _col2 - - - - - - - + + double + + double + - - - - - - - - - - _col2 - - - - - - - - - - - - - - - - - - - - _col1 - - - - - - - key + + + _col1 - src1 + - - - - string - - + + + + double + - - - - - - - - - - - - - - _col0 - - - - - - - - - int - - + + + _col2 - - 4 + + + + + + + double + - - - - - - - - - - - - - - - - - - - - - - - - - - - - _col0 - - - _col1 - - - _col2 - - - - + + + + + _col2 + + + + + + + + + + + + + + + + + + + + + _col1 + + + + + + + key + + + src1 + + + + + string + + + + + + + + + + + + + + + + + + + + _col0 + + + + + + + + + int + + + + + 4 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + _col0 + + + _col1 + + + _col2 + + + + + + + SEL_1 + + + + - - SEL_1 - - - - - - - - - - - - - - - - _c0 - - - _col0 - - - - - - double - - - - - - - _c1 - - - _col1 - - - - - - double - - - - - - - _c2 - - - _col2 - - - - - - double - - - - - - - - - - + + + + + + + + + _c0 + + + _col0 + + + + + + double + + + + + + + _c1 + + + _col1 + + + + + + double + + + + + + + _c2 + + + _col2 + + + + + + double + + + + + + + + + Index: ql/src/test/results/compiler/plan/input9.q.xml =================================================================== --- ql/src/test/results/compiler/plan/input9.q.xml (revision 1555253) +++ ql/src/test/results/compiler/plan/input9.q.xml (working copy) @@ -79,137 +79,133 @@ #### A masked pattern was here #### - - - - - - - #### A masked pattern was here #### - - - 1 - - - - - org.apache.hadoop.mapred.TextInputFormat - - - org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - - - - name - default.dest1 - - - columns.types - string:string - - - serialization.ddl - struct dest1 { string key, string value} - - - serialization.format - 1 - - - columns - key,value - - - bucket_count - -1 - - - serialization.lib - org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - - file.inputformat - org.apache.hadoop.mapred.TextInputFormat - - - location - #### A masked pattern was here #### - - - file.outputformat - org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - - transient_lastDdlTime - #### A masked pattern was here #### - - - - - - - 1 - - + + + + + + #### A masked pattern was here #### + + + 1 + + + + + org.apache.hadoop.mapred.TextInputFormat + + + org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + + + + name + default.dest1 + + + columns.types + string:string + + + serialization.ddl + struct dest1 { string key, string value} + + + serialization.format + 1 + + + columns + key,value + + + bucket_count + -1 + + + serialization.lib + org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + + file.inputformat + org.apache.hadoop.mapred.TextInputFormat + + + location + #### A masked pattern was here #### + + + file.outputformat + org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + + transient_lastDdlTime + #### A masked pattern was here #### + + + + + + + 1 + + + + + FS_6 + + + + - - 
FS_6 - - - - - - - - - - - - - - - - key - - - - - - - - string - - - - - string - - - - - - - value - - - - - - - - - string - - - - - - - - - - + + + + + + + + + key + + + + + + + + string + + + + + string + + + + + + + value + + + + + + + + + string + + + + + + + + + @@ -638,238 +634,226 @@ src1 - - - - - - - - - + + + + + + + + + + + + 1 + + + #### A masked pattern was here #### + + + true + + + + + + 150 + + + 1 + + + #### A masked pattern was here #### + + + true + + + + + + 1 + + + + + FS_3 + + - - - - - 1 - - - #### A masked pattern was here #### - - - true - - - - - - 150 - - - 1 - - - #### A masked pattern was here #### - - - true - - - - - - 1 - - - - - FS_3 - - - - - - - - - - - - + + + + + + + + + + + + _col1 + + + key + + + src1 + + + + - - - - _col1 - - - key - - - src1 - - - - - + + _col0 + + + - - _col0 - - - - - - - - - - - - - - - - - + + + + + + + + - - - - _col0 - - - _col1 - - + + - - SEL_2 - - + - + _col0 + + _col1 + - - - - - - - - _col0 - - - - - void - - - + + + + SEL_2 + + + + + + + + + + + + + + _col0 + + + void - - - - _col1 - - - src1 - - - - - - string - - + + void - - - - - - - - - - - - - + + + _col1 + + + src1 + + + + + + string + + - - - - - - - - - - boolean - - - - - FIL_4 + + + + + + + + + + + + + + + + + + + + + + boolean + + + + + + + + + FIL_4 + + + + - - - - - - - - - - - - - - - key - - - src1 - - - - - - string - - - - - - - - - - + + + + + + + + + key + + + src1 + + + + + + string + + + + + + + + + Index: ql/src/test/results/compiler/plan/input_part1.q.xml =================================================================== --- ql/src/test/results/compiler/plan/input_part1.q.xml (revision 1555253) +++ ql/src/test/results/compiler/plan/input_part1.q.xml (working copy) @@ -174,498 +174,486 @@ srcpart - - - - - - - - + + + + + + + + + + + + #### A masked pattern was here #### + + + 1 + + + #### A masked pattern was here #### + + + true + + + + + org.apache.hadoop.mapred.TextInputFormat + + + org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + + + + hive.serialization.extend.nesting.levels + true + + + columns + _col0,_col1,_col2,_col3 + + + serialization.lib + org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + + serialization.format + 1 + + + columns.types + string:string:string:string + + + escape.delim + \ + + + + + + + 1 + + + + + FS_3 + + + + + + + + + + + + + + _col0 + + + + + + + + string + + + + + string + + + + + + + _col1 + + + + + + + + + string + + + + + + + _col2 + + + + + + + + + string + + + + + + + _col3 + + + + + + + + + string + + + + + + + + + + + + + + _col3 + + + ds + + + true + + + srcpart + + + + + + + + _col2 + + + hr + + + true + + + srcpart + + + + + + + + _col1 + + + value + + + srcpart + + + + + + + + _col0 + + + key + + + srcpart + + + + + + + + + + + - - - - - #### A masked pattern was here #### - - - 1 - - - #### A masked pattern was here #### - - - true - - - - - org.apache.hadoop.mapred.TextInputFormat - - - org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - - - - hive.serialization.extend.nesting.levels - true - - - columns - _col0,_col1,_col2,_col3 - - - serialization.lib - org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - - serialization.format - 1 - - - columns.types - string:string:string:string - - - escape.delim - \ - - - - - - - 1 - - - - - FS_3 - - - - - - - - - - - - - - - - _col0 - - - - - - - - string - - - - - string - - - - - - - _col1 - - - - - - - - - string - - - - - - - _col2 - - - - - - - - - string - - - - - - - _col3 - - - - - - - - - string - - - - - - - - + + + + 
+ + + + + + - - - + + + + _col0 + + + _col1 + + + _col2 + + _col3 - - - ds + + + + + + + SEL_2 + + + + + + + + + + + + + + key - - true + + _col0 srcpart - + + + string + - - _col2 - - - hr + + + + value - - true + + _col1 srcpart - + + + string + - - _col1 - - - value + + + + hr - - srcpart + + _col2 - - - - - - - _col0 - - - key - srcpart - + - - - - - - - - - - + + string - - - - - - - - - - - - - _col0 + + + + ds - - _col1 - - - _col2 - - + _col3 - - - - - - SEL_2 - - - - - - - - - - - - - - - - key - - - _col0 - - - srcpart - - - - - - string - - - - - - - value - - - _col1 - - - srcpart - - - - - - string - - - - - - - hr - - - _col2 - - - srcpart - - - - - - string - - - - - - - ds - - - _col3 - - - srcpart - - - - - - string - - - - - - - - - - - - - - - - - - - - - key - srcpart - + - - - - - - - - int - - + + string - - 100 - - - - - - - - boolean - - - - - FIL_4 + + + + + + + + + + + key + + + srcpart + + + + + + + + + + + + int + + + + + 100 + + + + + + + + + + + + boolean + + + + + + + + + FIL_4 + + + + - - - - - - - - - - - - - - - key - - - srcpart - - - - - - string - - - - - - - value - - - srcpart - - - - - - string - - - - - - - ds - - - srcpart - - - - - - string - - - - - - - hr - - - srcpart - - - - - - string - - - - - - - - - - + + + + + + + + + key + + + srcpart + + + + + + string + + + + + + + value + + + srcpart + + + + + + string + + + + + + + ds + + + srcpart + + + + + + string + + + + + + + hr + + + srcpart + + + + + + string + + + + + + + + + Index: ql/src/test/results/compiler/plan/input_testsequencefile.q.xml =================================================================== --- ql/src/test/results/compiler/plan/input_testsequencefile.q.xml (revision 1555253) +++ ql/src/test/results/compiler/plan/input_testsequencefile.q.xml (working copy) @@ -79,137 +79,133 @@ #### A masked pattern was here #### - - - - - - - #### A masked pattern was here #### - - - 1 - - - - - org.apache.hadoop.mapred.SequenceFileInputFormat - - - org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - - - - - name - default.dest4_sequencefile - - - columns.types - string:string - - - serialization.ddl - struct dest4_sequencefile { string key, string value} - - - serialization.format - 1 - - - columns - key,value - - - bucket_count - -1 - - - serialization.lib - org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - - file.inputformat - org.apache.hadoop.mapred.SequenceFileInputFormat - - - location - #### A masked pattern was here #### - - - file.outputformat - org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - - - transient_lastDdlTime - #### A masked pattern was here #### - - - - - - - 1 - - + + + + + + #### A masked pattern was here #### + + + 1 + + + + + org.apache.hadoop.mapred.SequenceFileInputFormat + + + org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + + + + name + default.dest4_sequencefile + + + columns.types + string:string + + + serialization.ddl + struct dest4_sequencefile { string key, string value} + + + serialization.format + 1 + + + columns + key,value + + + bucket_count + -1 + + + serialization.lib + org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + + file.inputformat + org.apache.hadoop.mapred.SequenceFileInputFormat + + + location + #### A masked pattern was here #### + + + file.outputformat + org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + + transient_lastDdlTime + #### A masked pattern was here #### + + + + + + + 1 + + + + + FS_4 + + + + - - FS_4 - - - - - - - - - - - - - - - - key - - - - - - - - string - - - - - 
string - - - - - - - value - - - - - - - - - string - - - - - - - - - - + + + + + + + + + key + + + + + + + + string + + + + + string + + + + + + + value + + + + + + + + + string + + + + + + + + + @@ -638,173 +634,165 @@ src - - - - - - - - - - - 1 - - - #### A masked pattern was here #### - - - true - - - - - - 150 - - - 1 - - - #### A masked pattern was here #### - - - true - - - - - - 1 - - + + + + + + + + + 1 - - FS_2 + + #### A masked pattern was here #### - - - - - - + + true - - + + - - - - - - - - _col1 - - - value + + 150 - - src + + 1 - - + + #### A masked pattern was here #### - - - - _col0 - - - key + + true - - src + + - - + + 1 - - - - - - - - - - - - - + + FS_2 - - - - _col0 - - - _col1 - - + + + + - - - - SEL_1 - - - - - + + - - - - - - - - _col0 - - - src - - - - - - string - - - - - - - _col1 - - - src - - - - - - string - - - - - - + + + + + _col1 + + + value + + + src + + + + + + + + _col0 + + + key + + + src + + + + + + + + + + + + + + + + + + + + + + + + _col0 + + + _col1 + + + + + + + SEL_1 + + + + - - - + + + + + + + + + _col0 + + + src + + + + + + string + + + + + + + _col1 + + + src + + + + + + string + + + + + + + + + Index: ql/src/test/results/compiler/plan/input_testxpath.q.xml =================================================================== --- ql/src/test/results/compiler/plan/input_testxpath.q.xml (revision 1555253) +++ ql/src/test/results/compiler/plan/input_testxpath.q.xml (working copy) @@ -161,420 +161,412 @@ src_thrift - - - - - - - - - - - #### A masked pattern was here #### + + + + + + + + + #### A masked pattern was here #### + + + 1 + + + #### A masked pattern was here #### + + + true + + + + + org.apache.hadoop.mapred.TextInputFormat - - 1 + + org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - #### A masked pattern was here #### - - - true - - - - - org.apache.hadoop.mapred.TextInputFormat + + + + hive.serialization.extend.nesting.levels + true - - org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + columns + _col0,_col1,_col2 - - - - hive.serialization.extend.nesting.levels - true - - - columns - _col0,_col1,_col2 - - - serialization.lib - org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - - serialization.format - 1 - - - columns.types - int:string:string - - - escape.delim - \ - - + + serialization.lib + org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - - - 1 - - - - - FS_2 - - - - - - - - - - - - - - - - _col0 - - - - - - - - int - - - - - int - - + + serialization.format + 1 - - - - _col1 - - - - - - - - string - - - - - string - - + + columns.types + int:string:string - - - - _col2 - - - - - - - - - string - - + + escape.delim + \ + + 1 + - - - - - - _col2 - - + + FS_2 + + + + + + + + + - - - mstringstring + + + _col0 - src_thrift + - - - - + + + + int - - - + + int + - - - + + + _col1 - - key_2 + + - - - - - - - - - - - - - - _col1 - - - - - - - - - lintstring + + + + string - - src_thrift - - - - - - - - - myint - - - mystring - - - underscore_int - - - - - - - - - - - - - - - - - - - - - - - - - - - 0 - - + + string - - - - - - - - - - mystring - - - false - - - - - - - - _col0 - - - - - - lint + + + _col2 - src_thrift + - - - - - - + + - - - - - - + + string - - 1 - - - - - - - - - - - - - - - - - - - - - - - - - - _col0 - - - _col1 - - - _col2 - - - - + + + + + _col2 + + + + + + + mstringstring + + + src_thrift + + + + + + + + + + + + + + + + + + + + key_2 + + + + + + + + + + + + + + + _col1 + + + + + + + + + lintstring + + + src_thrift + + + + + + + + + myint + + + mystring + + + underscore_int + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + 0 + + + + + + + + + + + + + + + mystring + + + false + + + + + + + + _col0 + + + + + + + lint + + + src_thrift + + + + + + + + + + + + + + + + + 1 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + _col0 + + + _col1 + + + _col2 + + + + + + + SEL_1 + + + + - - SEL_1 - - - - - - - - - - - - - - - - _c0 - - - _col0 - - - - - - int - - - - - - - mystring - - - _col1 - - - - - - string - - - - - - - _c2 - - - _col2 - - - - - - string - - - - - - - - - - + + + + + + + + + _c0 + + + _col0 + + + + + + int + + + + + + + mystring + + + _col1 + + + + + + string + + + + + + + _c2 + + + _col2 + + + + + + string + + + + + + + + + Index: ql/src/test/results/compiler/plan/input_testxpath2.q.xml =================================================================== --- ql/src/test/results/compiler/plan/input_testxpath2.q.xml (revision 1555253) +++ ql/src/test/results/compiler/plan/input_testxpath2.q.xml (working copy) @@ -161,140 +161,169 @@ src_thrift - - - - - - - - - + + + + + + + + + + + + #### A masked pattern was here #### + + + 1 + + + #### A masked pattern was here #### + + + true + + + + + org.apache.hadoop.mapred.TextInputFormat + + + org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + + + + hive.serialization.extend.nesting.levels + true + + + columns + _col0,_col1,_col2 + + + serialization.lib + org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + + serialization.format + 1 + + + columns.types + int:int:int + + + escape.delim + \ + + + + + + + 1 + + + + + FS_3 + + - - - - - #### A masked pattern was here #### + + + + + + + + + + + _col0 + + + + + + + + int + + + + + int + + + + + + + _col1 + + + + + + + + + int + + + + + + + _col2 + + + + + + + + + int + + + + + + + + + + + + + + _col2 + + + + + + + mstringstring - - 1 + + src_thrift - - #### A masked pattern was here #### - - - true - - - - - org.apache.hadoop.mapred.TextInputFormat - - - org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - - - - hive.serialization.extend.nesting.levels - true - - - columns - _col0,_col1,_col2 - - - serialization.lib - org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - - serialization.format - 1 - - - columns.types - int:int:int - - - escape.delim - \ - - - - - - - 1 - - - - - FS_3 - - - - - - - - - - - - - - - - _col0 - - - - - - - - int - - - + + + + - int + string - - - - _col1 - - - - - - - - - int - - + + - - - - _col2 - - - - - - - - - int - - - @@ -301,92 +330,55 @@ + + + + + + - - - - _col2 - - - - - - - mstringstring - - - src_thrift - - - - - - - string + + _col1 + + + + + + + lintstring + + + src_thrift + + + + + + + + + myint + + mystring + + + underscore_int + - - - - - - - - - - - - - - - - - - - _col1 - - - - - - - lintstring - - - src_thrift - - - - - - - - - myint - - - mystring - - - underscore_int - - + + + + - - - - - - - - - - - - + + + + + @@ -395,307 +387,303 @@ - - - - - - - - _col0 - - - - - - - lint + + + + + + + + + + _col0 + + + + + + + lint + + + src_thrift + + + + + - - src_thrift - - - - - - - - - - - - - - + + + + + + - - - - - - - - - - - - - - + + + + + + + + - - - - _col0 - - - _col1 - - - _col2 - - + + + + + - - SEL_2 - - + - + _col0 + + _col1 + + + _col2 + - - - - - - - - _c0 - - - _col0 - - - - - - int - - + + + + SEL_2 + + + + + + + + + + + + + + _c0 - - - - _c1 - - - _col1 - - - - - - int - - + + _col0 - - - - _c2 - - - _col2 - - - - - - int - - + + + + int + - - - - - - - - - - - - - - - - - - - lint - - - src_thrift - - - - - - - + + + _c1 - - + + _col1 - - - - boolean - - + + + 
[Tail of the preceding plan file's hunks: re-indented src_thrift column metadata (lint: array<int>, lintstring: array<struct<myint:int,mystring:string,underscore_int:int>>, mstringstring: map<string,string>) around filter operator FIL_4.]

[The regenerated golden plan files below re-serialize the same operator trees (FIL/SEL/RS/JOIN/FS operators over src1/src2/src3, BinarySortableSerDe join keys, LazyBinarySerDe values) with shallower indentation and a few fewer serialized lines per operator. The XML markup of these hunks did not survive extraction, so each file is listed with its header and hunk ranges only.]

Index: ql/src/test/results/compiler/plan/join1.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/join1.q.xml	(revision 1555253)
+++ ql/src/test/results/compiler/plan/join1.q.xml	(working copy)
@@ -433,218 +433,214 @@ src2
@@ -767,181 +763,177 @@ src1
@@ -1250,210 +1242,202 @@
@@ -1603,14 +1587,12 @@ JOIN_4

Index: ql/src/test/results/compiler/plan/join2.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/join2.q.xml	(revision 1555253)
+++ ql/src/test/results/compiler/plan/join2.q.xml	(working copy)
@@ -308,213 +308,209 @@ $INTNAME
@@ -588,205 +584,201 @@ src3
@@ -1157,210 +1149,202 @@
@@ -1526,14 +1510,12 @@ JOIN_8
@@ -1901,184 +1883,180 @@ src2
@@ -2191,181 +2169,177 @@ src1
@@ -2668,41 +2642,37 @@
@@ -2852,14 +2822,12 @@ JOIN_5

Index: ql/src/test/results/compiler/plan/join3.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/join3.q.xml	(revision 1555253)
+++ ql/src/test/results/compiler/plan/join3.q.xml	(working copy)
@@ -569,180 +569,176 @@ src2
@@ -859,214 +855,210 @@ src3
@@ -1185,181 +1177,177 @@ src1
@@ -1671,210 +1659,202 @@
@@ -2058,17 +2038,15 @@ JOIN_6

Index: ql/src/test/results/compiler/plan/join4.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/join4.q.xml	(revision 1555253)
+++ ql/src/test/results/compiler/plan/join4.q.xml	(working copy)
@@ -289,478 +289,466 @@ c:a:src1
@@ -857,469 +845,457 @@ c:b:src2
@@ -1602,349 +1578,341 @@
@@ -2126,14 +2094,12 @@ JOIN_8

Index: ql/src/test/results/compiler/plan/join5.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/join5.q.xml	(revision 1555253)
+++ ql/src/test/results/compiler/plan/join5.q.xml	(working copy)
@@ -289,478 +289,466 @@ c:a:src1
@@ -857,469 +845,457 @@ c:b:src2
@@ -1602,349 +1578,341 @@
@@ -2122,14 +2090,12 @@ JOIN_8

Index: ql/src/test/results/compiler/plan/join6.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/join6.q.xml	(revision 1555253)
+++ ql/src/test/results/compiler/plan/join6.q.xml	(working copy)
@@ -289,478 +289,466 @@ c:a:src1
@@ -857,469 +845,457 @@ c:b:src2
@@ -1602,349 +1578,341 @@
@@ -2129,14 +2097,12 @@ JOIN_8

Index: ql/src/test/results/compiler/plan/join7.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/join7.q.xml	(revision 1555253)
+++ ql/src/test/results/compiler/plan/join7.q.xml	(working copy)
@@ -425,478 +425,466 @@ c:a:src1
@@ -993,469 +981,457 @@ c:b:src2
@@ -1548,469 +1524,457 @@ c:c:src3
@@ -2296,459 +2260,451 @@
@@ -3006,17 +2962,15 @@ JOIN_12

Index: ql/src/test/results/compiler/plan/join8.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/join8.q.xml	(revision 1555253)
+++ ql/src/test/results/compiler/plan/join8.q.xml	(working copy)
@@ -289,519 +289,507 @@ c:a:src1
@@ -898,510 +886,498 @@ c:b:src2
[join8.q.xml is truncated here in the source.]
+ - - - - - - - - _col0 - - - - - - string - - - - - - - _col1 - - - - - - string - - - - + + + + _col0 + + _col1 + - - - - - - - + + SEL_2 + + + + + + + + + - - - - - - - - - - - key - - - src2 - - - - - - - - - - - - - 15 - - - - - - - - - - - - - - - - - - - - - key - - - src2 - - - - - - - - - - - - - 25 - - - - - - - - - - - - - - + + + _col0 - - + + - - + + string - - - - - - - key - - - src2 - - - - - - - + + + _col1 - - + + - - + + string - - - - - - - - FIL_15 + + + + + + + + + + + + + + + + + + + key + + + src2 + + + + + + + + + + + + + 15 + + + + + + + + + + + + + + + + + + + + + key + + + src2 + + + + + + + + + + + + + 25 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + key + + + src2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + FIL_15 + + + + - - - - - - - - - - - - - - - key - - - src2 - - - - - - string - - - - - - - value - - - src2 - - - - - - string - - - - - - - - - - + + + + + + + + + key + + + src2 + + + + + + string + + + + + + + value + + + src2 + + + + + + string + + + + + + + + + @@ -1684,450 +1660,337 @@ - - - - - - - - - + + + + + + + + + + + + #### A masked pattern was here #### + + + 1 + + + #### A masked pattern was here #### + + + true + + + + + org.apache.hadoop.mapred.TextInputFormat + + + org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + + + + hive.serialization.extend.nesting.levels + true + + + columns + _col0,_col1,_col2,_col3 + + + serialization.lib + org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + + serialization.format + 1 + + + columns.types + string:string:string:string + + + escape.delim + \ + + + + + + + 1 + + + + + FS_12 + + - - - - - #### A masked pattern was here #### - - - 1 - - - #### A masked pattern was here #### - - - true - - - - - org.apache.hadoop.mapred.TextInputFormat - - - org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - - - - hive.serialization.extend.nesting.levels - true - - - columns - _col0,_col1,_col2,_col3 - - - serialization.lib - org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - - serialization.format - 1 - - - columns.types - string:string:string:string - - - escape.delim - \ - - - - - - - 1 - - - - - FS_12 - - - - - - - - - - - - - - - - _col0 - - - - - - - - - string - - - - - - - _col1 - - - - - - - - - string - - - - - - - _col2 - - - - - - - - - string - - - - - - - _col3 - - - - - - - - - string - - - - - - - - + - - - - - + + + + + + + + + _col0 + + + + + + + + + string + + + + + + + _col1 + + + + + + + + + string + + + + + + + _col2 + + + + + + + + + string + + + + + + + _col3 + + + + + + + + + string + + + + + + + + + + + + + + _col3 + + _col3 - - - _col3 - - - b - - - - - - - _col2 - - - _col2 - - - b - - - - - + + b - - _col1 - - - _col1 - - - a - - - - - + + - - _col0 - - - _col0 - - - a - - - - - - - - - - - - - - - - - - - - - - - + + _col2 + + + _col2 - - - - _col0 - - - _col1 - - - _col2 - - - _col3 - - + + b - - - - SEL_9 - - - - - + + - - - - - - - - c1 - - - _col0 - - - c - - - - - - string - - - - - - - c2 - - - _col1 - - - c - - - - - - string - - - - - - - c3 - - - _col2 - - - c - - - - - - string - - - - - - - c4 - - - _col3 - - - c - - - - - - string - - - - + + _col1 + + + _col1 - - - - - - - - - - - - - - + + a + + + - - - - - - - - - - - - FIL_13 - - - - - - - - - - - - - - - + + _col0 + + _col0 a - + - - string - - - - - _col1 + + + + + + + + - - a + + - - + + - - string + + - - - - _col2 + + + + _col0 - - b + + _col1 - - + + _col2 - - string + + _col3 - - - - _col3 + + + + SEL_9 + + + + + + + + + + + + + + c1 + + + _col0 + + + c 
+ + + + + + string + + - - b + + + + c2 + + + _col1 + + + c + + + + + + string + + - - + + + + c3 + + + _col2 + + + c + + + + + + string + + - - string + + + + c4 + + + _col3 + + + c + + + + + + string + + @@ -2135,9 +1998,110 @@ - - - + + + + + + + + + + + + + + + + + + + + + + + + FIL_13 + + + + + + + + + + + + + + _col0 + + + a + + + + + + string + + + + + + + _col1 + + + a + + + + + + string + + + + + + + _col2 + + + b + + + + + + string + + + + + + + _col3 + + + b + + + + + + string + + + + + + + + + @@ -2319,14 +2283,12 @@ JOIN_8 - - - - - - - - + + + + + + Index: ql/src/test/results/compiler/plan/sample1.q.xml =================================================================== --- ql/src/test/results/compiler/plan/sample1.q.xml (revision 1555253) +++ ql/src/test/results/compiler/plan/sample1.q.xml (working copy) @@ -174,577 +174,350 @@ s - - - - - - - - - + + + + + + + + + + + + #### A masked pattern was here #### + + + 1 + + + #### A masked pattern was here #### + + + true + + + + + org.apache.hadoop.mapred.TextInputFormat + + + org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + + + + hive.serialization.extend.nesting.levels + true + + + columns + _col0,_col1,_col2,_col3 + + + serialization.lib + org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + + serialization.format + 1 + + + columns.types + string:string:string:string + + + escape.delim + \ + + + + + + + 1 + + + + + FS_4 + + - - - - - #### A masked pattern was here #### - - - 1 - - - #### A masked pattern was here #### - - - true - - - - - org.apache.hadoop.mapred.TextInputFormat - - - org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - - - - hive.serialization.extend.nesting.levels - true - - - columns - _col0,_col1,_col2,_col3 - - - serialization.lib - org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - - serialization.format - 1 - - - columns.types - string:string:string:string - - - escape.delim - \ - - - - - - - 1 - - - - - FS_4 - - - - - - - - - - - - - - - - _col0 - - - - - - - - string - - - - - string - - - - - - - _col1 - - - - - - - - - string - - - - - - - _col2 - - - - - - - - - string - - - - - - - _col3 - - - - - - - - - string - - - - - - - - + - - - - - - _col3 - - - hr - - - true - - - s - - - - - + + + + + + + + + _col0 + + + + + + + + string + + + + + string + + + + + + + _col1 + + + + + + + + + string + + + + + + + _col2 + + + + + + + + + string + + + + + + + _col3 + + + + + + + + + string + + + + + + + + + + + + + + _col3 + + + hr - - _col2 - - - ds - - - true - - - s - - - - - + + true - - _col1 - - - value - - - s - - - - - + + s - - _col0 - - - key - - - s - - - - - + + - - - - - - - - - - - - - - - - - + + _col2 + + + ds - - - - _col0 - - - _col1 - - - _col2 - - - _col3 - - - - + true - - - - SEL_3 - - - - - + + s - - - - - - - - - - key - - - _col0 - - - s - - - - - - string - - - - - - - value - - - _col1 - - - s - - - - - - string - - - - - - - ds - - - _col2 - - - s - - - - - - string - - - - - - - hr - - - _col3 - - - s - - - - - - string - - - - + + - - - - - - - - true - - - - - - - - - - - - - - - - - - - - - - - - - - org.apache.hadoop.hive.ql.udf.UDFRand - - - rand - - - - - - - double - - - - - - - - - - - - - - int - - - - - - - - - - - - 2147483647 - - - - - - - - - true - - - org.apache.hadoop.hive.ql.udf.UDFOPBitAnd - - - & - - - - - - - - - - - - - - - 1 - - - - - - - - - - - - + + _col1 + + + value - - - - - - - 0 - - + + s - - - - - - - - - boolean + + - - - - - - FIL_1 - - - - - - - - - - - - - - - + + _col0 + + key s - + - - string - - - - - value + + + 
+ + + + + - - s + + - - + + - - string + + - - - - ds + + + + _col0 - - s + + _col1 - - + + _col2 - - string + + _col3 - - - - hr + + true + + + + + SEL_3 + + + + + + + + + + + + + + key + + + _col0 + + + s + + + + + + string + + - - s + + + + value + + + _col1 + + + s + + + + + + string + + - - + + + + ds + + + _col2 + + + s + + + + + + string + + - - string + + + + hr + + + _col3 + + + s + + + + + + string + + @@ -752,9 +525,224 @@ - - - + + + + + true + + + + + + + + + + + + + + + + + + + + + + + + + + org.apache.hadoop.hive.ql.udf.UDFRand + + + rand + + + + + + + double + + + + + + + + + + + + + + int + + + + + + + + + + + + 2147483647 + + + + + + + + + true + + + org.apache.hadoop.hive.ql.udf.UDFOPBitAnd + + + & + + + + + + + + + + + + + + + 1 + + + + + + + + + + + + + + + + + + + + 0 + + + + + + + + + + + + boolean + + + + + + + + + FIL_1 + + + + + + + + + + + + + + key + + + s + + + + + + string + + + + + + + value + + + s + + + + + + string + + + + + + + ds + + + s + + + + + + string + + + + + + + hr + + + s + + + + + + string + + + + + + + + + Index: ql/src/test/results/compiler/plan/sample2.q.xml =================================================================== --- ql/src/test/results/compiler/plan/sample2.q.xml (revision 1555253) +++ ql/src/test/results/compiler/plan/sample2.q.xml (working copy) @@ -79,137 +79,133 @@ #### A masked pattern was here #### - - - - - - - #### A masked pattern was here #### - - - 1 - - - - - org.apache.hadoop.mapred.TextInputFormat - - - org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - - - - name - default.dest1 - - - columns.types - string:string - - - serialization.ddl - struct dest1 { string key, string value} - - - serialization.format - 1 - - - columns - key,value - - - bucket_count - -1 - - - serialization.lib - org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - - file.inputformat - org.apache.hadoop.mapred.TextInputFormat - - - location - #### A masked pattern was here #### - - - file.outputformat - org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - - transient_lastDdlTime - #### A masked pattern was here #### - - - - - - - 1 - - + + + + + + #### A masked pattern was here #### + + + 1 + + + + + org.apache.hadoop.mapred.TextInputFormat + + + org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + + + + name + default.dest1 + + + columns.types + string:string + + + serialization.ddl + struct dest1 { string key, string value} + + + serialization.format + 1 + + + columns + key,value + + + bucket_count + -1 + + + serialization.lib + org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + + file.inputformat + org.apache.hadoop.mapred.TextInputFormat + + + location + #### A masked pattern was here #### + + + file.outputformat + org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + + transient_lastDdlTime + #### A masked pattern was here #### + + + + + + + 1 + + + + + FS_5 + + + + - - FS_5 - - - - - - - - - - - - - - - - key - - - - - - - - string - - - - - string - - - - - - - value - - - - - - - - - string - - - - - - - - - - + + + + + + + + + key + + + + + + + + string + + + + + string + + + + + + + value + + + + + + + + + string + + + + + + + + + @@ -646,361 +642,349 @@ s - - - - - - - - - + + + + + + + + + + + + 1 + + + #### A masked pattern was here #### + + + true + + + + + + 150 + + + 1 + + + #### A masked pattern was here #### + + + true + + + + + + 1 + + + + + FS_3 + + - - - - - 1 - - - #### A masked pattern was here #### - - - true - - - - - - 150 - - - 1 - - - #### A masked pattern was here 
#### - - - true - - - - - - 1 - - - - - FS_3 - - - - - - - - - - - - + - - - - - - _col1 - - - value - - - s - - - - - + + + + + + + + + + + _col1 + + + value - - _col0 - - - key - - - s - - - - - int - - - - + + s + + + - - - - - - - - - - - + + _col0 + + + key - - - - _col0 + + s + + + + + int - - _col1 - - - true - - - SEL_2 - - + + + + + - + + + + - - - - - - - - _col0 - - - s - - - - - - int - - - - - - - _col1 - - - s - - - - - - string - - - - + + + + _col0 + + _col1 + + + true + - - - - - - true + + SEL_2 - - - + + + + + + + + - - - - - - - - - - - - - - - key - - - s - - - - - - - - - - - - - - - - - - - - - - - 2147483647 - - - - - - - - - true - - - org.apache.hadoop.hive.ql.udf.UDFOPBitAnd - - - & - - - - - - - - - - - - - - - 2 - - - - + + + _col0 - - + + s - + + + int + - - - + + + _col1 - - 0 + + s + + + + + string + - - - - - - - boolean - - - - - - - - FIL_1 + + + + + true + + + + + + + + + + + + + + + + + + + + + key + + + s + + + + + + + + + + + + + + + + + + + + + + + 2147483647 + + + + + + + + + true + + + org.apache.hadoop.hive.ql.udf.UDFOPBitAnd + + + & + + + + + + + + + + + + + + + 2 + + + + + + + + + + + + + + + + + + + + 0 + + + + + + + + + + + + boolean + + + + + + + + + + + + FIL_1 + + + + - - - - - - - - - - - - - - - key - - - s - - - - - - int - - - - - - - value - - - s - - - - - - string - - - - - - - - - - + + + + + + + + + key + + + s + + + + + + int + + + + + + + value + + + s + + + + + + string + + + + + + + + + Index: ql/src/test/results/compiler/plan/sample3.q.xml =================================================================== --- ql/src/test/results/compiler/plan/sample3.q.xml (revision 1555253) +++ ql/src/test/results/compiler/plan/sample3.q.xml (working copy) @@ -79,137 +79,133 @@ #### A masked pattern was here #### - - - - - - - #### A masked pattern was here #### - - - 1 - - - - - org.apache.hadoop.mapred.TextInputFormat - - - org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - - - - name - default.dest1 - - - columns.types - string:string - - - serialization.ddl - struct dest1 { string key, string value} - - - serialization.format - 1 - - - columns - key,value - - - bucket_count - -1 - - - serialization.lib - org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - - file.inputformat - org.apache.hadoop.mapred.TextInputFormat - - - location - #### A masked pattern was here #### - - - file.outputformat - org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - - transient_lastDdlTime - #### A masked pattern was here #### - - - - - - - 1 - - + + + + + + #### A masked pattern was here #### + + + 1 + + + + + org.apache.hadoop.mapred.TextInputFormat + + + org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + + + + name + default.dest1 + + + columns.types + string:string + + + serialization.ddl + struct dest1 { string key, string value} + + + serialization.format + 1 + + + columns + key,value + + + bucket_count + -1 + + + serialization.lib + org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + + file.inputformat + org.apache.hadoop.mapred.TextInputFormat + + + location + #### A masked pattern was here #### + + + file.outputformat + org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + + transient_lastDdlTime + #### A masked pattern was here #### + + + + + + + 1 + + + + + FS_5 + + + + - - FS_5 - - - - - - - - - - - - - - - - key - - - - - - - - string - - - - - string - - - - - - - value - - - - - - - - - string - - - - - - - - - - + + + + + + + + + key + + + + + + + + string + + + + + string + + + + + + + value + 
+ + + + + + + + string + + + + + + + + + @@ -646,371 +642,359 @@ s - - - - - - - - - + + + + + + + + + + + + 1 + + + #### A masked pattern was here #### + + + true + + + + + + 150 + + + 1 + + + #### A masked pattern was here #### + + + true + + + + + + 1 + + + + + FS_3 + + - - - - - 1 - - - #### A masked pattern was here #### - - - true - - - - - - 150 - - - 1 - - - #### A masked pattern was here #### - - - true - - - - - - 1 - - - - - FS_3 - - - - - - - - - - - - + - - - - - - _col1 - - - value - - - s - - - - - + + + + + + + + + + + _col1 + + + value - - _col0 - - - key - - - s - - - - - int - - - - + + s + + + - - - - - - - - - - - + + _col0 + + + key - - - - _col0 + + s + + + + + int - - _col1 - - - true - - - SEL_2 - - + + + + + - + + + + - - - - - - - - _col0 - - - s - - - - - - int - - - - - - - _col1 - - - s - - - - - - string - - - - + + + + _col0 + + _col1 + + + true + - - - - - - true + + SEL_2 - - - + + + + + + + + - - - - - - - - - - - - - - - key - - - s - - - - - - - - - - value - - - s - - - - - - - - - - - - - - - - - - - - - - - 2147483647 - - - - - - - - - true - - - org.apache.hadoop.hive.ql.udf.UDFOPBitAnd - - - & - - - - - - - - - - - - - - - 2 - - - - + + + _col0 - - + + s - + + + int + - - - + + + _col1 - - 0 + + s + + + + + string + - - - - - - - boolean - - - - - FIL_1 + + + + + true + + + + + + + + + + + + + + + + + + + + + key + + + s + + + + + + + + + + value + + + s + + + + + + + + + + + + + + + + + + + + + + + 2147483647 + + + + + + + + + true + + + org.apache.hadoop.hive.ql.udf.UDFOPBitAnd + + + & + + + + + + + + + + + + + + + 2 + + + + + + + + + + + + + + + + + + + + 0 + + + + + + + + + + + + boolean + + + + + + + + + FIL_1 + + + + - - - - - - - - - - - - - - - key - - - s - - - - - - int - - - - - - - value - - - s - - - - - - string - - - - - - - - - - + + + + + + + + + key + + + s + + + + + + int + + + + + + + value + + + s + + + + + + string + + + + + + + + + Index: ql/src/test/results/compiler/plan/sample4.q.xml =================================================================== --- ql/src/test/results/compiler/plan/sample4.q.xml (revision 1555253) +++ ql/src/test/results/compiler/plan/sample4.q.xml (working copy) @@ -79,137 +79,133 @@ #### A masked pattern was here #### - - - - - - - #### A masked pattern was here #### - - - 1 - - - - - org.apache.hadoop.mapred.TextInputFormat - - - org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - - - - name - default.dest1 - - - columns.types - string:string - - - serialization.ddl - struct dest1 { string key, string value} - - - serialization.format - 1 - - - columns - key,value - - - bucket_count - -1 - - - serialization.lib - org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - - file.inputformat - org.apache.hadoop.mapred.TextInputFormat - - - location - #### A masked pattern was here #### - - - file.outputformat - org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - - transient_lastDdlTime - #### A masked pattern was here #### - - - - - - - 1 - - + + + + + + #### A masked pattern was here #### + + + 1 + + + + + org.apache.hadoop.mapred.TextInputFormat + + + org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + + + + name + default.dest1 + + + columns.types + string:string + + + serialization.ddl + struct dest1 { string key, string value} + + + serialization.format + 1 + + + columns + key,value + + + bucket_count + -1 + + + serialization.lib + org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + + file.inputformat + org.apache.hadoop.mapred.TextInputFormat + + + location + 
#### A masked pattern was here #### + + + file.outputformat + org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + + transient_lastDdlTime + #### A masked pattern was here #### + + + + + + + 1 + + + + + FS_5 + + + + - - FS_5 - - - - - - - - - - - - - - - - key - - - - - - - - string - - - - - string - - - - - - - value - - - - - - - - - string - - - - - - - - - - + + + + + + + + + key + + + + + + + + string + + + + + string + + + + + + + value + + + + + + + + + string + + + + + + + + + @@ -646,361 +642,349 @@ s - - - - - - - - - + + + + + + + + + + + + 1 + + + #### A masked pattern was here #### + + + true + + + + + + 150 + + + 1 + + + #### A masked pattern was here #### + + + true + + + + + + 1 + + + + + FS_3 + + - - - - - 1 - - - #### A masked pattern was here #### - - - true - - - - - - 150 - - - 1 - - - #### A masked pattern was here #### - - - true - - - - - - 1 - - - - - FS_3 - - - - - - - - - - - - + - - - - - - _col1 - - - value - - - s - - - - - + + + + + + + + + + + _col1 + + + value - - _col0 - - - key - - - s - - - - - int - - - - + + s + + + - - - - - - - - - - - + + _col0 + + + key - - - - _col0 + + s + + + + + int - - _col1 - - - true - - - SEL_2 - - + + + + + - + + + + - - - - - - - - _col0 - - - s - - - - - - int - - - - - - - _col1 - - - s - - - - - - string - - - - + + + + _col0 + + _col1 + + + true + - - - - - - true + + SEL_2 - - - + + + + + + + + - - - - - - - - - - - - - - - key - - - s - - - - - - - - - - - - - - - - - - - - - - - 2147483647 - - - - - - - - - true - - - org.apache.hadoop.hive.ql.udf.UDFOPBitAnd - - - & - - - - - - - - - - - - - - - 2 - - - - + + + _col0 - - + + s - + + + int + - - - + + + _col1 - - 0 + + s + + + + + string + - - - - - - - boolean - - - - - - - - FIL_1 + + + + + true + + + + + + + + + + + + + + + + + + + + + key + + + s + + + + + + + + + + + + + + + + + + + + + + + 2147483647 + + + + + + + + + true + + + org.apache.hadoop.hive.ql.udf.UDFOPBitAnd + + + & + + + + + + + + + + + + + + + 2 + + + + + + + + + + + + + + + + + + + + 0 + + + + + + + + + + + + boolean + + + + + + + + + + + + FIL_1 + + + + - - - - - - - - - - - - - - - key - - - s - - - - - - int - - - - - - - value - - - s - - - - - - string - - - - - - - - - - + + + + + + + + + key + + + s + + + + + + int + + + + + + + value + + + s + + + + + + string + + + + + + + + + Index: ql/src/test/results/compiler/plan/sample5.q.xml =================================================================== --- ql/src/test/results/compiler/plan/sample5.q.xml (revision 1555253) +++ ql/src/test/results/compiler/plan/sample5.q.xml (working copy) @@ -79,137 +79,133 @@ #### A masked pattern was here #### - - - - - - - #### A masked pattern was here #### - - - 1 - - - - - org.apache.hadoop.mapred.TextInputFormat - - - org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - - - - name - default.dest1 - - - columns.types - string:string - - - serialization.ddl - struct dest1 { string key, string value} - - - serialization.format - 1 - - - columns - key,value - - - bucket_count - -1 - - - serialization.lib - org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - - file.inputformat - org.apache.hadoop.mapred.TextInputFormat - - - location - #### A masked pattern was here #### - - - file.outputformat - org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - - transient_lastDdlTime - #### A masked pattern was here #### - - - - - - - 1 - - + + + + + + #### A masked pattern was here #### + + + 1 + + + + + org.apache.hadoop.mapred.TextInputFormat + + + 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + + + + name + default.dest1 + + + columns.types + string:string + + + serialization.ddl + struct dest1 { string key, string value} + + + serialization.format + 1 + + + columns + key,value + + + bucket_count + -1 + + + serialization.lib + org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + + file.inputformat + org.apache.hadoop.mapred.TextInputFormat + + + location + #### A masked pattern was here #### + + + file.outputformat + org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + + transient_lastDdlTime + #### A masked pattern was here #### + + + + + + + 1 + + + + + FS_5 + + + + - - FS_5 - - - - - - - - - - - - - - - - key - - - - - - - - string - - - - - string - - - - - - - value - - - - - - - - - string - - - - - - - - - - + + + + + + + + + key + + + + + + + + string + + + + + string + + + + + + + value + + + + + + + + + string + + + + + + + + + @@ -646,358 +642,346 @@ s - - - - - - - - - + + + + + + + + + + + + 1 + + + #### A masked pattern was here #### + + + true + + + + + + 150 + + + 1 + + + #### A masked pattern was here #### + + + true + + + + + + 1 + + + + + FS_3 + + - - - - - 1 - - - #### A masked pattern was here #### - - - true - - - - - - 150 - - - 1 - - - #### A masked pattern was here #### - - - true - - - - - - 1 - - - - - FS_3 - - - - - - - - - - - - + - - - - - - _col1 - - - value - - - s - - - - - + + + + + + + + + + + _col1 + + + value - - _col0 - - - key - - - s - - - - - int - - - - + + s + + + - - - - - - - - - - - + + _col0 + + + key - - - - _col0 + + s + + + + + int - - _col1 - - - true - - - SEL_2 - - + + + + + - + + + + - - - - - - - - _col0 - - - s - - - - - - int - - - - - - - _col1 - - - s - - - - - - string - - - - + + + + _col0 + + _col1 + + + true + - - - - - - true + + SEL_2 - - - + + + + + + + + - - - - - - - - - - - - - - - key - - - s - - - - - - - - - - - - - - - - - - - - - - - 2147483647 - - - - - - - - - true - - - org.apache.hadoop.hive.ql.udf.UDFOPBitAnd - - - & - - - - - - - - - - - - - - - 5 - - - - + + + _col0 - - + + s - + + + int + - - - + + + _col1 - - 0 + + s + + + + + string + - - - - - - - boolean - - - - - FIL_1 + + + + + true + + + + + + + + + + + + + + + + + + + + + key + + + s + + + + + + + + + + + + + + + + + + + + + + + 2147483647 + + + + + + + + + true + + + org.apache.hadoop.hive.ql.udf.UDFOPBitAnd + + + & + + + + + + + + + + + + + + + 5 + + + + + + + + + + + + + + + + + + + + 0 + + + + + + + + + + + + boolean + + + + + + + + + FIL_1 + + + + - - - - - - - - - - - - - - - key - - - s - - - - - - int - - - - - - - value - - - s - - - - - - string - - - - - - - - - - + + + + + + + + + key + + + s + + + + + + int + + + + + + + value + + + s + + + + + + string + + + + + + + + + Index: ql/src/test/results/compiler/plan/sample6.q.xml =================================================================== --- ql/src/test/results/compiler/plan/sample6.q.xml (revision 1555253) +++ ql/src/test/results/compiler/plan/sample6.q.xml (working copy) @@ -79,137 +79,133 @@ #### A masked pattern was here #### - - - - - - - #### A masked pattern was here #### - - - 1 - - - - - org.apache.hadoop.mapred.TextInputFormat - - - org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - - - - name - default.dest1 - - - columns.types - string:string - - - serialization.ddl - struct dest1 { string key, string value} - - - serialization.format - 1 - - - columns - key,value - - - bucket_count - -1 - - - serialization.lib - org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - - 
file.inputformat - org.apache.hadoop.mapred.TextInputFormat - - - location - #### A masked pattern was here #### - - - file.outputformat - org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - - transient_lastDdlTime - #### A masked pattern was here #### - - - - - - - 1 - - + + + + + + #### A masked pattern was here #### + + + 1 + + + + + org.apache.hadoop.mapred.TextInputFormat + + + org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + + + + name + default.dest1 + + + columns.types + string:string + + + serialization.ddl + struct dest1 { string key, string value} + + + serialization.format + 1 + + + columns + key,value + + + bucket_count + -1 + + + serialization.lib + org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + + file.inputformat + org.apache.hadoop.mapred.TextInputFormat + + + location + #### A masked pattern was here #### + + + file.outputformat + org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + + transient_lastDdlTime + #### A masked pattern was here #### + + + + + + + 1 + + + + + FS_5 + + + + - - FS_5 - - - - - - - - - - - - - - - - key - - - - - - - - string - - - - - string - - - - - - - value - - - - - - - - - string - - - - - - - - - - + + + + + + + + + key + + + + + + + + string + + + + + string + + + + + + + value + + + + + + + + + string + + + + + + + + + @@ -646,361 +642,349 @@ s - - - - - - - - - + + + + + + + + + + + + 1 + + + #### A masked pattern was here #### + + + true + + + + + + 150 + + + 1 + + + #### A masked pattern was here #### + + + true + + + + + + 1 + + + + + FS_3 + + - - - - - 1 - - - #### A masked pattern was here #### - - - true - - - - - - 150 - - - 1 - - - #### A masked pattern was here #### - - - true - - - - - - 1 - - - - - FS_3 - - - - - - - - - - - - + - - - - - - _col1 - - - value - - - s - - - - - + + + + + + + + + + + _col1 + + + value - - _col0 - - - key - - - s - - - - - int - - - - + + s + + + - - - - - - - - - - - + + _col0 + + + key - - - - _col0 + + s + + + + + int - - _col1 - - - true - - - SEL_2 - - + + + + + - + + + + - - - - - - - - _col0 - - - s - - - - - - int - - - - - - - _col1 - - - s - - - - - - string - - - - + + + + _col0 + + _col1 + + + true + - - - - - - true + + SEL_2 - - - + + + + + + + + - - - - - - - - - - - - - - - key - - - s - - - - - - - - - - - - - - - - - - - - - - - 2147483647 - - - - - - - - - true - - - org.apache.hadoop.hive.ql.udf.UDFOPBitAnd - - - & - - - - - - - - - - - - - - - 4 - - - - + + + _col0 - - + + s - + + + int + - - - + + + _col1 - - 0 + + s + + + + + string + - - - - - - - boolean - - - - - - - - FIL_1 + + + + + true + + + + + + + + + + + + + + + + + + + + + key + + + s + + + + + + + + + + + + + + + + + + + + + + + 2147483647 + + + + + + + + + true + + + org.apache.hadoop.hive.ql.udf.UDFOPBitAnd + + + & + + + + + + + + + + + + + + + 4 + + + + + + + + + + + + + + + + + + + + 0 + + + + + + + + + + + + boolean + + + + + + + + + + + + FIL_1 + + + + - - - - - - - - - - - - - - - key - - - s - - - - - - int - - - - - - - value - - - s - - - - - - string - - - - - - - - - - + + + + + + + + + key + + + s + + + + + + int + + + + + + + value + + + s + + + + + + string + + + + + + + + + Index: ql/src/test/results/compiler/plan/sample7.q.xml =================================================================== --- ql/src/test/results/compiler/plan/sample7.q.xml (revision 1555253) +++ ql/src/test/results/compiler/plan/sample7.q.xml (working copy) @@ -79,137 +79,133 @@ #### A masked pattern was here #### - - - - - - - #### A masked pattern was here #### - - - 1 - - - - - 
org.apache.hadoop.mapred.TextInputFormat - - - org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - - - - name - default.dest1 - - - columns.types - string:string - - - serialization.ddl - struct dest1 { string key, string value} - - - serialization.format - 1 - - - columns - key,value - - - bucket_count - -1 - - - serialization.lib - org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - - file.inputformat - org.apache.hadoop.mapred.TextInputFormat - - - location - #### A masked pattern was here #### - - - file.outputformat - org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - - transient_lastDdlTime - #### A masked pattern was here #### - - - - - - - 1 - - + + + + + + #### A masked pattern was here #### + + + 1 + + + + + org.apache.hadoop.mapred.TextInputFormat + + + org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + + + + name + default.dest1 + + + columns.types + string:string + + + serialization.ddl + struct dest1 { string key, string value} + + + serialization.format + 1 + + + columns + key,value + + + bucket_count + -1 + + + serialization.lib + org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + + file.inputformat + org.apache.hadoop.mapred.TextInputFormat + + + location + #### A masked pattern was here #### + + + file.outputformat + org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + + transient_lastDdlTime + #### A masked pattern was here #### + + + + + + + 1 + + + + + FS_7 + + + + - - FS_7 - - - - - - - - - - - - - - - - key - - - - - - - - string - - - - - string - - - - - - - value - - - - - - - - - string - - - - - - - - - - + + + + + + + + + key + + + + + + + + string + + + + + string + + + + + + + value + + + + + + + + + string + + + + + + + + + @@ -646,406 +642,394 @@ s - - - - - - - - - + + + + + + + + + + + + 1 + + + #### A masked pattern was here #### + + + true + + + + + + 150 + + + 1 + + + #### A masked pattern was here #### + + + true + + + + + + 1 + + + + + FS_4 + + - - - - - 1 - - - #### A masked pattern was here #### - - - true - - - - - - 150 - - - 1 - - - #### A masked pattern was here #### - - - true - - - - - - 1 - - - - - FS_4 - - - - - - - - - - - - + - - - - - - _col1 - - - value - - - s - - - - - + + + + + + + + + + + _col1 + + + value - - _col0 - - - key - - - s - - - - - int - - - - + + s + + + - - - - - - - - - - - + + _col0 + + + key - - - - _col0 + + s + + + + + int - - _col1 - - - true - - - SEL_3 - - + + + + + - + + + + - - - - - - - - _col0 - - - s - - - - - - int - - - - - - - _col1 - - - s - - - - - - string - - - - + + + + _col0 + + _col1 + + + true + - - - - - - - + + SEL_3 + + + + + + + + + - - - - - - - - - - - - - - - - - - - key - - - s - - - - - - - - - - - - - - - - - - - - - - - 2147483647 - - - - - - - - - true - - - org.apache.hadoop.hive.ql.udf.UDFOPBitAnd - - - & - - - - - - - - - - - - - - - 4 - - - - - - - - - - - - - - - - - - - - 0 - - - - + + + _col0 - - + + s - - - - boolean - - + + + + int + - - - - - - - key - - - s - - - - - - - - - - - - - 100 - - - - + + + _col1 - - + + s - - + + + + string + - - - - - - - - FIL_5 + + + + + + + + + + + + + + + + + + + + + + + + + + + key + + + s + + + + + + + + + + + + + + + + + + + + + + + 2147483647 + + + + + + + + + true + + + org.apache.hadoop.hive.ql.udf.UDFOPBitAnd + + + & + + + + + + + + + + + + + + + 4 + + + + + + + + + + + + + + + + + + + + 0 + + + + + + + + + + + + boolean + + + + + + + + + + + + + key + + + s + + + + + + + + + + + + + 100 + + + + + + + + + + + + + + + + + + + + + + + + + + + FIL_5 + + + + - - - - - - - - - - - - - - 
- key - - - s - - - - - - int - - - - - - - value - - - s - - - - - - string - - - - - - - - - - + + + + + + + + + key + + + s + + + + + + int + + + + + + + value + + + s + + + + + + string + + + + + + + + + Index: ql/src/test/results/compiler/plan/subq.q.xml =================================================================== --- ql/src/test/results/compiler/plan/subq.q.xml (revision 1555253) +++ ql/src/test/results/compiler/plan/subq.q.xml (working copy) @@ -44,109 +44,105 @@ #### A masked pattern was here #### - - - - - - - #### A masked pattern was here #### - - - 1 - - - - - org.apache.hadoop.mapred.TextInputFormat - - - org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - - - - columns - _col0,_col1 - - - serialization.lib - org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - - serialization.format - 1 - - - columns.types - string:string - - - - - - - 1 - - + + + + + + #### A masked pattern was here #### + + + 1 + + + + + org.apache.hadoop.mapred.TextInputFormat + + + org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + + + + columns + _col0,_col1 + + + serialization.lib + org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + + serialization.format + 1 + + + columns.types + string:string + + + + + + + 1 + + + + + FS_7 + + + + - - FS_7 - - - - - - - - - - - - - - - - _col0 - - - - - - - - string - - - - - string - - - - - - - _col1 - - - - - - - - - string - - - - - - - - - - + + + + + + + + + _col0 + + + + + + + + string + + + + + string + + + + + + + _col1 + + + + + + + + + string + + + + + + + + + @@ -525,280 +521,268 @@ unioninput:src - - - - - - - - - + + + + + + + + + + + + 1 + + + #### A masked pattern was here #### + + + 1 + + + #### A masked pattern was here #### + + + true + + + + + + 1 + + + + + FS_4 + + - - - - - 1 - - - #### A masked pattern was here #### - - - 1 - - - #### A masked pattern was here #### - - - true - - - - - - 1 - - - - - FS_4 - - - - - - - - - - - - + - - - - - - _col1 - - - value - - - src - - - - - + + + + + + + + + + + _col1 + + + value - - _col0 - - - key - - - src - - - - - + + src + + + - - - - - - - - - - - + + _col0 + + + key - - - - _col0 - - - _col1 - - + + src - - true + + - - SEL_2 - - + + + + + - + + + + - - - - - - - - key - - - _col0 - - - src - - - - - - string - - - - - - - value - - - _col1 - - - src - - - - - - string - - - - + + + + _col0 + + _col1 + + + true + - - - - - - - + + SEL_2 + + + + + + + + + - - + + key + + _col0 + src - + + + string + - - - - - int - - + + + value - - 100 + + _col1 + + src + + + + + + string + - - - - - - - boolean - - - - - FIL_5 + + + + + + + + + + + key + + + src + + + + + + + + + + + + int + + + + + 100 + + + + + + + + + + + + boolean + + + + + + + + + FIL_5 + + + + - - - - - - - - - - - - - - - key - - - src - - - - - - string - - - - - - - value - - - src - - - - - - string - - - - - - - - - - + + + + + + + + + key + + + src + + + + + + string + + + + + + + value + + + src + + + + + + string + + + + + + + + + Index: ql/src/test/results/compiler/plan/udf1.q.xml =================================================================== --- ql/src/test/results/compiler/plan/udf1.q.xml (revision 1555253) +++ ql/src/test/results/compiler/plan/udf1.q.xml (working copy) @@ -153,1663 +153,1651 @@ src - - - - - - - - - + + + + + + + + + + + + #### A masked pattern was here #### + + + 1 + + + #### A masked pattern was here #### + + + true + + + + + org.apache.hadoop.mapred.TextInputFormat + + + org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + + + + 
hive.serialization.extend.nesting.levels + true + + + columns + _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16 + + + serialization.lib + org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + + serialization.format + 1 + + + columns.types + boolean:boolean:boolean:boolean:boolean:boolean:boolean:boolean:boolean:boolean:boolean:boolean:boolean:string:string:string:string + + + escape.delim + \ + + + + + + + 1 + + + + + FS_3 + + - - - - - #### A masked pattern was here #### + + + + + + + + + + + _col0 + + + + + + + + boolean + + + + + boolean + + + + + + + _col1 + + + + + + + + + boolean + + + + + + + _col2 + + + + + + + + + boolean + + + + + + + _col3 + + + + + + + + + boolean + + + + + + + _col4 + + + + + + + + + boolean + + + + + + + _col5 + + + + + + + + + boolean + + + + + + + _col6 + + + + + + + + + boolean + + + + + + + _col7 + + + + + + + + + boolean + + + + + + + _col8 + + + + + + + + + boolean + + + + + + + _col9 + + + + + + + + + boolean + + + + + + + _col10 + + + + + + + + + boolean + + + + + + + _col11 + + + + + + + + + boolean + + + + + + + _col12 + + + + + + + + + boolean + + + + + + + _col13 + + + + + + + + string + + + + + string + + + + + + + _col14 + + + + + + + + + string + + + + + + + _col15 + + + + + + + + + string + + + + + + + _col16 + + + + + + + + + string + + + + + + + + + + + + + + _col8 + + + + + + + - - 1 + + - - #### A masked pattern was here #### + + + + + + - - true + + .* - - - - org.apache.hadoop.mapred.TextInputFormat - - - org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - - - - hive.serialization.extend.nesting.levels - true - - - columns - _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16 - - - serialization.lib - org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - - serialization.format - 1 - - - columns.types - boolean:boolean:boolean:boolean:boolean:boolean:boolean:boolean:boolean:boolean:boolean:boolean:boolean:string:string:string:string - - - escape.delim - \ - - - - - - - 1 - - - FS_3 + + + + + + true - - - - - - + + org.apache.hadoop.hive.ql.udf.UDFRegExp - - - - - - - - _col0 - - - - - - - - boolean - - - - - boolean - - - - - - - _col1 - - - - - - - - - boolean - - - - - - - _col2 - - - - - - - - - boolean - - - - - - - _col3 - - - - - - - - - boolean - - - - - - - _col4 - - - - - - - - - boolean - - - - - - - _col5 - - - - - - - - - boolean - - - - - - - _col6 - - - - - - - - - boolean - - - - - - - _col7 - - - - - - - - - boolean - - - - - - - _col8 - - - - - - - - - boolean - - - - - - - _col9 - - - - - - - - - boolean - - - - - - - _col10 - - - - - - - - - boolean - - - - - - - _col11 - - - - - - - - - boolean - - - - - - - _col12 - - - - - - - - - boolean - - - - - - - _col13 - - - - - - - - string - - - - - string - - - - - - - _col14 - - - - - - - - - string - - - - - - - _col15 - - - - - - - - - string - - - - - - - _col16 - - - - - - - - - string - - - - - - + + rlike + + + - - - - _col8 - - - - - - - - - - - - + + _col7 + + + + + + + - - - - - - - .* - - + + ab - - - - true + + + + - - org.apache.hadoop.hive.ql.udf.UDFRegExp + + a - - rlike - - - - - - _col7 - - - - - - - - - - ab - - - - - - - - - - a - - - - + + + + true - - - - true - - - org.apache.hadoop.hive.ql.udf.UDFLike - - - like - - + + org.apache.hadoop.hive.ql.udf.UDFLike - - + + like - - _col6 - - - - - - - - - - ab - - + + + + + + + _col6 + + + + + + + - - - - - - - _a% - - + + ab - - - - true + + + + - - 
org.apache.hadoop.hive.ql.udf.UDFLike + + _a% - - like - - - + + + + + + true + + org.apache.hadoop.hive.ql.udf.UDFLike + + + like + - - _col5 - - - - - - - - - - ab - - + + + + + + + _col5 + + + + + + + - - - - - - - \%\_ - - + + ab - - - - true + + + + - - org.apache.hadoop.hive.ql.udf.UDFLike + + \%\_ - - like - - - + + + + + + true + + org.apache.hadoop.hive.ql.udf.UDFLike + + + like + - - _col4 - - - - - - - - - - %_ - - + + + + + + + _col4 + + + + + + + - - - - - - - \%\_ - - + + %_ - - - - true + + + + - - org.apache.hadoop.hive.ql.udf.UDFLike + + \%\_ - - like - - - + + + + + + true + + org.apache.hadoop.hive.ql.udf.UDFLike + + + like + - - _col3 - - - - - - - - - - ab - - + + + + + + + _col3 + + + + + + + - - - - - - - %a_ - - + + ab - - - - true + + + + - - org.apache.hadoop.hive.ql.udf.UDFLike + + %a_ - - like - - - + + + + + + true + + org.apache.hadoop.hive.ql.udf.UDFLike + + + like + - - _col2 - - - - - - - - - - ab - - + + + + + + + _col2 + + + + + + + - - - - - - - %a% - - + + ab - - - - true + + + + - - org.apache.hadoop.hive.ql.udf.UDFLike + + %a% - - like - - - + + + + + + true + + org.apache.hadoop.hive.ql.udf.UDFLike + + + like + - - _col1 - - - - - - - - - - b - - + + + + + + + _col1 + + + + + + + - - - - - - - %a% - - + + b - - - - true + + + + - - org.apache.hadoop.hive.ql.udf.UDFLike + + %a% - - like - - - + + + + + + true + + org.apache.hadoop.hive.ql.udf.UDFLike + + + like + - - _col9 - - - - - - - - - - a - - + + + + + + + _col9 + + + + + + + - - - - - - - [ab] - - + + a - - - - true + + + + - - org.apache.hadoop.hive.ql.udf.UDFRegExp + + [ab] - - rlike - - - + + + + + + true + + org.apache.hadoop.hive.ql.udf.UDFRegExp + + + rlike + - - _col13 - - - - - - - - - - abc - - + + + + + + + _col13 + + + + + + + - - - - - - - b - - + + abc - - - - - - - c - - + + + + + + + + b + - - - - org.apache.hadoop.hive.ql.udf.UDFRegExpReplace + + + + - - regexp_replace + + c - - + + + + + + org.apache.hadoop.hive.ql.udf.UDFRegExpReplace + + regexp_replace + - - _col12 - - - - - - - - - - hadoop - - + + + + + + + _col12 + + + + + + + - - - - - - - o* - - + + hadoop - - - - true + + + + - - org.apache.hadoop.hive.ql.udf.UDFRegExp + + o* - - rlike - - - + + + + + + true + + org.apache.hadoop.hive.ql.udf.UDFRegExp + + + rlike + - - _col11 - - - - - - - - - - hadoop - - + + + + + + + _col11 + + + + + + + - - - - - - - [a-z]* - - + + hadoop - - - - true + + + + - - org.apache.hadoop.hive.ql.udf.UDFRegExp + + [a-z]* - - rlike - - - + + + + + + true + + org.apache.hadoop.hive.ql.udf.UDFRegExp + + + rlike + - - _col10 - - - - - - - - - - - - + + + + + + + _col10 + + + + + + + - - - - - - - [ab] - - + + - - - - true + + + + - - org.apache.hadoop.hive.ql.udf.UDFRegExp + + [ab] - - rlike - - - + + + + + + true + + org.apache.hadoop.hive.ql.udf.UDFRegExp + + + rlike + - - _col16 - - - - - - - - - - hadoop - - + + + + + + + _col16 + + + + + + + - - - - - - - (.)[a-z]* - - + + hadoop - - - - - - - $1ive - - + + + + + + + + (.)[a-z]* + - - - - org.apache.hadoop.hive.ql.udf.UDFRegExpReplace + + + + - - regexp_replace + + $1ive - - + + + + + + org.apache.hadoop.hive.ql.udf.UDFRegExpReplace + + regexp_replace + - - _col15 - - - - - - - - - - abbbb - - + + + + + + + _col15 + + + + + + + - - - - - - - bb - - + + abbbb - - - - - - - b - - + + + + + + + + bb + - - - - org.apache.hadoop.hive.ql.udf.UDFRegExpReplace + + + + - - regexp_replace + + b - - + + + + + + org.apache.hadoop.hive.ql.udf.UDFRegExpReplace + + regexp_replace + - - _col14 - - - - - - - - - - abc - - + + + + + + + _col14 + + + + + + + 
- - - - - - - z - - + + abc - - - - - - - a - - + + + + + + + + z + - - - - org.apache.hadoop.hive.ql.udf.UDFRegExpReplace + + + + - - regexp_replace + + a - - + + + + + + org.apache.hadoop.hive.ql.udf.UDFRegExpReplace + + regexp_replace + - - _col0 - - - - - - - - - - a - - + + + + + + + _col0 + + + + + + + - - - - - - - %a% - - + + a - - - - true + + + + - - org.apache.hadoop.hive.ql.udf.UDFLike + + %a% - - like - - - + + + + + + true + + org.apache.hadoop.hive.ql.udf.UDFLike + + + like + + + + - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + _col0 + + + _col1 + + + _col2 + + + _col3 + + + _col4 + + + _col5 + + + _col6 + + + _col7 + + + _col8 + + + _col9 + + + _col10 + + + _col11 + + + _col12 + + + _col13 + + + _col14 + + + _col15 + + + _col16 + + + + + + + SEL_2 + + + + + + + + + + + + + + _c0 - - + + _col0 - - + + - - + + boolean - - + + + + + + _c1 - - + + _col1 - - + + - - + + boolean - - + + + + + + _c2 - - + + _col2 - - + + - - + + boolean - - + + + + + + _c3 - - + + _col3 - - + + - - + + boolean - - - - - - - _col0 + + + + _c4 - - _col1 + + _col4 - - _col2 + + - - _col3 + + boolean - - _col4 + + + + + + _c5 - + _col5 - + + + + + boolean + + + + + + + _c6 + + _col6 - + + + + + boolean + + + + + + + _c7 + + _col7 - + + + + + boolean + + + + + + + _c8 + + _col8 - + + + + + boolean + + + + + + + _c9 + + _col9 - - _col10 + + - - _col11 + + boolean - - _col12 + + + + + + _c10 - - _col13 + + _col10 - - _col14 + + - - _col15 + + boolean - - _col16 - - - - - SEL_2 - - - - - - - - - - - - - - - _c0 - - - _col0 - - - - - - boolean - - + + + _c11 - - - - _c1 - - - _col1 - - - - - - boolean - - + + _col11 - - - - _c2 - - - _col2 - - - - - - boolean - - + + - - - - _c3 - - - _col3 - - - - - - boolean - - + + boolean - - - - _c4 - - - _col4 - - - - - - boolean - - + + + + + + _c12 - - - - _c5 - - - _col5 - - - - - - boolean - - + + _col12 - - - - _c6 - - - _col6 - - - - - - boolean - - + + - - - - _c7 - - - _col7 - - - - - - boolean - - + + boolean - - - - _c8 - - - _col8 - - - - - - boolean - - + + + + + + _c13 - - - - _c9 - - - _col9 - - - - - - boolean - - + + _col13 - - - - _c10 - - - _col10 - - - - - - boolean - - + + - - - - _c11 - - - _col11 - - - - - - boolean - - + + string - - - - _c12 - - - _col12 - - - - - - boolean - - + + + + + + _c14 - - - - _c13 - - - _col13 - - - - - - string - - + + _col14 - - - - _c14 - - - _col14 - - - - - - string - - + + - - - - _c15 - - - _col15 - - - - - - string - - + + string - - - - _c16 - - - _col16 - - - - - - string - - - - - - - - - - - - - - - - - - key + + + _c15 - - src + + _col15 - + + + string + - - - - - int - - + + + _c16 - - 86 + + _col16 + + + + + string + - - - - - - - - FIL_4 + + + + + + + + + + + key + + + src + + + + + + + + + + + + int + + + + + 86 + + + + + + + + + + + + + + + + + FIL_4 + + + + - - - - - - - - - - - - - - - key - - - src - - - - - - string - - - - - - - - - - + + + + + + + + + key + + + src + + + + + + string + + + + + + + + + Index: ql/src/test/results/compiler/plan/udf4.q.xml =================================================================== --- ql/src/test/results/compiler/plan/udf4.q.xml (revision 1555253) +++ ql/src/test/results/compiler/plan/udf4.q.xml (working copy) @@ -133,1452 +133,1444 @@ dest1 - - - - - - - - - - - #### A masked pattern was here #### + + + + + + + + + #### A masked pattern was here #### + + + 1 + + + #### A masked pattern was here #### + + + true + + + + + org.apache.hadoop.mapred.TextInputFormat - - 
1 + + org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - #### A masked pattern was here #### - - - true - - - - - org.apache.hadoop.mapred.TextInputFormat + + + + hive.serialization.extend.nesting.levels + true - - org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + columns + _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18 - - - - hive.serialization.extend.nesting.levels - true - - - columns - _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18 - - - serialization.lib - org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - - serialization.format - 1 - - - columns.types - double:double:double:bigint:bigint:bigint:double:double:double:bigint:bigint:bigint:bigint:double:int:int:int:int:int - - - escape.delim - \ - - + + serialization.lib + org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + serialization.format + 1 + + + columns.types + double:double:double:bigint:bigint:bigint:double:double:double:bigint:bigint:bigint:bigint:double:int:int:int:int:int + + + escape.delim + \ + - - 1 - - - FS_2 + + 1 - + + + + FS_2 + + + + + + + + + - - - - - - - - - - - - _col0 - - - - - - - - double - - - - - double - - + + + _col0 - - - - _col1 - - - - - - - - - double - - + + - - - - _col2 - - - - - - - + + double - - - - _col3 - - - - - - - - bigint - - - - - bigint - - + + double - - - - _col4 - - - - - - - - - bigint - - + + + + + + _col1 - - - - _col5 - - - - - - - - - bigint - - + + - - - - _col6 - - - - - - - - - double - - + + - - - - _col7 - - - - - - - - - double - - + + double - - - - _col8 - - - - - - - - - double - - + + + + + + _col2 - - - - _col9 - - - - - - - - - bigint - - + + - - - - _col10 - - - - - - - - - bigint - - + + - - - - _col11 - - - - - - - - - bigint - - + + double - - - - _col12 - - - - - - - - - bigint - - + + + + + + _col3 - - - - _col13 - - - - - - - - - double - - + + - - - - _col14 - - - - - - - - int - - - + + - int + bigint - - - - _col15 - - - - - - - - - int - - + + bigint - - - - _col16 - - - - - - - - - int - - - - - - - _col17 - - - - - - - - - int - - - - - - - _col18 - - - - - - - - - int - - - - - - - - - - - - - _col8 - - - - - - + + + _col4 - - 0.0 + + + + + + + bigint + - - - - - - org.apache.hadoop.hive.ql.udf.UDFSqrt - - - sqrt - - - - - - - - - - _col7 - - - - - - - - - - - - - 1.0 - - - - + + + _col5 - - + + - - + + + + bigint + - - - - - - org.apache.hadoop.hive.ql.udf.UDFSqrt - - - sqrt - - - - - - - - - - _col6 - - - - - + + + _col6 + + + + + - - 1.0 + + double - - - - - - org.apache.hadoop.hive.ql.udf.UDFSqrt - - - sqrt - - - - - - - - - - _col5 - - - - - - - - - - - - - 1.5 - - - - + + + _col7 - - + + - + + + double + - - - - - - - - - - - - _col4 - - - - - - + + + _col8 - - 1.5 + + - - - - - - - - - - - - - - _col3 - - - - - - + - - 1.0 + + double - - - - - - - - - - - - _col2 - - - - - - - - - - - - - 1.5 - - - - + + + _col9 - - + + - - + + + + bigint + - - - - - - - - - - - - _col1 - - - - - - + + + _col10 - - 1.5 + + - - - - - - - - - - - - - - _col9 - - - - - - - + + - - 1.0 + + bigint - - - - - - - - - - - - _col13 - - - - - - + + + _col11 - - 3 + + - - - - - - - - org.apache.hadoop.hive.ql.udf.UDFRand - - - rand - - - - - - - - - - _col12 - - - - - - - + + - - 1.0 + + bigint - - - - - - - - - - - - _col11 - - - - - - - - - - - - - 1.5 - - - - + + + _col12 - - + + - - + + + + bigint + - - - - - - - - - - - - _col10 - - - - - + + + _col13 + + + + + - - 1.5 + + double - - - - 
- - - - - - - - _col17 - - - - - - + + + _col14 - - 1 + + - - - - - - - - - - - - - 2 - - + + + + int - - + + int - - - - - - - - - - - - - - - _col16 - - - - - + + + _col15 + + + + + - - 1 + + int - - + + + _col16 + + + + + - - 2 + + int - - - - - - - - - - - - _col15 - - - - - + + + _col17 + + + + + - - 3 + + int - - - - - - - - - - - - _col14 - - - - - - 3 - - - - - _col0 - - - - - - + + + _col18 - - 1.0 + + - - - - - - - - - - - - - - _col18 - - - - - - + - - 1 + + int - - - - true - - - org.apache.hadoop.hive.ql.udf.UDFOPBitNot - - - ~ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - _col0 - - - _col1 - - - _col2 - - - _col3 - - - _col4 - - - _col5 - - - _col6 - - - _col7 - - - _col8 - - - _col9 - - - _col10 - - - _col11 - - - _col12 - - - _col13 - - - _col14 - - - _col15 - - - _col16 - - - _col17 - - - _col18 - - - - + + + + + _col8 + + + + + + + + + + 0.0 + + + + + + + + + org.apache.hadoop.hive.ql.udf.UDFSqrt + + + sqrt + + + + + + + + + + _col7 + + + + + + + + + + + + + + 1.0 + + + + + + + + + + + + + + + + + + + org.apache.hadoop.hive.ql.udf.UDFSqrt + + + sqrt + + + + + + + + + + _col6 + + + + + + + + + + 1.0 + + + + + + + + + org.apache.hadoop.hive.ql.udf.UDFSqrt + + + sqrt + + + + + + + + + + _col5 + + + + + + + + + + + + + + 1.5 + + + + + + + + + + + + + + + + + + + + + + + + + _col4 + + + + + + + + + + 1.5 + + + + + + + + + + + + + + + _col3 + + + + + + + + + + 1.0 + + + + + + + + + + + + + + + _col2 + + + + + + + + + + + + + + 1.5 + + + + + + + + + + + + + + + + + + + + + + + + + _col1 + + + + + + + + + + 1.5 + + + + + + + + + + + + + + + _col9 + + + + + + + + + + 1.0 + + + + + + + + + + + + + + + _col13 + + + + + + + + + + 3 + + + + + + + + + org.apache.hadoop.hive.ql.udf.UDFRand + + + rand + + + + + + + + + + _col12 + + + + + + + + + + 1.0 + + + + + + + + + + + + + + + _col11 + + + + + + + + + + + + + + 1.5 + + + + + + + + + + + + + + + + + + + + + + + + + _col10 + + + + + + + + + + 1.5 + + + + + + + + + + + + + + + _col17 + + + + + + + + + + 1 + + + + + + + + + + + + + + 2 + + + + + + + + + + + + + + + + + + + + + + + + + _col16 + + + + + + + + + + 1 + + + + + + + + + + 2 + + + + + + + + + + + + + + + _col15 + + + + + + + + + + 3 + + + + + + + + + + + + + + + _col14 + + + + + + 3 + + + + + _col0 + + + + + + + + + + 1.0 + + + + + + + + + + + + + + + _col18 + + + + + + + + + + 1 + + + + + + + + + true + + + org.apache.hadoop.hive.ql.udf.UDFOPBitNot + + + ~ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + _col0 + + + _col1 + + + _col2 + + + _col3 + + + _col4 + + + _col5 + + + _col6 + + + _col7 + + + _col8 + + + _col9 + + + _col10 + + + _col11 + + + _col12 + + + _col13 + + + _col14 + + + _col15 + + + _col16 + + + _col17 + + + _col18 + + + + + + + SEL_1 + + + + - - SEL_1 - - - - - - - - - - - - - - - - _c0 - - - _col0 - - - - - - double - - - - - - - _c1 - - - _col1 - - - - - - double - - - - - - - _c2 - - - _col2 - - - - - - double - - - - - - - _c3 - - - _col3 - - - - - - bigint - - - - - - - _c4 - - - _col4 - - - - - - bigint - - - - - - - _c5 - - - _col5 - - - - - - bigint - - - - - - - _c6 - - - _col6 - - - - - - double - - - - - - - _c7 - - - _col7 - - - - - - double - - - - - - - _c8 - - - _col8 - - - - - - double - - - - - - - _c9 - - - _col9 - - - - - - bigint - - - - - - - _c10 - - - _col10 - - - - - - bigint - - - - - - - _c11 - - - _col11 - - - - - - bigint - - - - - 
- - _c12 - - - _col12 - - - - - - bigint - - - - - - - _c13 - - - _col13 - - - - - - double - - - - - - - _c14 - - - _col14 - - - - - - int - - - - - - - _c15 - - - _col15 - - - - - - int - - - - - - - _c16 - - - _col16 - - - - - - int - - - - - - - _c17 - - - _col17 - - - - - - int - - - - - - - _c18 - - - _col18 - - - - - - int - - - - - - - - - - + + + + + + + + + _c0 + + + _col0 + + + + + + double + + + + + + + _c1 + + + _col1 + + + + + + double + + + + + + + _c2 + + + _col2 + + + + + + double + + + + + + + _c3 + + + _col3 + + + + + + bigint + + + + + + + _c4 + + + _col4 + + + + + + bigint + + + + + + + _c5 + + + _col5 + + + + + + bigint + + + + + + + _c6 + + + _col6 + + + + + + double + + + + + + + _c7 + + + _col7 + + + + + + double + + + + + + + _c8 + + + _col8 + + + + + + double + + + + + + + _c9 + + + _col9 + + + + + + bigint + + + + + + + _c10 + + + _col10 + + + + + + bigint + + + + + + + _c11 + + + _col11 + + + + + + bigint + + + + + + + _c12 + + + _col12 + + + + + + bigint + + + + + + + _c13 + + + _col13 + + + + + + double + + + + + + + _c14 + + + _col14 + + + + + + int + + + + + + + _c15 + + + _col15 + + + + + + int + + + + + + + _c16 + + + _col16 + + + + + + int + + + + + + + _c17 + + + _col17 + + + + + + int + + + + + + + _c18 + + + _col18 + + + + + + int + + + + + + + + + Index: ql/src/test/results/compiler/plan/udf6.q.xml =================================================================== --- ql/src/test/results/compiler/plan/udf6.q.xml (revision 1555253) +++ ql/src/test/results/compiler/plan/udf6.q.xml (working copy) @@ -153,297 +153,289 @@ src - - - - - - - - - - - #### A masked pattern was here #### + + + + + + + + + #### A masked pattern was here #### + + + 1 + + + #### A masked pattern was here #### + + + true + + + + + org.apache.hadoop.mapred.TextInputFormat - - 1 + + org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - #### A masked pattern was here #### - - - true - - - - - org.apache.hadoop.mapred.TextInputFormat + + + + hive.serialization.extend.nesting.levels + true - - org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + columns + _col0,_col1 - - - - hive.serialization.extend.nesting.levels - true - - - columns - _col0,_col1 - - - serialization.lib - org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - - serialization.format - 1 - - - columns.types - string:int - - - escape.delim - \ - - + + serialization.lib + org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - - - 1 - - - - - FS_2 - - - - - - - - - - - - - - - - _col0 - - - - - - - - string - - - - - string - - + + serialization.format + 1 - - - - _col1 - - - - - - - - int - - - - - int - - + + columns.types + string:int + + escape.delim + \ + + + 1 + - - - - - - _col1 - - + + FS_2 + + + + + + + + + - - - + + + _col0 + + + + + + - boolean + string - - true + + string - - - + + + _col1 - - 1 + + - - - - - - + + + + int + + - - 2 + + int - - - - - - - - _col0 - - - - - - - - - - a - - - - - - - - - - b - - - - - - - - - - - - - - - - - - - - - - - - - - - - - _col0 - - - _col1 - - - - + + + + + _col1 + + + + + + + + + boolean + + + + + true + + + + + + + + + + 1 + + + + + + + + + + 2 + + + + + + + + + + + + + + + _col0 + + + + + + + + + + a + + + + + + + + + + b + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + _col0 + + + _col1 + + + + + + + SEL_1 + + + + - - SEL_1 - - - - - - - - - - - - - - - - _c0 - - - _col0 - - - - - - string - - - - - - - _c1 - - - _col1 - - - - - - int - - - - - - - - - - + + + + + + + + + _c0 + + + _col0 + + + + + + string + + + + + + + _c1 + + + 
Index: ql/src/test/results/compiler/plan/udf_case.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/udf_case.q.xml (revision 1555253)
+++ ql/src/test/results/compiler/plan/udf_case.q.xml (working copy)
@@ -153,381 +153,369 @@
 src
#### Hunk body garbled in extraction (XML element tags stripped). Recoverable content: a file sink with      ####
#### LazySimpleSerDe (columns _col0,_col1, columns.types int:int), operators FS_3 and LIM_2 (limit 1), a     ####
#### CASE select whose branches pair the constants 1..5 with the results 11..15, operator SEL_1, and the     ####
#### output schema _c0 int, _c1 int.                                                                         ####
Index: ql/src/test/results/compiler/plan/udf_when.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/udf_when.q.xml (revision 1555253)
+++ ql/src/test/results/compiler/plan/udf_when.q.xml (working copy)
@@ -153,461 +153,449 @@
 src
#### Hunk body garbled in extraction (XML element tags stripped). Recoverable content: the same file sink /  ####
#### FS_3 / LIM_2 shape as udf_case, a WHEN select over boolean branches pairing constants in 1..5 with      ####
#### results in 10..15, operator SEL_1, and the output schema _c0 int, _c1 int.                              ####
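The two plans above exercise Hive's conditional expressions: CASE key WHEN v THEN r evaluates its branches in order and returns the first match, falling back to ELSE (or NULL). A hedged Java sketch of those semantics using the constants visible in the hunks; the real implementations are Hive's GenericUDFCase and GenericUDFWhen:

public class CaseWhenSketch {
  // First match wins, like CASE key WHEN when[i] THEN then[i] ... ELSE elseVal END.
  static Integer evalCase(int key, int[] when, int[] then, Integer elseVal) {
    for (int i = 0; i < when.length; i++) {
      if (key == when[i]) {
        return then[i];
      }
    }
    return elseVal;
  }

  public static void main(String[] args) {
    int[] when = {1, 2, 3, 4, 5};      // keys visible in the udf_case hunk
    int[] then = {11, 12, 13, 14, 15}; // paired results
    System.out.println(evalCase(3, when, then, null)); // prints 13
  }
}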
Index: ql/src/test/results/compiler/plan/union.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/union.q.xml (revision 1555253)
+++ ql/src/test/results/compiler/plan/union.q.xml (working copy)
@@ -44,109 +44,105 @@
 #### A masked pattern was here ####
#### Hunk body garbled in extraction (XML element tags stripped). Recoverable content: a file sink with      ####
#### LazySimpleSerDe (columns _col0,_col1, columns.types string:string) and operator FS_12.                  ####
@@ -661,471 +657,431 @@
 null-subquery1:unioninput-subquery1:src
#### Hunk body garbled in extraction (XML element tags stripped). Recoverable content: operators FS_8 and    ####
#### SEL_7 (mapping src key/value to _col0/_col1), UNION_6, SEL_5, FIL_10 (a comparison of key against the   ####
#### constant 100), and table scan TS_3 over src with the virtual columns BLOCK__OFFSET__INSIDE__FILE        ####
#### (bigint) and INPUT__FILE__NAME (string).                                                                ####
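Every file sink in these golden files carries the same style of LazySimpleSerDe descriptor. A hedged reconstruction of those properties as a plain Properties object; the helper name is illustrative, but the property keys are the ones literally visible in the hunks:

import java.util.Properties;

public class SinkDescSketch {
  static Properties lazySimpleSerDeProps(String columns, String types) {
    Properties p = new Properties();
    p.setProperty("columns", columns);     // e.g. "_col0,_col1"
    p.setProperty("columns.types", types); // e.g. "string:string"
    p.setProperty("serialization.lib",
        "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe");
    p.setProperty("serialization.format", "1");
    p.setProperty("escape.delim", "\\");
    return p;
  }

  public static void main(String[] args) {
    System.out.println(lazySimpleSerDeProps("_col0,_col1", "string:string"));
  }
}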
@@ -1134,49 +1090,23 @@
#### Hunk body garbled in extraction (XML element tags stripped). Recoverable content: the _col0/_col1       ####
#### (string, string) column schema of a union branch; the serialized form shrinks in this hunk.             ####
@@ -1220,212 +1150,248 @@
#### Hunk body garbled in extraction (XML element tags stripped). Recoverable content: the second union      ####
#### branch with operators SEL_2 and FIL_9 (a comparison of key against the constant 100) over               ####
#### src(key string, value string).                                                                          ####
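Taken together, the union.q hunks describe two branches that each scan src, filter on a comparison of key with 100, and project (key, value), merged by UNION_6 with UNION ALL semantics. A hedged dataflow sketch in plain Java streams; the comparison directions are assumed, since they were lost with the stripped XML tags:

import java.util.AbstractMap.SimpleEntry;
import java.util.Arrays;
import java.util.List;
import java.util.Map.Entry;
import java.util.stream.Collectors;
import java.util.stream.Stream;

public class UnionShapeSketch {
  public static void main(String[] args) {
    List<Entry<Integer, String>> src = Arrays.<Entry<Integer, String>>asList(
        new SimpleEntry<>(50, "val_50"), new SimpleEntry<>(150, "val_150"));

    // Branch 1: TS_3 -> FIL_10 -> SEL_5; branch 2: FIL_9 -> SEL_2.
    // Assumed predicates: the hunks only show key compared with 100.
    Stream<Entry<Integer, String>> branch1 =
        src.stream().filter(e -> e.getKey() < 100);
    Stream<Entry<Integer, String>> branch2 =
        src.stream().filter(e -> e.getKey() >= 100);

    // UNION_6 forwards rows from both parents unchanged (UNION ALL).
    List<Entry<Integer, String>> out =
        Stream.concat(branch1, branch2).collect(Collectors.toList());
    System.out.println(out); // [50=val_50, 150=val_150]
  }
}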