diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 6d0cf15..dfe3c12 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -1046,6 +1046,9 @@ public void setSparkConfigUpdated(boolean isSparkConfigUpdated) { "job, process those skewed keys. The same key need not be skewed for all the tables, and so,\n" + "the follow-up map-reduce job (for the skewed keys) would be much faster, since it would be a\n" + "map-join."), + HIVEDYNAMICPARTITIONHASHJOIN("hive.optimize.dynamic.partition.hashjoin", false, + "Whether to enable dynamically partitioned hash join optimization. \n" + + "This setting is also dependent on enabling hive.auto.convert.join"), HIVECONVERTJOIN("hive.auto.convert.join", true, "Whether Hive enables the optimization about converting common join into mapjoin based on the input file size"), HIVECONVERTJOINNOCONDITIONALTASK("hive.auto.convert.join.noconditionaltask", true, diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 441b278..6da302e 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -332,6 +332,10 @@ minitez.query.files=bucket_map_join_tez1.q,\ tez_dml.q,\ tez_fsstat.q,\ tez_insert_overwrite_local_directory_1.q,\ + tez_dynpart_hashjoin_1.q,\ + tez_dynpart_hashjoin_2.q,\ + tez_vector_dynpart_hashjoin_1.q,\ + tez_vector_dynpart_hashjoin_2.q,\ tez_join_hash.q,\ tez_join_result_complex.q,\ tez_join_tests.q,\ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java index 15cafdd..51c641d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java @@ -24,6 +24,7 @@ 
import java.util.Collection; import java.util.HashSet; import java.util.List; +import java.util.Map; import java.util.concurrent.Callable; import java.util.concurrent.Future; @@ -55,6 +56,11 @@ import org.apache.hadoop.hive.ql.io.HiveKey; import org.apache.hadoop.hive.ql.log.PerfLogger; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils; +import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; import org.apache.hadoop.hive.ql.plan.MapJoinDesc; import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.ql.plan.api.OperatorType; @@ -152,7 +158,7 @@ protected HashTableLoader getHashTableLoader(Configuration hconf) { final ExecMapperContext mapContext = getExecContext(); final MapredContext mrContext = MapredContext.get(); - if (!conf.isBucketMapJoin()) { + if (!conf.isBucketMapJoin() && !conf.isDynamicPartitionHashJoin()) { /* * The issue with caching in case of bucket map join is that different tasks * process different buckets and if the container is reused to join a different bucket, @@ -656,4 +662,32 @@ static public String getOperatorName() { public OperatorType getType() { return OperatorType.MAPJOIN; } + + /** + * This should only be needed for vectorized dynamic partitioned hash joins. + * During query execution, the vectorized rowObjectInspector created by ReduceRecordSource + * does not match the schema expected by the ExprNodeDescs in the MapJoin, which messes up + * the initialization done during initializeOp(). This seems to break Operator initialization + * because the keys/exprs try to validate against the rowObjectInspector schema, but the + * key/value structs do not exist in the rowObjectInspector. 
+ * This is a bit of a hack to change the ExprNodeDescs to match the schema of the run-time + * rowObjectInspector in the vectorized case. + * This also requires changes in the ReduceRecordSource, to change the field names in the + * rowObjectInspector to match the naming convention here. + */ + protected void flattenExpressionsIfNecessary() { + if (conf.isDynamicPartitionHashJoin()) { + // Keys + for (Map.Entry> mapEntry : conf.getKeys().entrySet()) { + List newKeys = ExprNodeDescUtils.flattenExprList(mapEntry.getValue()); + conf.getKeys().put(mapEntry.getKey(), newKeys); + } + + // Expressions + for (Map.Entry> mapEntry : conf.getExprs().entrySet()) { + List newExprs = ExprNodeDescUtils.flattenExprList(mapEntry.getValue()); + conf.getExprs().put(mapEntry.getKey(), newExprs); + } + } + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java index d7f1b42..0f02737 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java @@ -1354,4 +1354,10 @@ public OperatorType getType() { return childOperators; } } + + public void removeParents() { + for (Operator parent : new ArrayList>(getParentOperators())) { + removeParent(parent); + } + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/KeyValuesAdapter.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/KeyValuesAdapter.java new file mode 100644 index 0000000..8f706fe --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/KeyValuesAdapter.java @@ -0,0 +1,47 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.tez; + +import java.io.IOException; + +/** + * Key-values interface for the Reader used by ReduceRecordSource + */ +public interface KeyValuesAdapter { + /** + * Get the key for current record + * @return the key of the current record + * @throws IOException if the underlying reader fails while fetching the key + */ + Object getCurrentKey() throws IOException; + + /** + * Get the values for the current record + * @return the values for the current record + * @throws IOException if the underlying reader fails while fetching the values + */ + Iterable getCurrentValues() throws IOException; + + /** + * Move to the next record + * @return true if successful, false if there are no more records to process + * @throws IOException if the underlying reader fails while advancing + */ + boolean next() throws IOException; +} \ No newline at end of file diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/KeyValuesFromKeyValue.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/KeyValuesFromKeyValue.java new file mode 100644 index 0000000..51cdeca --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/KeyValuesFromKeyValue.java @@ -0,0 +1,90 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.tez; + +import java.io.IOException; +import java.util.Iterator; +import java.util.NoSuchElementException; +import org.apache.tez.runtime.library.api.KeyValueReader; + +/** + * Provides a key/values (note the plural values) interface out of a KeyValueReader, + * needed by ReduceRecordSource when reading input from a key/value source. + */ +public class KeyValuesFromKeyValue implements KeyValuesAdapter { + protected KeyValueReader reader; + protected ValueIterator valueIterator = + new ValueIterator(); + + private static class ValueIterator implements Iterator, Iterable { + + protected boolean hasNextValue = false; + protected T value = null; + + @Override + public boolean hasNext() { + return hasNextValue; + } + + @Override + public T next() { + if (!hasNextValue) { + throw new NoSuchElementException(); + } + hasNextValue = false; + return value; + } + + void reset(T value) { + this.value = value; + hasNextValue = true; + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + + @Override + public Iterator iterator() { + return this; + } + } + + public KeyValuesFromKeyValue(KeyValueReader reader) { + this.reader = reader; + } + + @Override + public Object getCurrentKey() throws IOException { + return reader.getCurrentKey(); + } + + @Override + public Iterable getCurrentValues() throws IOException { + Object obj = reader.getCurrentValue(); + valueIterator.reset(obj); + return valueIterator; + } + + @Override + public boolean next() throws IOException { + return 
reader.next(); + } +} \ No newline at end of file diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/KeyValuesFromKeyValues.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/KeyValuesFromKeyValues.java new file mode 100644 index 0000000..b027bce --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/KeyValuesFromKeyValues.java @@ -0,0 +1,48 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.tez; + +import java.io.IOException; +import org.apache.tez.runtime.library.api.KeyValuesReader; + +/** + * Provides a key/values interface out of a KeyValuesReader for use by ReduceRecordSource. 
+ */ +public class KeyValuesFromKeyValues implements KeyValuesAdapter { + protected KeyValuesReader reader; + + public KeyValuesFromKeyValues(KeyValuesReader reader) { + this.reader = reader; + } + + @Override + public Object getCurrentKey() throws IOException { + return reader.getCurrentKey(); + } + + @Override + public Iterable getCurrentValues() throws IOException { + return reader.getCurrentValues(); + } + + @Override + public boolean next() throws IOException { + return reader.next(); + } +} \ No newline at end of file diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java index 545d7c6..fda6b98 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java @@ -50,6 +50,7 @@ import org.apache.tez.runtime.api.LogicalInput; import org.apache.tez.runtime.api.LogicalOutput; import org.apache.tez.runtime.api.ProcessorContext; +import org.apache.tez.runtime.api.Reader; import org.apache.tez.runtime.library.api.KeyValuesReader; /** @@ -129,10 +130,11 @@ void init( tagToReducerMap.put(mergeReduceWork.getTag(), mergeReduceWork); } - bigTablePosition = (byte) reduceWork.getTag(); ((TezContext) MapredContext.get()).setDummyOpsMap(connectOps); } + bigTablePosition = (byte) reduceWork.getTag(); + ObjectInspector[] mainWorkOIs = null; ((TezContext) MapredContext.get()).setInputs(inputs); ((TezContext) MapredContext.get()).setTezProcessorContext(processorContext); @@ -227,7 +229,7 @@ private void initializeSourceForTag(ReduceWork redWork, int tag, ObjectInspector reducer.setParentOperators(null); // clear out any parents as reducer is the root TableDesc keyTableDesc = redWork.getKeyDesc(); - KeyValuesReader reader = (KeyValuesReader) inputs.get(inputName).getReader(); + Reader reader = inputs.get(inputName).getReader(); sources[tag] = new ReduceRecordSource(); 
sources[tag].init(jconf, redWork.getReducer(), redWork.getVectorMode(), keyTableDesc, diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java index 7d79e87..b634877 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java @@ -57,6 +57,8 @@ import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.util.StringUtils; +import org.apache.tez.runtime.api.Reader; +import org.apache.tez.runtime.library.api.KeyValueReader; import org.apache.tez.runtime.library.api.KeyValuesReader; /** @@ -107,7 +109,7 @@ /* this is only used in the error code path */ private List valueStringWriters; - private KeyValuesReader reader; + private KeyValuesAdapter reader; private boolean handleGroupKey; @@ -120,7 +122,7 @@ private final GroupIterator groupIterator = new GroupIterator(); void init(JobConf jconf, Operator reducer, boolean vectorized, TableDesc keyTableDesc, - TableDesc valueTableDesc, KeyValuesReader reader, boolean handleGroupKey, byte tag, + TableDesc valueTableDesc, Reader reader, boolean handleGroupKey, byte tag, Map vectorScratchColumnTypeMap) throws Exception { @@ -129,7 +131,11 @@ void init(JobConf jconf, Operator reducer, boolean vectorized, TableDesc keyT this.reducer = reducer; this.vectorized = vectorized; this.keyTableDesc = keyTableDesc; - this.reader = reader; + if (reader instanceof KeyValueReader) { + this.reader = new KeyValuesFromKeyValue((KeyValueReader) reader); + } else { + this.reader = new KeyValuesFromKeyValues((KeyValuesReader) reader); + } this.handleGroupKey = handleGroupKey; this.tag = tag; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java index e9bd44a..7a0b15e 100644 --- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java @@ -100,6 +100,9 @@ public VectorMapJoinOperator (VectorizationContext vContext, OperatorDesc conf) @Override public Collection> initializeOp(Configuration hconf) throws HiveException { + // Distributed hash join needs its ExprNodeDescs flattened because vectorized + // rowObjectInspector is flattened out by ReduceRecordSource (no nested key, value structs) + flattenExpressionsIfNecessary(); // Use a final variable to properly parameterize the processVectorInspector closure. // Using a member variable in the closure will not do the right thing... diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java index 3780113..281e0e9 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java @@ -211,12 +211,14 @@ public static VectorizedRowBatch constructVectorizedRowBatch( ArrayList ois = new ArrayList(); List fields = keyInspector.getAllStructFieldRefs(); for (StructField field: fields) { - colNames.add(Utilities.ReduceField.KEY.toString() + "." + field.getFieldName()); + // Match the naming convention done by ExprNodeDesc flattening for dynamic partitioned hash joins + colNames.add(Utilities.ReduceField.KEY.toString() + "_" + field.getFieldName()); ois.add(field.getFieldObjectInspector()); } fields = valueInspector.getAllStructFieldRefs(); for (StructField field: fields) { - colNames.add(Utilities.ReduceField.VALUE.toString() + "." 
+ field.getFieldName()); + // Match the naming convention done by ExprNodeDesc flattening for dynamic partitioned hash joins + colNames.add(Utilities.ReduceField.VALUE.toString() + "_" + field.getFieldName()); ois.add(field.getFieldObjectInspector()); } StandardStructObjectInspector rowObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(colNames, ois); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java index 4c8c4b1..4bc7406 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java @@ -551,6 +551,10 @@ protected HashTableLoader getHashTableLoader(Configuration hconf) { @Override protected Collection> initializeOp(Configuration hconf) throws HiveException { + // Distributed hash join needs its ExprNodeDescs flattened because vectorized + // rowObjectInspector is flattened out by ReduceRecordSource (no nested key, value structs) + flattenExpressionsIfNecessary(); + Collection> result = super.initializeOp(hconf); if (LOG.isDebugEnabled()) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java index 5a87bd6..95cbd3c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java @@ -93,9 +93,7 @@ if (retval == null) { return retval; } else { - int pos = 0; // it doesn't matter which position we use in this case. - convertJoinSMBJoin(joinOp, context, pos, 0, false); - return null; + fallbackToReduceSideJoin(joinOp, context); } } @@ -136,7 +134,7 @@ } else { // only case is full outer join with SMB enabled which is not possible. Convert to regular // join. 
- convertJoinSMBJoin(joinOp, context, 0, 0, false); + fallbackToReduceSideJoin(joinOp, context); return null; } } @@ -155,12 +153,11 @@ if (mapJoinConversionPos < 0) { // we are just converting to a common merge join operator. The shuffle // join in map-reduce case. - int pos = 0; // it doesn't matter which position we use in this case. - convertJoinSMBJoin(joinOp, context, pos, 0, false); + fallbackToReduceSideJoin(joinOp, context); return null; } - MapJoinOperator mapJoinOp = convertJoinMapJoin(joinOp, context, mapJoinConversionPos); + MapJoinOperator mapJoinOp = convertJoinMapJoin(joinOp, context, mapJoinConversionPos, true); // map join operator by default has no bucket cols and num of reduce sinks // reduced by 1 mapJoinOp @@ -180,7 +177,7 @@ private Object checkAndConvertSMBJoin(OptimizeTezProcContext context, JoinOperat // we cannot convert to bucket map join, we cannot convert to // map join either based on the size. Check if we can convert to SMB join. if (context.conf.getBoolVar(HiveConf.ConfVars.HIVE_AUTO_SORTMERGE_JOIN) == false) { - convertJoinSMBJoin(joinOp, context, 0, 0, false); + fallbackToReduceSideJoin(joinOp, context); return null; } Class bigTableMatcherClass = null; @@ -209,8 +206,7 @@ private Object checkAndConvertSMBJoin(OptimizeTezProcContext context, JoinOperat // contains aliases from sub-query // we are just converting to a common merge join operator. The shuffle // join in map-reduce case. - int pos = 0; // it doesn't matter which position we use in this case. - convertJoinSMBJoin(joinOp, context, pos, 0, false); + fallbackToReduceSideJoin(joinOp, context); return null; } @@ -220,8 +216,7 @@ private Object checkAndConvertSMBJoin(OptimizeTezProcContext context, JoinOperat } else { // we are just converting to a common merge join operator. The shuffle // join in map-reduce case. - int pos = 0; // it doesn't matter which position we use in this case. 
- convertJoinSMBJoin(joinOp, context, pos, 0, false); + fallbackToReduceSideJoin(joinOp, context); } return null; } @@ -338,7 +333,7 @@ private boolean convertJoinBucketMapJoin(JoinOperator joinOp, OptimizeTezProcCon return false; } - MapJoinOperator mapJoinOp = convertJoinMapJoin(joinOp, context, bigTablePosition); + MapJoinOperator mapJoinOp = convertJoinMapJoin(joinOp, context, bigTablePosition, true); MapJoinDesc joinDesc = mapJoinOp.getConf(); joinDesc.setBucketMapJoin(true); @@ -633,7 +628,7 @@ public int getMapJoinConversionPos(JoinOperator joinOp, OptimizeTezProcContext c */ public MapJoinOperator convertJoinMapJoin(JoinOperator joinOp, OptimizeTezProcContext context, - int bigTablePosition) throws SemanticException { + int bigTablePosition, boolean removeReduceSink) throws SemanticException { // bail on mux operator because currently the mux operator masks the emit keys // of the constituent reduce sinks. for (Operator parentOp : joinOp.getParentOperators()) { @@ -646,45 +641,49 @@ public MapJoinOperator convertJoinMapJoin(JoinOperator joinOp, OptimizeTezProcCo MapJoinOperator mapJoinOp = MapJoinProcessor.convertJoinOpMapJoinOp(context.conf, joinOp, joinOp.getConf().isLeftInputJoin(), joinOp.getConf().getBaseSrc(), - joinOp.getConf().getMapAliases(), bigTablePosition, true); + joinOp.getConf().getMapAliases(), bigTablePosition, true, removeReduceSink); mapJoinOp.getConf().setHybridHashJoin(HiveConf.getBoolVar(context.conf, - HiveConf.ConfVars.HIVEUSEHYBRIDGRACEHASHJOIN)); + HiveConf.ConfVars.HIVEUSEHYBRIDGRACEHASHJOIN)); Operator parentBigTableOp = mapJoinOp.getParentOperators().get(bigTablePosition); if (parentBigTableOp instanceof ReduceSinkOperator) { - for (Operator p : parentBigTableOp.getParentOperators()) { - // we might have generated a dynamic partition operator chain. Since - // we're removing the reduce sink we need do remove that too. 
- Set> dynamicPartitionOperators = new HashSet>(); - Map, AppMasterEventOperator> opEventPairs = new HashMap<>(); - for (Operator c : p.getChildOperators()) { - AppMasterEventOperator event = findDynamicPartitionBroadcast(c); - if (event != null) { - dynamicPartitionOperators.add(c); - opEventPairs.put(c, event); + if (removeReduceSink) { + for (Operator p : parentBigTableOp.getParentOperators()) { + // we might have generated a dynamic partition operator chain. Since + // we're removing the reduce sink we need to remove that too. + Set> dynamicPartitionOperators = new HashSet>(); + Map, AppMasterEventOperator> opEventPairs = new HashMap<>(); + for (Operator c : p.getChildOperators()) { + AppMasterEventOperator event = findDynamicPartitionBroadcast(c); + if (event != null) { + dynamicPartitionOperators.add(c); + opEventPairs.put(c, event); + } } - } - for (Operator c : dynamicPartitionOperators) { - if (context.pruningOpsRemovedByPriorOpt.isEmpty() || - !context.pruningOpsRemovedByPriorOpt.contains(opEventPairs.get(c))) { - p.removeChild(c); - // at this point we've found the fork in the op pipeline that has the pruning as a child plan. - LOG.info("Disabling dynamic pruning for: " - + ((DynamicPruningEventDesc) opEventPairs.get(c).getConf()).getTableScan().getName() - + ". Need to be removed together with reduce sink"); + for (Operator c : dynamicPartitionOperators) { + if (context.pruningOpsRemovedByPriorOpt.isEmpty() || + !context.pruningOpsRemovedByPriorOpt.contains(opEventPairs.get(c))) { + p.removeChild(c); + // at this point we've found the fork in the op pipeline that has the pruning as a child plan. + LOG.info("Disabling dynamic pruning for: " + + ((DynamicPruningEventDesc) opEventPairs.get(c).getConf()).getTableScan().getName() + + ". 
Need to be removed together with reduce sink"); + } + } + for (Operator op : dynamicPartitionOperators) { + context.pruningOpsRemovedByPriorOpt.add(opEventPairs.get(op)); } } - for (Operator op : dynamicPartitionOperators) { - context.pruningOpsRemovedByPriorOpt.add(opEventPairs.get(op)); + + mapJoinOp.getParentOperators().remove(bigTablePosition); + if (!(mapJoinOp.getParentOperators().contains(parentBigTableOp.getParentOperators().get(0)))) { + mapJoinOp.getParentOperators().add(bigTablePosition, + parentBigTableOp.getParentOperators().get(0)); } + parentBigTableOp.getParentOperators().get(0).removeChild(parentBigTableOp); } - mapJoinOp.getParentOperators().remove(bigTablePosition); - if (!(mapJoinOp.getParentOperators().contains(parentBigTableOp.getParentOperators().get(0)))) { - mapJoinOp.getParentOperators().add(bigTablePosition, - parentBigTableOp.getParentOperators().get(0)); - } - parentBigTableOp.getParentOperators().get(0).removeChild(parentBigTableOp); + for (Operatorop : mapJoinOp.getParentOperators()) { if (!(op.getChildOperators().contains(mapJoinOp))) { op.getChildOperators().add(mapJoinOp); @@ -720,4 +719,43 @@ private AppMasterEventOperator findDynamicPartitionBroadcast(Operator parent) return null; } + + private static int getNumReducers(JoinOperator joinOp) { + int numReducers = 1; + for (OperatorparentOp : joinOp.getParentOperators()) { + if (parentOp instanceof ReduceSinkOperator) { + ReduceSinkOperator rs = (ReduceSinkOperator) parentOp; + int rsReducers = rs.getConf().getNumReducers(); + if (rsReducers > numReducers) { + numReducers = rsReducers; + } + } + } + return numReducers; + } + + private void fallbackToReduceSideJoin(JoinOperator joinOp, OptimizeTezProcContext context) + throws SemanticException { + if (context.conf.getBoolVar(HiveConf.ConfVars.HIVECONVERTJOIN) && + context.conf.getBoolVar(HiveConf.ConfVars.HIVEDYNAMICPARTITIONHASHJOIN)) { + // Attempt dynamic partitioned hash join + int numReducers = getNumReducers(joinOp); + 
LOG.info("Try dynamic partitioned hash join with " + numReducers + " reducers"); + int bigTablePos = getMapJoinConversionPos(joinOp, context, numReducers); + if (bigTablePos >= 0) { + MapJoinOperator mapJoinOp = convertJoinMapJoin(joinOp, context, bigTablePos, false); + if (mapJoinOp != null) { + LOG.info("Selected dynamic partitioned hash join"); + mapJoinOp.getConf().setDynamicPartitionHashJoin(true); + return; + } + } + } + + // we are just converting to a common merge join operator. The shuffle + // join in map-reduce case. + int pos = 0; // it doesn't matter which position we use in this case. + LOG.info("Fallback to common merge join operator"); + convertJoinSMBJoin(joinOp, context, pos, 0, false); + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java index 4d84f0f..f8f2b7b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java @@ -29,6 +29,8 @@ import java.util.Set; import java.util.Stack; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.ObjectPair; import org.apache.hadoop.hive.conf.HiveConf; @@ -57,6 +59,7 @@ import org.apache.hadoop.hive.ql.lib.Rule; import org.apache.hadoop.hive.ql.lib.RuleRegExp; import org.apache.hadoop.hive.ql.parse.GenMapRedWalker; +import org.apache.hadoop.hive.ql.parse.OptimizeTezProcContext; import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; @@ -89,6 +92,7 @@ // (column type + column name). The column name is not really used anywhere, but it // needs to be passed. Use the string defined below for that. 
private static final String MAPJOINKEY_FIELDPREFIX = "mapjoinkey"; + private static final Log LOG = LogFactory.getLog(MapJoinProcessor.class.getName()); public MapJoinProcessor() { } @@ -356,11 +360,18 @@ public MapJoinOperator convertMapJoin(HiveConf conf, public static MapJoinOperator convertJoinOpMapJoinOp(HiveConf hconf, JoinOperator op, boolean leftInputJoin, String[] baseSrc, List mapAliases, int mapJoinPos, boolean noCheckOuterJoin) throws SemanticException { + return convertJoinOpMapJoinOp(hconf, op, leftInputJoin, baseSrc, mapAliases, + mapJoinPos, noCheckOuterJoin, true); + } + + public static MapJoinOperator convertJoinOpMapJoinOp(HiveConf hconf, + JoinOperator op, boolean leftInputJoin, String[] baseSrc, List mapAliases, + int mapJoinPos, boolean noCheckOuterJoin, boolean adjustParentsChildren) + throws SemanticException { MapJoinDesc mapJoinDescriptor = getMapJoinDesc(hconf, op, leftInputJoin, baseSrc, mapAliases, - mapJoinPos, noCheckOuterJoin); - + mapJoinPos, noCheckOuterJoin, adjustParentsChildren); // reduce sink row resolver used to generate map join op RowSchema outputRS = op.getSchema(); @@ -1025,7 +1036,7 @@ public void setpGraphContext(ParseContext pGraphContext) { public static MapJoinDesc getMapJoinDesc(HiveConf hconf, JoinOperator op, boolean leftInputJoin, String[] baseSrc, List mapAliases, - int mapJoinPos, boolean noCheckOuterJoin) throws SemanticException { + int mapJoinPos, boolean noCheckOuterJoin, boolean adjustParentsChildren) throws SemanticException { JoinDesc desc = op.getConf(); JoinCondDesc[] condns = desc.getConds(); Byte[] tagOrder = desc.getTagOrder(); @@ -1072,6 +1083,26 @@ public static MapJoinDesc getMapJoinDesc(HiveConf hconf, // get the join keys from old parent ReduceSink operators Map> keyExprMap = pair.getSecond(); + if (!adjustParentsChildren) { + // Since we did not remove reduce sink parents, keep the original value expressions + newValueExprs = valueExprs; + + // Join key exprs are represented in terms of the 
original table columns, + // we need to convert these to the generated column names we can see in the Join operator + Map> newKeyExprMap = new HashMap>(); + for (Map.Entry> mapEntry : keyExprMap.entrySet()) { + Byte pos = mapEntry.getKey(); + ReduceSinkOperator rsParent = oldReduceSinkParentOps.get(pos.byteValue()); + List keyExprList = + ExprNodeDescUtils.resolveJoinKeysAsRSColumns(mapEntry.getValue(), rsParent); + if (keyExprList == null) { + throw new SemanticException("Error resolving join keys"); + } + newKeyExprMap.put(pos, keyExprList); + } + keyExprMap = newKeyExprMap; + } + // construct valueTableDescs and valueFilteredTableDescs List valueTableDescs = new ArrayList(); List valueFilteredTableDescs = new ArrayList(); @@ -1163,4 +1194,11 @@ public static MapJoinDesc getMapJoinDesc(HiveConf hconf, return mapJoinDescriptor; } + + public static MapJoinDesc getMapJoinDesc(HiveConf hconf, + JoinOperator op, boolean leftInputJoin, String[] baseSrc, List mapAliases, + int mapJoinPos, boolean noCheckOuterJoin) throws SemanticException { + return getMapJoinDesc(hconf, op, leftInputJoin, baseSrc, + mapAliases, mapJoinPos, noCheckOuterJoin, true); + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkMapJoinProc.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkMapJoinProc.java index bca91dd..b546838 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkMapJoinProc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkMapJoinProc.java @@ -21,12 +21,15 @@ import java.util.ArrayList; import java.util.EnumSet; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.Stack; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.HashTableDummyOperator; import org.apache.hadoop.hive.ql.exec.MapJoinOperator; import 
org.apache.hadoop.hive.ql.exec.Operator; @@ -58,11 +61,13 @@ import org.apache.hadoop.hive.ql.plan.TezWork.VertexType; import org.apache.hadoop.hive.ql.stats.StatsUtils; +import com.google.common.collect.Sets; + import static org.apache.hadoop.hive.ql.plan.ReduceSinkDesc.ReducerTraits.FIXED; public class ReduceSinkMapJoinProc implements NodeProcessor { - protected transient Log LOG = LogFactory.getLog(this.getClass().getName()); + private final static Log LOG = LogFactory.getLog(ReduceSinkMapJoinProc.class.getName()); /* (non-Javadoc) * This processor addresses the RS-MJ case that occurs in tez on the small/hash @@ -79,7 +84,40 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procContext, GenTezProcContext context = (GenTezProcContext) procContext; MapJoinOperator mapJoinOp = (MapJoinOperator)nd; - if (stack.size() < 2 || !(stack.get(stack.size() - 2) instanceof ReduceSinkOperator)) { + // remember the original parent list before we start modifying it. + if (!context.mapJoinParentMap.containsKey(mapJoinOp)) { + List> parents = new ArrayList>(mapJoinOp.getParentOperators()); + context.mapJoinParentMap.put(mapJoinOp, parents); + } + + boolean isBigTable = stack.size() < 2 + || !(stack.get(stack.size() - 2) instanceof ReduceSinkOperator); + + ReduceSinkOperator parentRS = null; + if (!isBigTable) { + parentRS = (ReduceSinkOperator)stack.get(stack.size() - 2); + + // For dynamic partitioned hash join, the big table will also be coming from a ReduceSinkOperator + // Check for this condition. + // TODO: use indexOf(), or parentRS.getTag()? 
+ isBigTable = + (mapJoinOp.getParentOperators().indexOf(parentRS) == mapJoinOp.getConf().getPosBigTable()); + } + + if (mapJoinOp.getConf().isDynamicPartitionHashJoin() && + !context.mapJoinToUnprocessedSmallTableReduceSinks.containsKey(mapJoinOp)) { + // Initialize set of unprocessed small tables + Set rsSet = Sets.newIdentityHashSet(); + for (int pos = 0; pos < mapJoinOp.getParentOperators().size(); ++pos) { + if (pos == mapJoinOp.getConf().getPosBigTable()) { + continue; + } + rsSet.add((ReduceSinkOperator) mapJoinOp.getParentOperators().get(pos)); + } + context.mapJoinToUnprocessedSmallTableReduceSinks.put(mapJoinOp, rsSet); + } + + if (isBigTable) { context.currentMapJoinOperators.add(mapJoinOp); return null; } @@ -87,14 +125,29 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procContext, context.preceedingWork = null; context.currentRootOperator = null; - ReduceSinkOperator parentRS = (ReduceSinkOperator)stack.get(stack.size() - 2); + return processReduceSinkToHashJoin(parentRS, mapJoinOp, context); + } + + public static BaseWork getMapJoinParentWork(GenTezProcContext context, Operator parentRS) { + BaseWork parentWork; + if (context.unionWorkMap.containsKey(parentRS)) { + parentWork = context.unionWorkMap.get(parentRS); + } else { + assert context.childToWorkMap.get(parentRS).size() == 1; + parentWork = context.childToWorkMap.get(parentRS).get(0); + } + return parentWork; + } + + public static Object processReduceSinkToHashJoin(ReduceSinkOperator parentRS, MapJoinOperator mapJoinOp, + GenTezProcContext context) throws SemanticException { // remove the tag for in-memory side of mapjoin parentRS.getConf().setSkipTag(true); parentRS.setSkipTag(true); - // remember the original parent list before we start modifying it. 
- if (!context.mapJoinParentMap.containsKey(mapJoinOp)) { - List> parents = new ArrayList>(mapJoinOp.getParentOperators()); - context.mapJoinParentMap.put(mapJoinOp, parents); + + // Mark this small table as being processed + if (mapJoinOp.getConf().isDynamicPartitionHashJoin()) { + context.mapJoinToUnprocessedSmallTableReduceSinks.get(mapJoinOp).remove(parentRS); } List mapJoinWork = null; @@ -109,13 +162,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procContext, * */ mapJoinWork = context.mapJoinWorkMap.get(mapJoinOp); - BaseWork parentWork; - if (context.unionWorkMap.containsKey(parentRS)) { - parentWork = context.unionWorkMap.get(parentRS); - } else { - assert context.childToWorkMap.get(parentRS).size() == 1; - parentWork = context.childToWorkMap.get(parentRS).get(0); - } + BaseWork parentWork = getMapJoinParentWork(context, parentRS); // set the link between mapjoin and parent vertex int pos = context.mapJoinParentMap.get(mapJoinOp).indexOf(parentRS); @@ -161,6 +208,11 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procContext, keyCount /= bucketCount; tableSize /= bucketCount; } + } else if (joinConf.isDynamicPartitionHashJoin()) { + // For dynamic partitioned hash join, assuming table is split evenly among the reduce tasks. 
+ bucketCount = parentRS.getConf().getNumReducers(); + keyCount /= bucketCount; + tableSize /= bucketCount; } } LOG.info("Mapjoin " + mapJoinOp + ", pos: " + pos + " --> " + parentWork.getName() + " (" @@ -218,6 +270,8 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procContext, edgeType = EdgeType.CUSTOM_SIMPLE_EDGE; } } + } else if (mapJoinOp.getConf().isDynamicPartitionHashJoin()) { + edgeType = EdgeType.CUSTOM_SIMPLE_EDGE; } TezEdgeProperty edgeProp = new TezEdgeProperty(null, edgeType, numBuckets); @@ -232,7 +286,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procContext, } ReduceSinkOperator r = null; - if (parentRS.getConf().getOutputName() != null) { + if (context.connectedReduceSinks.contains(parentRS)) { LOG.debug("Cloning reduce sink for multi-child broadcast edge"); // we've already set this one up. Need to clone for the next work. r = (ReduceSinkOperator) OperatorFactory.getAndMakeChild( diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezProcContext.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezProcContext.java index adc31ae..c4c1c09 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezProcContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezProcContext.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.parse; import java.io.Serializable; +import java.util.HashMap; import java.util.LinkedHashMap; import java.util.LinkedHashSet; import java.util.LinkedList; @@ -104,6 +105,10 @@ // map that says which mapjoin belongs to which work item public final Map> mapJoinWorkMap; + // Mapping of reducesink to mapjoin operators + // Only used for dynamic partitioned hash joins (mapjoin operator in the reducer) + public final Map, MapJoinOperator> smallTableParentToMapJoinMap; + // a map to keep track of which root generated which work public final Map, BaseWork> rootToWorkMap; @@ -148,6 +153,11 @@ // remember the connections between ts and event public final Map> tsToEventMap; 
+ // When processing dynamic partitioned hash joins, some of the small tables may not get processed + // before the mapjoin's parents are removed during GenTezWork.process(). This is to keep + // track of which small tables haven't been processed yet. + public Map> mapJoinToUnprocessedSmallTableReduceSinks; + @SuppressWarnings("unchecked") public GenTezProcContext(HiveConf conf, ParseContext parseContext, List> moveTask, List> rootTasks, @@ -164,6 +174,7 @@ public GenTezProcContext(HiveConf conf, ParseContext parseContext, this.leafOperatorToFollowingWork = new LinkedHashMap, BaseWork>(); this.linkOpWithWorkMap = new LinkedHashMap, Map>(); this.linkWorkWithReduceSinkMap = new LinkedHashMap>(); + this.smallTableParentToMapJoinMap = new LinkedHashMap, MapJoinOperator>(); this.mapJoinWorkMap = new LinkedHashMap>(); this.rootToWorkMap = new LinkedHashMap, BaseWork>(); this.childToWorkMap = new LinkedHashMap, List>(); @@ -185,6 +196,7 @@ public GenTezProcContext(HiveConf conf, ParseContext parseContext, this.tsToEventMap = new LinkedHashMap>(); this.opMergeJoinWorkMap = new LinkedHashMap, MergeJoinWork>(); this.currentMergeJoinOperator = null; + this.mapJoinToUnprocessedSmallTableReduceSinks = new HashMap>(); rootTasks.add(currentTask); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java index 11c1df6..20f8e95 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java @@ -35,6 +35,7 @@ import org.apache.hadoop.hive.ql.exec.FetchTask; import org.apache.hadoop.hive.ql.exec.FileSinkOperator; import org.apache.hadoop.hive.ql.exec.HashTableDummyOperator; +import org.apache.hadoop.hive.ql.exec.MapJoinOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.OperatorUtils; import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; @@ -145,12 +146,13 @@ public ReduceWork 
createReduceWork(GenTezProcContext context, Operator root, tezWork.add(reduceWork); TezEdgeProperty edgeProp; + EdgeType edgeType = determineEdgeType(context.preceedingWork, reduceWork); if (reduceWork.isAutoReduceParallelism()) { edgeProp = - new TezEdgeProperty(context.conf, EdgeType.SIMPLE_EDGE, true, + new TezEdgeProperty(context.conf, edgeType, true, reduceWork.getMinReduceTasks(), reduceWork.getMaxReduceTasks(), bytesPerReducer); } else { - edgeProp = new TezEdgeProperty(EdgeType.SIMPLE_EDGE); + edgeProp = new TezEdgeProperty(edgeType); } tezWork.connect( @@ -485,4 +487,21 @@ public void removeBranch(AppMasterEventOperator event) { curr.removeChild(child); } + + public EdgeType determineEdgeType(BaseWork preceedingWork, BaseWork followingWork) { + if (followingWork instanceof ReduceWork) { + // Ideally there should be a better way to determine that the followingWork contains + // a dynamic partitioned hash join, but in some cases (createReduceWork()) it looks like + // the work must be created/connected first, before the GenTezProcContext can be updated + // with the mapjoin/work relationship. 
+ ReduceWork reduceWork = (ReduceWork) followingWork; + if (reduceWork.getReducer() instanceof MapJoinOperator) { + MapJoinOperator joinOp = (MapJoinOperator) reduceWork.getReducer(); + if (joinOp.getConf().isDynamicPartitionHashJoin()) { + return EdgeType.CUSTOM_SIMPLE_EDGE; + } + } + } + return EdgeType.SIMPLE_EDGE; + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWork.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWork.java index 6db8220..a892e9b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWork.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWork.java @@ -40,6 +40,7 @@ import org.apache.hadoop.hive.ql.lib.NodeProcessor; import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; import org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils; +import org.apache.hadoop.hive.ql.optimizer.ReduceSinkMapJoinProc; import org.apache.hadoop.hive.ql.plan.BaseWork; import org.apache.hadoop.hive.ql.plan.MergeJoinWork; import org.apache.hadoop.hive.ql.plan.OperatorDesc; @@ -169,8 +170,11 @@ public Object process(Node nd, Stack stack, mergeJoinWork.addMergedWork(work, null, context.leafOperatorToFollowingWork); Operator parentOp = getParentFromStack(context.currentMergeJoinOperator, stack); + // Set the big table position. Both the reduce work and merge join operator + // should be set with the same value. 
int pos = context.currentMergeJoinOperator.getTagForOperator(parentOp); work.setTag(pos); + context.currentMergeJoinOperator.getConf().setBigTablePosition(pos); tezWork.setVertexType(work, VertexType.MULTI_INPUT_UNINITIALIZED_EDGES); for (BaseWork parentWork : tezWork.getParents(work)) { TezEdgeProperty edgeProp = tezWork.getEdgeProperty(parentWork, work); @@ -194,6 +198,50 @@ public Object process(Node nd, Stack stack, // remember which mapjoin operator links with which work if (!context.currentMapJoinOperators.isEmpty()) { for (MapJoinOperator mj: context.currentMapJoinOperators) { + // For dynamic partitioned hash join, ReduceSinkMapJoinProc rule may not get run for all + // of the ReduceSink parents, because the parents of the MapJoin operator get + // removed later on in this method. Keep track of the parent to mapjoin mapping + // so we can later run the same logic that is run in ReduceSinkMapJoinProc. + if (mj.getConf().isDynamicPartitionHashJoin()) { + // Since this is a dynamic partitioned hash join, the work for this join should be a ReduceWork + ReduceWork reduceWork = (ReduceWork) work; + int bigTablePosition = mj.getConf().getPosBigTable(); + reduceWork.setTag(bigTablePosition); + + // Use context.mapJoinParentMap to get the original RS parents, because + // the MapJoin's parents may have been replaced by dummy operator. 
List> mapJoinOriginalParents = context.mapJoinParentMap.get(mj); + if (mapJoinOriginalParents == null) { + throw new SemanticException("Unexpected error - context.mapJoinParentMap did not have an entry for " + mj); + } + for (int pos = 0; pos < mapJoinOriginalParents.size(); ++pos) { + // This processing only needs to happen for the small tables + if (pos == bigTablePosition) { + continue; + } + Operator parentOp = mapJoinOriginalParents.get(pos); + context.smallTableParentToMapJoinMap.put(parentOp, mj); + + ReduceSinkOperator parentRS = (ReduceSinkOperator) parentOp; + + // TableDesc needed for dynamic partitioned hash join + GenMapRedUtils.setKeyAndValueDesc(reduceWork, parentRS); + + // For small table RS parents that have already been processed, we need to + // register the RS tag and the name of its work with the reduce work that contains this map join. + // This was not being done for normal mapjoins, where the small table typically + // has its ReduceSink parent removed. + if (!context.mapJoinToUnprocessedSmallTableReduceSinks.get(mj).contains(parentRS)) { + // This reduce sink has been processed already, so the work for the parentRS exists + BaseWork parentWork = ReduceSinkMapJoinProc.getMapJoinParentWork(context, parentRS); + int tag = parentRS.getConf().getTag(); + tag = (tag == -1 ? 0 : tag); + reduceWork.getTagToInput().put(tag, parentWork.getName()); + } + + } + } + LOG.debug("Processing map join: " + mj); // remember the mapping in case we scan another branch of the // mapjoin later @@ -373,15 +421,44 @@ public Object process(Node nd, Stack stack, // remember the output name of the reduce sink rs.getConf().setOutputName(rWork.getName()); + // For dynamic partitioned hash join, run the ReduceSinkMapJoinProc logic for any + // ReduceSink parents that we missed. 
+ MapJoinOperator mj = context.smallTableParentToMapJoinMap.get(rs); + if (mj != null) { + // Only need to run the logic for tables we missed + if (context.mapJoinToUnprocessedSmallTableReduceSinks.get(mj).contains(rs)) { + // ReduceSinkMapJoinProc logic does not work unless the ReduceSink is connected as + // a parent of the MapJoin, but at this point we have already removed all of the + // parents from the MapJoin. + // Try temporarily adding the RS as a parent + ArrayList> tempMJParents = new ArrayList>(); + tempMJParents.add(rs); + mj.setParentOperators(tempMJParents); + // ReduceSink also needs MapJoin as child + List> rsChildren = rs.getChildOperators(); + rsChildren.add(mj); + + // Since the MapJoin has had all of its other parents removed at this point, + // it would be bad here if processReduceSinkToHashJoin() tries to do anything + // with the RS parent based on its position in the list of parents. + ReduceSinkMapJoinProc.processReduceSinkToHashJoin(rs, mj, context); + + // Remove any parents from MapJoin again + mj.removeParents(); + // TODO: do we also need to remove the MapJoin from the list of RS's children? 
+ } + } + if (!context.connectedReduceSinks.contains(rs)) { // add dependency between the two work items TezEdgeProperty edgeProp; + EdgeType edgeType = utils.determineEdgeType(work, followingWork); if (rWork.isAutoReduceParallelism()) { edgeProp = - new TezEdgeProperty(context.conf, EdgeType.SIMPLE_EDGE, true, + new TezEdgeProperty(context.conf, edgeType, true, rWork.getMinReduceTasks(), rWork.getMaxReduceTasks(), bytesPerReducer); } else { - edgeProp = new TezEdgeProperty(EdgeType.SIMPLE_EDGE); + edgeProp = new TezEdgeProperty(edgeType); } tezWork.connect(work, followingWork, edgeProp); context.connectedReduceSinks.add(rs); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java index a342738..ed896e4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java @@ -44,7 +44,7 @@ // Their function is mainly as root ops to give the mapjoin the correct // schema info. 
List dummyOps; - int tag; + int tag = 0; private final List sortColNames = new ArrayList(); private MapredLocalWork mrLocalWork; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/CommonMergeJoinDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/CommonMergeJoinDesc.java index f9c34cb..cce9bc4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/CommonMergeJoinDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/CommonMergeJoinDesc.java @@ -45,4 +45,8 @@ public int getNumBuckets() { public int getBigTablePosition() { return mapJoinConversionPos; } + + public void setBigTablePosition(int pos) { + mapJoinConversionPos = pos; + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java index fb3c4a3..e291a48 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hive.ql.exec.FunctionRegistry; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.UDF; +import org.apache.hadoop.hive.ql.optimizer.ConstantPropagateProcFactory; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge; @@ -278,6 +279,59 @@ private static ExprNodeDesc backtrack(ExprNodeColumnDesc column, Operator cur throw new SemanticException("Met multiple parent operators"); } + public static List resolveJoinKeysAsRSColumns(List sourceList, + Operator reduceSinkOp) { + ArrayList result = new ArrayList(sourceList.size()); + for (ExprNodeDesc source : sourceList) { + ExprNodeDesc newExpr = resolveJoinKeysAsRSColumns(source, reduceSinkOp); + if (newExpr == null) { + return null; + } + result.add(newExpr); + } + return result; + } + + /** + * Join keys are expressions based on the select operator. 
Resolve the expressions so they + * are based on the ReduceSink operator + * SEL -> RS -> JOIN + * @param source join key expression to resolve; may be null + * @param reduceSinkOp ReduceSink operator whose columnExprMap and schema are searched + * @return a column expression for the matching ReduceSink output column, or null if source is null or no match is found + */ + public static ExprNodeDesc resolveJoinKeysAsRSColumns(ExprNodeDesc source, Operator reduceSinkOp) { + // Assuming this is only being done for join keys. As a result we shouldn't have to recursively + // check any nested child expressions, because the result of the expression should exist as an + // output column of the ReduceSink operator + if (source == null) { + return null; + } + + // columnExprMap has the reverse of what we need - a mapping of the internal column names + // to the ExprNodeDesc from the previous operation. + // Find the key/value where the ExprNodeDesc value matches the column we are searching for. + // The key portion of the entry will be the internal column name for the join key expression. + for (Map.Entry mapEntry : reduceSinkOp.getColumnExprMap().entrySet()) { + if (mapEntry.getValue().isSame(source)) { + String columnInternalName = mapEntry.getKey(); + if (source instanceof ExprNodeColumnDesc) { + // The join key is a table column. Create the ExprNodeDesc based on this column. + ColumnInfo columnInfo = reduceSinkOp.getSchema().getColumnInfo(columnInternalName); + return new ExprNodeColumnDesc(columnInfo); + } else { + // Join key expression is likely some expression involving functions/operators, so there + // is no actual table column for this. But the ReduceSink operator should still have an + // output column corresponding to this expression, using the columnInternalName. + // TODO: does tableAlias matter for this kind of expression? 
+ return new ExprNodeColumnDesc(source.getTypeInfo(), columnInternalName, "", false); + } + } + } + + return null; // Couldn't find reference to expression + } + public static ExprNodeDesc[] extractComparePair(ExprNodeDesc expr1, ExprNodeDesc expr2) { expr1 = extractConstant(expr1); expr2 = extractConstant(expr2); @@ -483,4 +537,65 @@ public static PrimitiveTypeInfo deriveMinArgumentCast( return exprColLst; } + + public static List flattenExprList(List sourceList) { + ArrayList result = new ArrayList(sourceList.size()); + for (ExprNodeDesc source : sourceList) { + result.add(flattenExpr(source)); + } + return result; + } + + /** + * A normal reduce operator's rowObjectInspector looks like a struct containing + * nested key/value structs that contain the column values: + * { key: { reducesinkkey0:int }, value: { _col0:int, _col1:int, .. } } + * + * While the rowObjectInspector looks the same for vectorized queries during + * compilation time, within the tasks at query execution the rowObjectInspector + * has changed to a flatter structure without nested key/value structs: + * { 'key.reducesinkkey0':int, 'value._col0':int, 'value._col1':int, .. } + * + * Trying to fetch 'key.reducesinkkey0' by name from the list of flattened + * ObjectInspectors does not work because the '.' gets interpreted as a field member, + * even though it is a flattened list of column values. + * This workaround converts the column name referenced in the ExprNodeDesc + * from a nested field name (key.reducesinkkey0) to key_reducesinkkey0, + * simply by replacing '.' with '_'. 
+ * @param source + * @return + */ + public static ExprNodeDesc flattenExpr(ExprNodeDesc source) { + if (source instanceof ExprNodeGenericFuncDesc) { + // all children expression should be resolved + ExprNodeGenericFuncDesc function = (ExprNodeGenericFuncDesc) source.clone(); + List newChildren = flattenExprList(function.getChildren()); + for (ExprNodeDesc newChild : newChildren) { + if (newChild == null) { + // Could not resolve all of the function children, fail + return null; + } + } + function.setChildren(newChildren); + return function; + } + if (source instanceof ExprNodeColumnDesc) { + ExprNodeColumnDesc column = (ExprNodeColumnDesc) source; + // Create a new ColumnInfo, replacing STRUCT.COLUMN with STRUCT_COLUMN + String newColumn = column.getColumn().replace('.', '_'); + return new ExprNodeColumnDesc(source.getTypeInfo(), newColumn, column.getTabAlias(), false); + } + if (source instanceof ExprNodeFieldDesc) { + // field expression should be resolved + ExprNodeFieldDesc field = (ExprNodeFieldDesc) source.clone(); + ExprNodeDesc fieldDesc = flattenExpr(field.getDesc()); + if (fieldDesc == null) { + return null; + } + field.setDesc(fieldDesc); + return field; + } + // constant or null expr, just return + return source; + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java index cee9100..e27b89b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java @@ -29,6 +29,8 @@ import java.util.Set; import org.apache.hadoop.hive.ql.plan.Explain.Level; +import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; + /** * Map Join operator Descriptor implementation. * @@ -71,6 +73,7 @@ protected boolean genJoinKeys = true; private boolean isHybridHashJoin; + private boolean isDynamicPartitionHashJoin = false; // Extra parameters only for vectorization. 
private VectorMapJoinDesc vectorDesc; @@ -369,4 +372,12 @@ public void setGenJoinKeys(boolean genJoinKeys) { public boolean getGenJoinKeys() { return genJoinKeys; } + + public boolean isDynamicPartitionHashJoin() { + return isDynamicPartitionHashJoin; + } + + public void setDynamicPartitionHashJoin(boolean isDistributedHashJoin) { + this.isDynamicPartitionHashJoin = isDistributedHashJoin; + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java index a78a92e..020d6de 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java @@ -136,7 +136,7 @@ public ObjectInspector getValueObjectInspector() { return null; } if (valueObjectInspector == null) { - valueObjectInspector = getObjectInspector(tagToValueDesc.get(0)); + valueObjectInspector = getObjectInspector(tagToValueDesc.get(tag)); } return valueObjectInspector; } diff --git a/ql/src/test/queries/clientpositive/tez_dynpart_hashjoin_1.q b/ql/src/test/queries/clientpositive/tez_dynpart_hashjoin_1.q new file mode 100644 index 0000000..e3325c4 --- /dev/null +++ b/ql/src/test/queries/clientpositive/tez_dynpart_hashjoin_1.q @@ -0,0 +1,101 @@ + +set hive.explain.user=false; +set hive.auto.convert.join=false; +set hive.optimize.dynamic.partition.hashjoin=false; + +-- First try with regular mergejoin +explain +select + * +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +order by a.cint; + +select + * +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +order by a.cint; + +explain +select + count(*) +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null; + +select + count(*) +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + 
a.cint between 1000000 and 3000000 and b.cbigint is not null; + +explain +select + a.csmallint, count(*) c1 +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +group by a.csmallint +order by c1; + +select + a.csmallint, count(*) c1 +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +group by a.csmallint +order by c1; + +set hive.auto.convert.join=true; +set hive.optimize.dynamic.partition.hashjoin=true; +set hive.auto.convert.join.noconditionaltask.size=200000; +set hive.exec.reducers.bytes.per.reducer=200000; + +-- Try with dynamically partitioned hashjoin +explain +select + * +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +order by a.cint; + +select + * +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +order by a.cint; + +explain +select + count(*) +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null; + +select + count(*) +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null; + +explain +select + a.csmallint, count(*) c1 +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +group by a.csmallint +order by c1; + +select + a.csmallint, count(*) c1 +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +group by a.csmallint +order by c1; diff --git a/ql/src/test/queries/clientpositive/tez_dynpart_hashjoin_2.q b/ql/src/test/queries/clientpositive/tez_dynpart_hashjoin_2.q new file mode 100644 index 0000000..af4e2b8 --- /dev/null +++ 
b/ql/src/test/queries/clientpositive/tez_dynpart_hashjoin_2.q @@ -0,0 +1,83 @@ + +set hive.explain.user=false; +set hive.auto.convert.join=false; +set hive.optimize.dynamic.partition.hashjoin=false; + +-- Multiple tables, and change the order of the big table (alltypesorc) +-- First try with regular mergejoin +explain +select + a.* +from + alltypesorc a, + src b, + src c +where + a.csmallint = cast(b.key as int) and a.csmallint = (cast(c.key as int) + 0) + and (a.csmallint < 100) +order by a.csmallint, a.ctinyint, a.cint; + +select + a.* +from + alltypesorc a, + src b, + src c +where + a.csmallint = cast(b.key as int) and a.csmallint = (cast(c.key as int) + 0) + and (a.csmallint < 100) +order by a.csmallint, a.ctinyint, a.cint; + +set hive.auto.convert.join=true; +set hive.optimize.dynamic.partition.hashjoin=true; +set hive.auto.convert.join.noconditionaltask.size=2000; +set hive.exec.reducers.bytes.per.reducer=200000; + +-- noconditionaltask.size needs to be low enough that entire filtered table results do not fit in one task's hash table +-- Try with dynamically partitioned hash join +explain +select + a.* +from + alltypesorc a, + src b, + src c +where + a.csmallint = cast(b.key as int) and a.csmallint = (cast(c.key as int) + 0) + and (a.csmallint < 100) +order by a.csmallint, a.ctinyint, a.cint; + +select + a.* +from + alltypesorc a, + src b, + src c +where + a.csmallint = cast(b.key as int) and a.csmallint = (cast(c.key as int) + 0) + and (a.csmallint < 100) +order by a.csmallint, a.ctinyint, a.cint; + +-- Try different order of tables +explain +select + a.* +from + src b, + alltypesorc a, + src c +where + a.csmallint = cast(b.key as int) and a.csmallint = (cast(c.key as int) + 0) + and (a.csmallint < 100) +order by a.csmallint, a.ctinyint, a.cint; + +select + a.* +from + src b, + alltypesorc a, + src c +where + a.csmallint = cast(b.key as int) and a.csmallint = (cast(c.key as int) + 0) + and (a.csmallint < 100) +order by a.csmallint, a.ctinyint, a.cint; diff 
--git a/ql/src/test/queries/clientpositive/tez_vector_dynpart_hashjoin_1.q b/ql/src/test/queries/clientpositive/tez_vector_dynpart_hashjoin_1.q new file mode 100644 index 0000000..65fee16 --- /dev/null +++ b/ql/src/test/queries/clientpositive/tez_vector_dynpart_hashjoin_1.q @@ -0,0 +1,102 @@ + +set hive.explain.user=false; +set hive.auto.convert.join=false; +set hive.optimize.dynamic.partition.hashjoin=false; + +-- First try with regular mergejoin +explain +select + * +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +order by a.cint; + +select + * +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +order by a.cint; + +explain +select + count(*) +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null; + +select + count(*) +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null; + +explain +select + a.csmallint, count(*) c1 +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +group by a.csmallint +order by c1; + +select + a.csmallint, count(*) c1 +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +group by a.csmallint +order by c1; + +set hive.auto.convert.join=true; +set hive.optimize.dynamic.partition.hashjoin=true; +set hive.auto.convert.join.noconditionaltask.size=200000; +set hive.exec.reducers.bytes.per.reducer=200000; +set hive.vectorized.execution.enabled=true; + +-- Try with dynamically partitioned hashjoin +explain +select + * +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +order by a.cint; + +select + * +from alltypesorc a join 
alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +order by a.cint; + +explain +select + count(*) +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null; + +select + count(*) +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null; + +explain +select + a.csmallint, count(*) c1 +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +group by a.csmallint +order by c1; + +select + a.csmallint, count(*) c1 +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +group by a.csmallint +order by c1; diff --git a/ql/src/test/queries/clientpositive/tez_vector_dynpart_hashjoin_2.q b/ql/src/test/queries/clientpositive/tez_vector_dynpart_hashjoin_2.q new file mode 100644 index 0000000..606f455 --- /dev/null +++ b/ql/src/test/queries/clientpositive/tez_vector_dynpart_hashjoin_2.q @@ -0,0 +1,84 @@ + +set hive.explain.user=false; +set hive.auto.convert.join=false; +set hive.optimize.dynamic.partition.hashjoin=false; + +-- Multiple tables, and change the order of the big table (alltypesorc) +-- First try with regular mergejoin +explain +select + a.* +from + alltypesorc a, + src b, + src c +where + a.csmallint = cast(b.key as int) and a.csmallint = (cast(c.key as int) + 0) + and (a.csmallint < 100) +order by a.csmallint, a.ctinyint, a.cint; + +select + a.* +from + alltypesorc a, + src b, + src c +where + a.csmallint = cast(b.key as int) and a.csmallint = (cast(c.key as int) + 0) + and (a.csmallint < 100) +order by a.csmallint, a.ctinyint, a.cint; + +set hive.auto.convert.join=true; +set hive.optimize.dynamic.partition.hashjoin=true; +set hive.auto.convert.join.noconditionaltask.size=2000; +set hive.exec.reducers.bytes.per.reducer=200000; +set 
hive.vectorized.execution.enabled=true; + +-- noconditionaltask.size needs to be low enough that entire filtered table results do not fit in one task's hash table +-- Try with dynamically partitioned hash join +explain +select + a.* +from + alltypesorc a, + src b, + src c +where + a.csmallint = cast(b.key as int) and a.csmallint = (cast(c.key as int) + 0) + and (a.csmallint < 100) +order by a.csmallint, a.ctinyint, a.cint; + +select + a.* +from + alltypesorc a, + src b, + src c +where + a.csmallint = cast(b.key as int) and a.csmallint = (cast(c.key as int) + 0) + and (a.csmallint < 100) +order by a.csmallint, a.ctinyint, a.cint; + +-- Try different order of tables +explain +select + a.* +from + src b, + alltypesorc a, + src c +where + a.csmallint = cast(b.key as int) and a.csmallint = (cast(c.key as int) + 0) + and (a.csmallint < 100) +order by a.csmallint, a.ctinyint, a.cint; + +select + a.* +from + src b, + alltypesorc a, + src c +where + a.csmallint = cast(b.key as int) and a.csmallint = (cast(c.key as int) + 0) + and (a.csmallint < 100) +order by a.csmallint, a.ctinyint, a.cint; diff --git a/ql/src/test/results/clientpositive/tez/tez_dynpart_hashjoin_1.q.out b/ql/src/test/results/clientpositive/tez/tez_dynpart_hashjoin_1.q.out new file mode 100644 index 0000000..8a46807 --- /dev/null +++ b/ql/src/test/results/clientpositive/tez/tez_dynpart_hashjoin_1.q.out @@ -0,0 +1,791 @@ +PREHOOK: query: -- First try with regular mergejoin +explain +select + * +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +order by a.cint +PREHOOK: type: QUERY +POSTHOOK: query: -- First try with regular mergejoin +explain +select + * +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +order by a.cint +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: 
Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (cint BETWEEN 1000000 AND 3000000 and cint is not null) (type: boolean) + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((cbigint is not null and cint BETWEEN 1000000 AND 3000000) and cint is not null) (type: boolean) + Statistics: Num rows: 1536 Data size: 330245 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint 
(type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1536 Data size: 330245 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 1536 Data size: 330245 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) + Reducer 2 + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col2 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean), _col12 (type: tinyint), _col13 (type: smallint), _col14 (type: int), _col15 (type: bigint), _col16 (type: float), _col17 (type: double), _col18 (type: string), _col19 (type: string), _col20 (type: timestamp), 
_col21 (type: timestamp), _col22 (type: boolean), _col23 (type: boolean) + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), KEY.reducesinkkey0 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), VALUE._col4 (type: double), VALUE._col5 (type: string), VALUE._col6 (type: string), VALUE._col7 (type: timestamp), VALUE._col8 (type: timestamp), VALUE._col9 (type: boolean), VALUE._col10 (type: boolean), VALUE._col11 (type: tinyint), VALUE._col12 (type: smallint), VALUE._col13 (type: int), VALUE._col14 (type: bigint), VALUE._col15 (type: float), VALUE._col16 (type: double), VALUE._col17 (type: string), VALUE._col18 (type: string), VALUE._col19 (type: timestamp), VALUE._col20 (type: timestamp), VALUE._col21 (type: boolean), VALUE._col22 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select + * +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +order by a.cint +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select + * +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +order by a.cint 
+POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +11 NULL 1000828 1531084669 11.0 NULL wM316f6NqGIkoP388j3F6 poWQQo3Upvt3Wh 1969-12-31 16:00:02.351 NULL false true 11 NULL 1000828 1531084669 11.0 NULL wM316f6NqGIkoP388j3F6 poWQQo3Upvt3Wh 1969-12-31 16:00:02.351 NULL false true +NULL -3799 1248059 1864027286 NULL -3799.0 Uhps6mMh3IfHB3j7yH62K 4KWs6gw7lv2WYd66P NULL 1969-12-31 15:59:54.622 false true NULL -3799 1248059 1864027286 NULL -3799.0 Uhps6mMh3IfHB3j7yH62K 4KWs6gw7lv2WYd66P NULL 1969-12-31 15:59:54.622 false true +NULL 10782 1286921 1864027286 NULL 10782.0 ODLrXI8882q8LS8 4KWs6gw7lv2WYd66P NULL 1969-12-31 15:59:52.138 true true NULL 10782 1286921 1864027286 NULL 10782.0 ODLrXI8882q8LS8 4KWs6gw7lv2WYd66P NULL 1969-12-31 15:59:52.138 true true +NULL -13036 1288927 -1645852809 NULL -13036.0 yinBY725P7V2 xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:00.763 true false NULL -13036 1288927 -1645852809 NULL -13036.0 yinBY725P7V2 xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:00.763 true false +11 NULL 1310786 -413875656 11.0 NULL W0rvA4H1xn0xMG4uk0 8yVVjG 1969-12-31 16:00:02.351 NULL false true 11 NULL 1310786 -413875656 11.0 NULL W0rvA4H1xn0xMG4uk0 8yVVjG 1969-12-31 16:00:02.351 NULL false true +-51 NULL 2089466 -240556350 -51.0 NULL cXX24dH7tblSj46j2g C31eea0wrHHqvj 1969-12-31 16:00:08.451 NULL true true -51 NULL 2089466 -240556350 -51.0 NULL cXX24dH7tblSj46j2g C31eea0wrHHqvj 1969-12-31 16:00:08.451 NULL true true +NULL -8915 2101183 1864027286 NULL -8915.0 x7By66525 4KWs6gw7lv2WYd66P NULL 1969-12-31 16:00:05.831 false true NULL -8915 2101183 1864027286 NULL -8915.0 x7By66525 4KWs6gw7lv2WYd66P NULL 1969-12-31 16:00:05.831 false true +8 NULL 2229621 -381406148 8.0 NULL q7onkS7QRPh5ghOK oKb0bi 1969-12-31 16:00:15.892 NULL true false 8 NULL 2229621 -381406148 8.0 NULL q7onkS7QRPh5ghOK oKb0bi 1969-12-31 16:00:15.892 NULL true false +8 NULL 2433892 -1611863517 8.0 NULL 674ILv3V2TxFqXP6wSbL VLprkK2XfX 1969-12-31 16:00:15.892 
NULL false true 8 NULL 2433892 -1611863517 8.0 NULL 674ILv3V2TxFqXP6wSbL VLprkK2XfX 1969-12-31 16:00:15.892 NULL false true +-51 NULL 2949963 -1580871111 -51.0 NULL 0K68k3bdl7jO7 TPPAu 1969-12-31 16:00:08.451 NULL true false -51 NULL 2949963 -1580871111 -51.0 NULL 0K68k3bdl7jO7 TPPAu 1969-12-31 16:00:08.451 NULL true false +PREHOOK: query: explain +select + count(*) +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +PREHOOK: type: QUERY +POSTHOOK: query: explain +select + count(*) +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (cint BETWEEN 1000000 AND 3000000 and cint is not null) (type: boolean) + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cint (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((cbigint is not null and cint BETWEEN 1000000 AND 3000000) and cint is not null) (type: boolean) + Statistics: Num rows: 
1536 Data size: 330245 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cint (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1536 Data size: 330245 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1536 Data size: 330245 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select + count(*) +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select + count(*) +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint 
between 1000000 and 3000000 and b.cbigint is not null +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +10 +PREHOOK: query: explain +select + a.csmallint, count(*) c1 +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +group by a.csmallint +order by c1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select + a.csmallint, count(*) c1 +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +group by a.csmallint +order by c1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (cint BETWEEN 1000000 AND 3000000 and cint is not null) (type: boolean) + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: csmallint (type: smallint), cint (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: smallint) + Map 5 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((cbigint is not null and cint BETWEEN 1000000 AND 3000000) and cint 
is not null) (type: boolean) + Statistics: Num rows: 1536 Data size: 330245 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cint (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1536 Data size: 330245 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1536 Data size: 330245 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: smallint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: smallint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1689 Data size: 363162 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: bigint) + sort order: + + Statistics: Num rows: 1689 Data size: 363162 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: smallint) + Reducer 4 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: smallint), KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1689 Data size: 363162 Basic stats: COMPLETE Column stats: NONE + File Output Operator + 
compressed: false + Statistics: Num rows: 1689 Data size: 363162 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select + a.csmallint, count(*) c1 +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +group by a.csmallint +order by c1 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select + a.csmallint, count(*) c1 +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +group by a.csmallint +order by c1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +10782 1 +-3799 1 +-8915 1 +-13036 1 +NULL 6 +PREHOOK: query: -- Try with dynamically partitioned hashjoin +explain +select + * +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +order by a.cint +PREHOOK: type: QUERY +POSTHOOK: query: -- Try with dynamically partitioned hashjoin +explain +select + * +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +order by a.cint +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Filter 
Operator + predicate: (cint BETWEEN 1000000 AND 3000000 and cint is not null) (type: boolean) + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((cbigint is not null and cint BETWEEN 1000000 AND 3000000) and cint is not null) (type: boolean) + Statistics: Num rows: 1536 Data size: 330245 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, 
_col10, _col11 + Statistics: Num rows: 1536 Data size: 330245 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 1536 Data size: 330245 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) + Reducer 2 + Reduce Operator Tree: + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: int) + 1 KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + input vertices: + 1 Map 4 + Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean), _col12 (type: tinyint), _col13 (type: smallint), _col14 (type: int), _col15 (type: bigint), _col16 (type: float), _col17 (type: double), _col18 (type: string), _col19 (type: string), _col20 (type: timestamp), _col21 (type: timestamp), _col22 (type: boolean), _col23 (type: boolean) + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), KEY.reducesinkkey0 (type: int), 
VALUE._col2 (type: bigint), VALUE._col3 (type: float), VALUE._col4 (type: double), VALUE._col5 (type: string), VALUE._col6 (type: string), VALUE._col7 (type: timestamp), VALUE._col8 (type: timestamp), VALUE._col9 (type: boolean), VALUE._col10 (type: boolean), VALUE._col11 (type: tinyint), VALUE._col12 (type: smallint), VALUE._col13 (type: int), VALUE._col14 (type: bigint), VALUE._col15 (type: float), VALUE._col16 (type: double), VALUE._col17 (type: string), VALUE._col18 (type: string), VALUE._col19 (type: timestamp), VALUE._col20 (type: timestamp), VALUE._col21 (type: boolean), VALUE._col22 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select + * +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +order by a.cint +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select + * +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +order by a.cint +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +11 NULL 1000828 1531084669 11.0 NULL wM316f6NqGIkoP388j3F6 poWQQo3Upvt3Wh 1969-12-31 16:00:02.351 NULL false true 11 NULL 1000828 1531084669 
11.0 NULL wM316f6NqGIkoP388j3F6 poWQQo3Upvt3Wh 1969-12-31 16:00:02.351 NULL false true +NULL -3799 1248059 1864027286 NULL -3799.0 Uhps6mMh3IfHB3j7yH62K 4KWs6gw7lv2WYd66P NULL 1969-12-31 15:59:54.622 false true NULL -3799 1248059 1864027286 NULL -3799.0 Uhps6mMh3IfHB3j7yH62K 4KWs6gw7lv2WYd66P NULL 1969-12-31 15:59:54.622 false true +NULL 10782 1286921 1864027286 NULL 10782.0 ODLrXI8882q8LS8 4KWs6gw7lv2WYd66P NULL 1969-12-31 15:59:52.138 true true NULL 10782 1286921 1864027286 NULL 10782.0 ODLrXI8882q8LS8 4KWs6gw7lv2WYd66P NULL 1969-12-31 15:59:52.138 true true +NULL -13036 1288927 -1645852809 NULL -13036.0 yinBY725P7V2 xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:00.763 true false NULL -13036 1288927 -1645852809 NULL -13036.0 yinBY725P7V2 xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:00.763 true false +11 NULL 1310786 -413875656 11.0 NULL W0rvA4H1xn0xMG4uk0 8yVVjG 1969-12-31 16:00:02.351 NULL false true 11 NULL 1310786 -413875656 11.0 NULL W0rvA4H1xn0xMG4uk0 8yVVjG 1969-12-31 16:00:02.351 NULL false true +-51 NULL 2089466 -240556350 -51.0 NULL cXX24dH7tblSj46j2g C31eea0wrHHqvj 1969-12-31 16:00:08.451 NULL true true -51 NULL 2089466 -240556350 -51.0 NULL cXX24dH7tblSj46j2g C31eea0wrHHqvj 1969-12-31 16:00:08.451 NULL true true +NULL -8915 2101183 1864027286 NULL -8915.0 x7By66525 4KWs6gw7lv2WYd66P NULL 1969-12-31 16:00:05.831 false true NULL -8915 2101183 1864027286 NULL -8915.0 x7By66525 4KWs6gw7lv2WYd66P NULL 1969-12-31 16:00:05.831 false true +8 NULL 2229621 -381406148 8.0 NULL q7onkS7QRPh5ghOK oKb0bi 1969-12-31 16:00:15.892 NULL true false 8 NULL 2229621 -381406148 8.0 NULL q7onkS7QRPh5ghOK oKb0bi 1969-12-31 16:00:15.892 NULL true false +8 NULL 2433892 -1611863517 8.0 NULL 674ILv3V2TxFqXP6wSbL VLprkK2XfX 1969-12-31 16:00:15.892 NULL false true 8 NULL 2433892 -1611863517 8.0 NULL 674ILv3V2TxFqXP6wSbL VLprkK2XfX 1969-12-31 16:00:15.892 NULL false true +-51 NULL 2949963 -1580871111 -51.0 NULL 0K68k3bdl7jO7 TPPAu 1969-12-31 16:00:08.451 NULL true false -51 NULL 2949963 
-1580871111 -51.0 NULL 0K68k3bdl7jO7 TPPAu 1969-12-31 16:00:08.451 NULL true false +PREHOOK: query: explain +select + count(*) +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +PREHOOK: type: QUERY +POSTHOOK: query: explain +select + count(*) +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (cint BETWEEN 1000000 AND 3000000 and cint is not null) (type: boolean) + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cint (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((cbigint is not null and cint BETWEEN 1000000 AND 3000000) and cint is not null) (type: boolean) + Statistics: Num rows: 1536 Data size: 330245 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cint (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1536 Data size: 330245 Basic stats: COMPLETE Column stats: 
NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1536 Data size: 330245 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: int) + 1 KEY.reducesinkkey0 (type: int) + input vertices: + 1 Map 4 + Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select + count(*) +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select + count(*) +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here 
#### +10 +PREHOOK: query: explain +select + a.csmallint, count(*) c1 +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +group by a.csmallint +order by c1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select + a.csmallint, count(*) c1 +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +group by a.csmallint +order by c1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 5 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (cint BETWEEN 1000000 AND 3000000 and cint is not null) (type: boolean) + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: csmallint (type: smallint), cint (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: smallint) + Map 5 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((cbigint is not null and cint BETWEEN 1000000 AND 3000000) and cint is not null) (type: boolean) + Statistics: Num rows: 1536 Data size: 330245 Basic stats: COMPLETE Column stats: NONE + Select Operator 
+ expressions: cint (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1536 Data size: 330245 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1536 Data size: 330245 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: int) + 1 KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 5 + Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Group By Operator + aggregations: count() + keys: _col0 (type: smallint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: smallint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1689 Data size: 363162 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: bigint) + sort order: + + Statistics: Num rows: 1689 Data size: 363162 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: smallint) + Reducer 4 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: smallint), KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1689 Data size: 363162 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1689 Data 
size: 363162 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select + a.csmallint, count(*) c1 +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +group by a.csmallint +order by c1 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select + a.csmallint, count(*) c1 +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +group by a.csmallint +order by c1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +10782 1 +-3799 1 +-8915 1 +-13036 1 +NULL 6 diff --git a/ql/src/test/results/clientpositive/tez/tez_dynpart_hashjoin_2.q.out b/ql/src/test/results/clientpositive/tez/tez_dynpart_hashjoin_2.q.out new file mode 100644 index 0000000..0cf6b79 --- /dev/null +++ b/ql/src/test/results/clientpositive/tez/tez_dynpart_hashjoin_2.q.out @@ -0,0 +1,637 @@ +PREHOOK: query: -- Multiple tables, and change the order of the big table (alltypesorc) +-- First try with regular mergejoin +explain +select + a.* +from + alltypesorc a, + src b, + src c +where + a.csmallint = cast(b.key as int) and a.csmallint = (cast(c.key as int) + 0) + and (a.csmallint < 100) +order by a.csmallint, a.ctinyint, a.cint +PREHOOK: type: QUERY +POSTHOOK: query: -- Multiple tables, and change the order of the big table (alltypesorc) +-- First try with regular mergejoin +explain +select + a.* +from + alltypesorc a, + src b, + src c +where + a.csmallint = cast(b.key as int) and a.csmallint = (cast(c.key as int) + 0) + and (a.csmallint < 100) +order by a.csmallint, 
a.ctinyint, a.cint +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToInteger(key) + 0) is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: (UDFToInteger(_col0) + 0) (type: int) + sort order: + + Map-reduce partition columns: (UDFToInteger(_col0) + 0) (type: int) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((csmallint < 100) and UDFToInteger(csmallint) is not null) (type: boolean) + Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + 
key expressions: UDFToInteger(_col1) (type: int) + sort order: + + Map-reduce partition columns: UDFToInteger(_col1) (type: int) + Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) + Map 6 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: UDFToInteger(key) is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToInteger(_col0) (type: int) + sort order: + + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 (UDFToInteger(_col0) + 0) (type: int) + 1 UDFToInteger(_col1) (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 2477 Data size: 532794 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: tinyint), _col2 (type: smallint), _col3 (type: int), _col4 (type: bigint), _col5 (type: float), _col6 (type: double), _col7 (type: string), _col8 (type: string), _col9 (type: timestamp), _col10 (type: timestamp), _col11 (type: boolean), _col12 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, 
_col11 + Statistics: Num rows: 2477 Data size: 532794 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: smallint), _col0 (type: tinyint), _col2 (type: int) + sort order: +++ + Statistics: Num rows: 2477 Data size: 532794 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: tinyint), KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey2 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: float), VALUE._col2 (type: double), VALUE._col3 (type: string), VALUE._col4 (type: string), VALUE._col5 (type: timestamp), VALUE._col6 (type: timestamp), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 2477 Data size: 532794 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2477 Data size: 532794 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 UDFToInteger(_col1) (type: int) + 1 UDFToInteger(_col0) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 2252 Data size: 484359 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: tinyint), _col1 (type: smallint), _col10 (type: boolean), _col11 (type: boolean), _col2 
(type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp) + outputColumnNames: _col0, _col1, _col10, _col11, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 2252 Data size: 484359 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToInteger(_col1) (type: int) + sort order: + + Map-reduce partition columns: UDFToInteger(_col1) (type: int) + Statistics: Num rows: 2252 Data size: 484359 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select + a.* +from + alltypesorc a, + src b, + src c +where + a.csmallint = cast(b.key as int) and a.csmallint = (cast(c.key as int) + 0) + and (a.csmallint < 100) +order by a.csmallint, a.ctinyint, a.cint +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select + a.* +from + alltypesorc a, + src b, + src c +where + a.csmallint = cast(b.key as int) and a.csmallint = (cast(c.key as int) + 0) + and (a.csmallint < 100) +order by a.csmallint, a.ctinyint, a.cint +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Input: default@src +#### A masked pattern was here #### +-19 8 626923679 NULL -19.0 8.0 821UdmGbkEf4j NULL 1969-12-31 15:59:46.619 1969-12-31 15:59:46.95 true NULL +6 8 528534767 NULL 6.0 8.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:45.459 1969-12-31 16:00:00.236 true NULL +NULL 9 -470743566 -1887561756 NULL 9.0 swx5K33Sm5qcKR5B 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 
16:00:07.318 true false +NULL 10 813877020 -1645852809 NULL 10.0 4QG23O2GKF6BUe13O7A2C xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.851 false false +-62 10 528534767 NULL -62.0 10.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:52.265 1969-12-31 15:59:56.584 true NULL +NULL 19 312515097 1864027286 NULL 19.0 ds5YqbRvhf3Sb2 4KWs6gw7lv2WYd66P NULL 1969-12-31 15:59:56.211 false true +-7 19 528534767 NULL -7.0 19.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:13.994 1969-12-31 15:59:55.362 true NULL +-45 20 253665376 NULL -45.0 20.0 1cGVWH7n1QU NULL 1969-12-31 16:00:09.949 1969-12-31 16:00:10.979 true NULL +NULL 34 510824788 -1887561756 NULL 34.0 nj1bXoh6k 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 15:59:46.017 true false +41 37 528534767 NULL 41.0 37.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:52.817 1969-12-31 15:59:53.672 true NULL +41 37 528534767 NULL 41.0 37.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:52.817 1969-12-31 15:59:53.672 true NULL +41 37 528534767 NULL 41.0 37.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:52.817 1969-12-31 15:59:53.672 true NULL +41 37 528534767 NULL 41.0 37.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:52.817 1969-12-31 15:59:53.672 true NULL +61 41 528534767 NULL 61.0 41.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:55.708 1969-12-31 16:00:14.412 true NULL +NULL 58 -144190833 -1645852809 NULL 58.0 122J3HlhqBW1D43 xH7445Rals48VOulSyR5F NULL 1969-12-31 15:59:46.315 true false +NULL 58 -144190833 -1645852809 NULL 58.0 122J3HlhqBW1D43 xH7445Rals48VOulSyR5F NULL 1969-12-31 15:59:46.315 true false +NULL 58 -144190833 -1645852809 NULL 58.0 122J3HlhqBW1D43 xH7445Rals48VOulSyR5F NULL 1969-12-31 15:59:46.315 true false +NULL 58 -144190833 -1645852809 NULL 58.0 122J3HlhqBW1D43 xH7445Rals48VOulSyR5F NULL 1969-12-31 15:59:46.315 true false +NULL 67 790444583 -1645852809 NULL 67.0 xptM81y xH7445Rals48VOulSyR5F NULL 1969-12-31 15:59:58.622 false false +NULL 67 790444583 -1645852809 NULL 67.0 xptM81y xH7445Rals48VOulSyR5F NULL 1969-12-31 15:59:58.622 false false +NULL 67 790444583 
-1645852809 NULL 67.0 xptM81y xH7445Rals48VOulSyR5F NULL 1969-12-31 15:59:58.622 false false +NULL 67 790444583 -1645852809 NULL 67.0 xptM81y xH7445Rals48VOulSyR5F NULL 1969-12-31 15:59:58.622 false false +-46 72 626923679 NULL -46.0 72.0 821UdmGbkEf4j NULL 1969-12-31 16:00:11.411 1969-12-31 16:00:05.539 true NULL +-46 72 626923679 NULL -46.0 72.0 821UdmGbkEf4j NULL 1969-12-31 16:00:11.411 1969-12-31 16:00:05.539 true NULL +-46 72 626923679 NULL -46.0 72.0 821UdmGbkEf4j NULL 1969-12-31 16:00:11.411 1969-12-31 16:00:05.539 true NULL +-46 72 626923679 NULL -46.0 72.0 821UdmGbkEf4j NULL 1969-12-31 16:00:11.411 1969-12-31 16:00:05.539 true NULL +39 74 626923679 NULL 39.0 74.0 821UdmGbkEf4j NULL 1969-12-31 16:00:10.403 1969-12-31 16:00:12.52 true NULL +47 74 626923679 NULL 47.0 74.0 821UdmGbkEf4j NULL 1969-12-31 15:59:57.849 1969-12-31 15:59:57.569 true NULL +-22 77 528534767 NULL -22.0 77.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:45.928 1969-12-31 15:59:43.621 true NULL +PREHOOK: query: -- noconditionaltask.size needs to be low enough that entire filtered table results do not fit in one task's hash table +-- Try with dynamically partitioned hash join +explain +select + a.* +from + alltypesorc a, + src b, + src c +where + a.csmallint = cast(b.key as int) and a.csmallint = (cast(c.key as int) + 0) + and (a.csmallint < 100) +order by a.csmallint, a.ctinyint, a.cint +PREHOOK: type: QUERY +POSTHOOK: query: -- noconditionaltask.size needs to be low enough that entire filtered table results do not fit in one task's hash table +-- Try with dynamically partitioned hash join +explain +select + a.* +from + alltypesorc a, + src b, + src c +where + a.csmallint = cast(b.key as int) and a.csmallint = (cast(c.key as int) + 0) + and (a.csmallint < 100) +order by a.csmallint, a.ctinyint, a.cint +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 3 <- Map 2 
(CUSTOM_SIMPLE_EDGE), Map 6 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToInteger(key) + 0) is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: (UDFToInteger(_col0) + 0) (type: int) + sort order: + + Map-reduce partition columns: (UDFToInteger(_col0) + 0) (type: int) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map 2 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((csmallint < 100) and UDFToInteger(csmallint) is not null) (type: boolean) + Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToInteger(_col1) (type: int) + sort order: + + Map-reduce partition columns: UDFToInteger(_col1) (type: int) + Statistics: Num rows: 2048 Data size: 
440327 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) + Map 6 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: UDFToInteger(key) is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToInteger(_col0) (type: int) + sort order: + + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: int) + 1 KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + input vertices: + 1 Map 6 + Statistics: Num rows: 2252 Data size: 484359 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Select Operator + expressions: _col0 (type: tinyint), _col1 (type: smallint), _col10 (type: boolean), _col11 (type: boolean), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp) + outputColumnNames: _col0, _col1, _col10, _col11, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 2252 Data size: 484359 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key 
expressions: UDFToInteger(_col1) (type: int) + sort order: + + Map-reduce partition columns: UDFToInteger(_col1) (type: int) + Statistics: Num rows: 2252 Data size: 484359 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) + Reducer 4 + Reduce Operator Tree: + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: int) + 1 KEY.reducesinkkey0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + input vertices: + 0 Map 1 + Statistics: Num rows: 2477 Data size: 532794 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Select Operator + expressions: _col1 (type: tinyint), _col2 (type: smallint), _col3 (type: int), _col4 (type: bigint), _col5 (type: float), _col6 (type: double), _col7 (type: string), _col8 (type: string), _col9 (type: timestamp), _col10 (type: timestamp), _col11 (type: boolean), _col12 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 2477 Data size: 532794 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: smallint), _col0 (type: tinyint), _col2 (type: int) + sort order: +++ + Statistics: Num rows: 2477 Data size: 532794 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) + Reducer 5 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: tinyint), KEY.reducesinkkey0 
(type: smallint), KEY.reducesinkkey2 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: float), VALUE._col2 (type: double), VALUE._col3 (type: string), VALUE._col4 (type: string), VALUE._col5 (type: timestamp), VALUE._col6 (type: timestamp), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 2477 Data size: 532794 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2477 Data size: 532794 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select + a.* +from + alltypesorc a, + src b, + src c +where + a.csmallint = cast(b.key as int) and a.csmallint = (cast(c.key as int) + 0) + and (a.csmallint < 100) +order by a.csmallint, a.ctinyint, a.cint +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select + a.* +from + alltypesorc a, + src b, + src c +where + a.csmallint = cast(b.key as int) and a.csmallint = (cast(c.key as int) + 0) + and (a.csmallint < 100) +order by a.csmallint, a.ctinyint, a.cint +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Input: default@src +#### A masked pattern was here #### +-19 8 626923679 NULL -19.0 8.0 821UdmGbkEf4j NULL 1969-12-31 15:59:46.619 1969-12-31 15:59:46.95 true NULL +6 8 528534767 NULL 6.0 8.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:45.459 1969-12-31 16:00:00.236 true NULL +NULL 9 -470743566 -1887561756 NULL 9.0 swx5K33Sm5qcKR5B 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:07.318 true false +NULL 10 813877020 -1645852809 NULL 10.0 
4QG23O2GKF6BUe13O7A2C xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.851 false false +-62 10 528534767 NULL -62.0 10.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:52.265 1969-12-31 15:59:56.584 true NULL +NULL 19 312515097 1864027286 NULL 19.0 ds5YqbRvhf3Sb2 4KWs6gw7lv2WYd66P NULL 1969-12-31 15:59:56.211 false true +-7 19 528534767 NULL -7.0 19.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:13.994 1969-12-31 15:59:55.362 true NULL +-45 20 253665376 NULL -45.0 20.0 1cGVWH7n1QU NULL 1969-12-31 16:00:09.949 1969-12-31 16:00:10.979 true NULL +NULL 34 510824788 -1887561756 NULL 34.0 nj1bXoh6k 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 15:59:46.017 true false +41 37 528534767 NULL 41.0 37.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:52.817 1969-12-31 15:59:53.672 true NULL +41 37 528534767 NULL 41.0 37.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:52.817 1969-12-31 15:59:53.672 true NULL +41 37 528534767 NULL 41.0 37.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:52.817 1969-12-31 15:59:53.672 true NULL +41 37 528534767 NULL 41.0 37.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:52.817 1969-12-31 15:59:53.672 true NULL +61 41 528534767 NULL 61.0 41.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:55.708 1969-12-31 16:00:14.412 true NULL +NULL 58 -144190833 -1645852809 NULL 58.0 122J3HlhqBW1D43 xH7445Rals48VOulSyR5F NULL 1969-12-31 15:59:46.315 true false +NULL 58 -144190833 -1645852809 NULL 58.0 122J3HlhqBW1D43 xH7445Rals48VOulSyR5F NULL 1969-12-31 15:59:46.315 true false +NULL 58 -144190833 -1645852809 NULL 58.0 122J3HlhqBW1D43 xH7445Rals48VOulSyR5F NULL 1969-12-31 15:59:46.315 true false +NULL 58 -144190833 -1645852809 NULL 58.0 122J3HlhqBW1D43 xH7445Rals48VOulSyR5F NULL 1969-12-31 15:59:46.315 true false +NULL 67 790444583 -1645852809 NULL 67.0 xptM81y xH7445Rals48VOulSyR5F NULL 1969-12-31 15:59:58.622 false false +NULL 67 790444583 -1645852809 NULL 67.0 xptM81y xH7445Rals48VOulSyR5F NULL 1969-12-31 15:59:58.622 false false +NULL 67 790444583 -1645852809 NULL 67.0 xptM81y xH7445Rals48VOulSyR5F NULL 1969-12-31 
15:59:58.622 false false +NULL 67 790444583 -1645852809 NULL 67.0 xptM81y xH7445Rals48VOulSyR5F NULL 1969-12-31 15:59:58.622 false false +-46 72 626923679 NULL -46.0 72.0 821UdmGbkEf4j NULL 1969-12-31 16:00:11.411 1969-12-31 16:00:05.539 true NULL +-46 72 626923679 NULL -46.0 72.0 821UdmGbkEf4j NULL 1969-12-31 16:00:11.411 1969-12-31 16:00:05.539 true NULL +-46 72 626923679 NULL -46.0 72.0 821UdmGbkEf4j NULL 1969-12-31 16:00:11.411 1969-12-31 16:00:05.539 true NULL +-46 72 626923679 NULL -46.0 72.0 821UdmGbkEf4j NULL 1969-12-31 16:00:11.411 1969-12-31 16:00:05.539 true NULL +39 74 626923679 NULL 39.0 74.0 821UdmGbkEf4j NULL 1969-12-31 16:00:10.403 1969-12-31 16:00:12.52 true NULL +47 74 626923679 NULL 47.0 74.0 821UdmGbkEf4j NULL 1969-12-31 15:59:57.849 1969-12-31 15:59:57.569 true NULL +-22 77 528534767 NULL -22.0 77.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:45.928 1969-12-31 15:59:43.621 true NULL +PREHOOK: query: -- Try different order of tables +explain +select + a.* +from + src b, + alltypesorc a, + src c +where + a.csmallint = cast(b.key as int) and a.csmallint = (cast(c.key as int) + 0) + and (a.csmallint < 100) +order by a.csmallint, a.ctinyint, a.cint +PREHOOK: type: QUERY +POSTHOOK: query: -- Try different order of tables +explain +select + a.* +from + src b, + alltypesorc a, + src c +where + a.csmallint = cast(b.key as int) and a.csmallint = (cast(c.key as int) + 0) + and (a.csmallint < 100) +order by a.csmallint, a.ctinyint, a.cint +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE), Map 6 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter 
Operator + predicate: (UDFToInteger(key) + 0) is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: (UDFToInteger(_col0) + 0) (type: int) + sort order: + + Map-reduce partition columns: (UDFToInteger(_col0) + 0) (type: int) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map 2 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((csmallint < 100) and UDFToInteger(csmallint) is not null) (type: boolean) + Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToInteger(_col1) (type: int) + sort order: + + Map-reduce partition columns: UDFToInteger(_col1) (type: int) + Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) + Map 6 + Map 
Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: UDFToInteger(key) is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToInteger(_col0) (type: int) + sort order: + + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: int) + 1 KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + input vertices: + 1 Map 6 + Statistics: Num rows: 2252 Data size: 484359 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Select Operator + expressions: _col0 (type: tinyint), _col1 (type: smallint), _col10 (type: boolean), _col11 (type: boolean), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp) + outputColumnNames: _col0, _col1, _col10, _col11, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 2252 Data size: 484359 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToInteger(_col1) (type: int) + sort order: + + Map-reduce partition columns: UDFToInteger(_col1) (type: int) + Statistics: Num rows: 2252 Data size: 484359 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: 
double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) + Reducer 4 + Reduce Operator Tree: + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: int) + 1 KEY.reducesinkkey0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + input vertices: + 0 Map 1 + Statistics: Num rows: 2477 Data size: 532794 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Select Operator + expressions: _col1 (type: tinyint), _col2 (type: smallint), _col3 (type: int), _col4 (type: bigint), _col5 (type: float), _col6 (type: double), _col7 (type: string), _col8 (type: string), _col9 (type: timestamp), _col10 (type: timestamp), _col11 (type: boolean), _col12 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 2477 Data size: 532794 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: smallint), _col0 (type: tinyint), _col2 (type: int) + sort order: +++ + Statistics: Num rows: 2477 Data size: 532794 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) + Reducer 5 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: tinyint), KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey2 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: float), VALUE._col2 (type: double), VALUE._col3 (type: string), VALUE._col4 (type: string), VALUE._col5 (type: timestamp), VALUE._col6 (type: timestamp), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, 
_col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 2477 Data size: 532794 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2477 Data size: 532794 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select + a.* +from + src b, + alltypesorc a, + src c +where + a.csmallint = cast(b.key as int) and a.csmallint = (cast(c.key as int) + 0) + and (a.csmallint < 100) +order by a.csmallint, a.ctinyint, a.cint +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select + a.* +from + src b, + alltypesorc a, + src c +where + a.csmallint = cast(b.key as int) and a.csmallint = (cast(c.key as int) + 0) + and (a.csmallint < 100) +order by a.csmallint, a.ctinyint, a.cint +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Input: default@src +#### A masked pattern was here #### +-19 8 626923679 NULL -19.0 8.0 821UdmGbkEf4j NULL 1969-12-31 15:59:46.619 1969-12-31 15:59:46.95 true NULL +6 8 528534767 NULL 6.0 8.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:45.459 1969-12-31 16:00:00.236 true NULL +NULL 9 -470743566 -1887561756 NULL 9.0 swx5K33Sm5qcKR5B 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:07.318 true false +NULL 10 813877020 -1645852809 NULL 10.0 4QG23O2GKF6BUe13O7A2C xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.851 false false +-62 10 528534767 NULL -62.0 10.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:52.265 1969-12-31 15:59:56.584 true NULL +NULL 19 312515097 1864027286 NULL 19.0 ds5YqbRvhf3Sb2 4KWs6gw7lv2WYd66P NULL 1969-12-31 15:59:56.211 false true +-7 19 528534767 NULL -7.0 19.0 cvLH6Eat2yFsyy7p NULL 
1969-12-31 16:00:13.994 1969-12-31 15:59:55.362 true NULL +-45 20 253665376 NULL -45.0 20.0 1cGVWH7n1QU NULL 1969-12-31 16:00:09.949 1969-12-31 16:00:10.979 true NULL +NULL 34 510824788 -1887561756 NULL 34.0 nj1bXoh6k 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 15:59:46.017 true false +41 37 528534767 NULL 41.0 37.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:52.817 1969-12-31 15:59:53.672 true NULL +41 37 528534767 NULL 41.0 37.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:52.817 1969-12-31 15:59:53.672 true NULL +41 37 528534767 NULL 41.0 37.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:52.817 1969-12-31 15:59:53.672 true NULL +41 37 528534767 NULL 41.0 37.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:52.817 1969-12-31 15:59:53.672 true NULL +61 41 528534767 NULL 61.0 41.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:55.708 1969-12-31 16:00:14.412 true NULL +NULL 58 -144190833 -1645852809 NULL 58.0 122J3HlhqBW1D43 xH7445Rals48VOulSyR5F NULL 1969-12-31 15:59:46.315 true false +NULL 58 -144190833 -1645852809 NULL 58.0 122J3HlhqBW1D43 xH7445Rals48VOulSyR5F NULL 1969-12-31 15:59:46.315 true false +NULL 58 -144190833 -1645852809 NULL 58.0 122J3HlhqBW1D43 xH7445Rals48VOulSyR5F NULL 1969-12-31 15:59:46.315 true false +NULL 58 -144190833 -1645852809 NULL 58.0 122J3HlhqBW1D43 xH7445Rals48VOulSyR5F NULL 1969-12-31 15:59:46.315 true false +NULL 67 790444583 -1645852809 NULL 67.0 xptM81y xH7445Rals48VOulSyR5F NULL 1969-12-31 15:59:58.622 false false +NULL 67 790444583 -1645852809 NULL 67.0 xptM81y xH7445Rals48VOulSyR5F NULL 1969-12-31 15:59:58.622 false false +NULL 67 790444583 -1645852809 NULL 67.0 xptM81y xH7445Rals48VOulSyR5F NULL 1969-12-31 15:59:58.622 false false +NULL 67 790444583 -1645852809 NULL 67.0 xptM81y xH7445Rals48VOulSyR5F NULL 1969-12-31 15:59:58.622 false false +-46 72 626923679 NULL -46.0 72.0 821UdmGbkEf4j NULL 1969-12-31 16:00:11.411 1969-12-31 16:00:05.539 true NULL +-46 72 626923679 NULL -46.0 72.0 821UdmGbkEf4j NULL 1969-12-31 16:00:11.411 1969-12-31 16:00:05.539 true NULL +-46 72 
626923679 NULL -46.0 72.0 821UdmGbkEf4j NULL 1969-12-31 16:00:11.411 1969-12-31 16:00:05.539 true NULL +-46 72 626923679 NULL -46.0 72.0 821UdmGbkEf4j NULL 1969-12-31 16:00:11.411 1969-12-31 16:00:05.539 true NULL +39 74 626923679 NULL 39.0 74.0 821UdmGbkEf4j NULL 1969-12-31 16:00:10.403 1969-12-31 16:00:12.52 true NULL +47 74 626923679 NULL 47.0 74.0 821UdmGbkEf4j NULL 1969-12-31 15:59:57.849 1969-12-31 15:59:57.569 true NULL +-22 77 528534767 NULL -22.0 77.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:45.928 1969-12-31 15:59:43.621 true NULL diff --git a/ql/src/test/results/clientpositive/tez/tez_vector_dynpart_hashjoin_1.q.out b/ql/src/test/results/clientpositive/tez/tez_vector_dynpart_hashjoin_1.q.out new file mode 100644 index 0000000..ba35e4c --- /dev/null +++ b/ql/src/test/results/clientpositive/tez/tez_vector_dynpart_hashjoin_1.q.out @@ -0,0 +1,804 @@ +PREHOOK: query: -- First try with regular mergejoin +explain +select + * +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +order by a.cint +PREHOOK: type: QUERY +POSTHOOK: query: -- First try with regular mergejoin +explain +select + * +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +order by a.cint +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (cint BETWEEN 1000000 AND 3000000 and cint is not null) (type: boolean) + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 
ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((cbigint is not null and cint BETWEEN 1000000 AND 3000000) and cint is not null) (type: boolean) + Statistics: Num rows: 1536 Data size: 330245 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1536 Data size: 330245 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 
(type: int) + Statistics: Num rows: 1536 Data size: 330245 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) + Reducer 2 + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col2 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean), _col12 (type: tinyint), _col13 (type: smallint), _col14 (type: int), _col15 (type: bigint), _col16 (type: float), _col17 (type: double), _col18 (type: string), _col19 (type: string), _col20 (type: timestamp), _col21 (type: timestamp), _col22 (type: boolean), _col23 (type: boolean) + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), KEY.reducesinkkey0 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), VALUE._col4 (type: double), VALUE._col5 (type: string), VALUE._col6 (type: string), VALUE._col7 (type: timestamp), VALUE._col8 (type: timestamp), VALUE._col9 (type: boolean), VALUE._col10 (type: boolean), VALUE._col11 (type: tinyint), VALUE._col12 
(type: smallint), VALUE._col13 (type: int), VALUE._col14 (type: bigint), VALUE._col15 (type: float), VALUE._col16 (type: double), VALUE._col17 (type: string), VALUE._col18 (type: string), VALUE._col19 (type: timestamp), VALUE._col20 (type: timestamp), VALUE._col21 (type: boolean), VALUE._col22 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select + * +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +order by a.cint +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select + * +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +order by a.cint +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +11 NULL 1000828 1531084669 11.0 NULL wM316f6NqGIkoP388j3F6 poWQQo3Upvt3Wh 1969-12-31 16:00:02.351 NULL false true 11 NULL 1000828 1531084669 11.0 NULL wM316f6NqGIkoP388j3F6 poWQQo3Upvt3Wh 1969-12-31 16:00:02.351 NULL false true +NULL -3799 1248059 1864027286 NULL -3799.0 Uhps6mMh3IfHB3j7yH62K 4KWs6gw7lv2WYd66P NULL 1969-12-31 15:59:54.622 false true NULL -3799 1248059 1864027286 NULL -3799.0 Uhps6mMh3IfHB3j7yH62K 4KWs6gw7lv2WYd66P NULL 
1969-12-31 15:59:54.622 false true +NULL 10782 1286921 1864027286 NULL 10782.0 ODLrXI8882q8LS8 4KWs6gw7lv2WYd66P NULL 1969-12-31 15:59:52.138 true true NULL 10782 1286921 1864027286 NULL 10782.0 ODLrXI8882q8LS8 4KWs6gw7lv2WYd66P NULL 1969-12-31 15:59:52.138 true true +NULL -13036 1288927 -1645852809 NULL -13036.0 yinBY725P7V2 xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:00.763 true false NULL -13036 1288927 -1645852809 NULL -13036.0 yinBY725P7V2 xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:00.763 true false +11 NULL 1310786 -413875656 11.0 NULL W0rvA4H1xn0xMG4uk0 8yVVjG 1969-12-31 16:00:02.351 NULL false true 11 NULL 1310786 -413875656 11.0 NULL W0rvA4H1xn0xMG4uk0 8yVVjG 1969-12-31 16:00:02.351 NULL false true +-51 NULL 2089466 -240556350 -51.0 NULL cXX24dH7tblSj46j2g C31eea0wrHHqvj 1969-12-31 16:00:08.451 NULL true true -51 NULL 2089466 -240556350 -51.0 NULL cXX24dH7tblSj46j2g C31eea0wrHHqvj 1969-12-31 16:00:08.451 NULL true true +NULL -8915 2101183 1864027286 NULL -8915.0 x7By66525 4KWs6gw7lv2WYd66P NULL 1969-12-31 16:00:05.831 false true NULL -8915 2101183 1864027286 NULL -8915.0 x7By66525 4KWs6gw7lv2WYd66P NULL 1969-12-31 16:00:05.831 false true +8 NULL 2229621 -381406148 8.0 NULL q7onkS7QRPh5ghOK oKb0bi 1969-12-31 16:00:15.892 NULL true false 8 NULL 2229621 -381406148 8.0 NULL q7onkS7QRPh5ghOK oKb0bi 1969-12-31 16:00:15.892 NULL true false +8 NULL 2433892 -1611863517 8.0 NULL 674ILv3V2TxFqXP6wSbL VLprkK2XfX 1969-12-31 16:00:15.892 NULL false true 8 NULL 2433892 -1611863517 8.0 NULL 674ILv3V2TxFqXP6wSbL VLprkK2XfX 1969-12-31 16:00:15.892 NULL false true +-51 NULL 2949963 -1580871111 -51.0 NULL 0K68k3bdl7jO7 TPPAu 1969-12-31 16:00:08.451 NULL true false -51 NULL 2949963 -1580871111 -51.0 NULL 0K68k3bdl7jO7 TPPAu 1969-12-31 16:00:08.451 NULL true false +PREHOOK: query: explain +select + count(*) +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +PREHOOK: type: QUERY +POSTHOOK: query: 
explain +select + count(*) +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (cint BETWEEN 1000000 AND 3000000 and cint is not null) (type: boolean) + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cint (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((cbigint is not null and cint BETWEEN 1000000 AND 3000000) and cint is not null) (type: boolean) + Statistics: Num rows: 1536 Data size: 330245 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cint (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1536 Data size: 330245 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1536 Data size: 330245 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 
to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select + count(*) +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select + count(*) +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +10 +PREHOOK: query: explain +select + a.csmallint, count(*) c1 +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +group by a.csmallint +order by c1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select + a.csmallint, count(*) c1 +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint 
between 1000000 and 3000000 and b.cbigint is not null +group by a.csmallint +order by c1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (cint BETWEEN 1000000 AND 3000000 and cint is not null) (type: boolean) + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: csmallint (type: smallint), cint (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: smallint) + Map 5 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((cbigint is not null and cint BETWEEN 1000000 AND 3000000) and cint is not null) (type: boolean) + Statistics: Num rows: 1536 Data size: 330245 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cint (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1536 Data size: 330245 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1536 Data size: 330245 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + 
Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: smallint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: smallint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1689 Data size: 363162 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: bigint) + sort order: + + Statistics: Num rows: 1689 Data size: 363162 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: smallint) + Reducer 4 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: smallint), KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1689 Data size: 363162 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1689 Data size: 363162 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select + a.csmallint, count(*) c1 +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 
3000000 and b.cbigint is not null +group by a.csmallint +order by c1 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select + a.csmallint, count(*) c1 +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +group by a.csmallint +order by c1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +10782 1 +-3799 1 +-8915 1 +-13036 1 +NULL 6 +PREHOOK: query: -- Try with dynamically partitioned hashjoin +explain +select + * +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +order by a.cint +PREHOOK: type: QUERY +POSTHOOK: query: -- Try with dynamically partitioned hashjoin +explain +select + * +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +order by a.cint +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (cint BETWEEN 1000000 AND 3000000 and cint is not null) (type: boolean) + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) + 
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) + Execution mode: vectorized + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((cbigint is not null and cint BETWEEN 1000000 AND 3000000) and cint is not null) (type: boolean) + Statistics: Num rows: 1536 Data size: 330245 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1536 Data size: 330245 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 1536 Data size: 330245 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), 
_col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: int) + 1 KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + input vertices: + 1 Map 4 + Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean), _col12 (type: tinyint), _col13 (type: smallint), _col14 (type: int), _col15 (type: bigint), _col16 (type: float), _col17 (type: double), _col18 (type: string), _col19 (type: string), _col20 (type: timestamp), _col21 (type: timestamp), _col22 (type: boolean), _col23 (type: boolean) + Execution mode: vectorized + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), KEY.reducesinkkey0 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), VALUE._col4 (type: double), VALUE._col5 (type: string), VALUE._col6 (type: string), VALUE._col7 (type: timestamp), VALUE._col8 (type: timestamp), VALUE._col9 (type: boolean), VALUE._col10 (type: boolean), VALUE._col11 (type: tinyint), VALUE._col12 (type: smallint), VALUE._col13 (type: int), VALUE._col14 (type: bigint), VALUE._col15 (type: float), VALUE._col16 (type: double), 
VALUE._col17 (type: string), VALUE._col18 (type: string), VALUE._col19 (type: timestamp), VALUE._col20 (type: timestamp), VALUE._col21 (type: boolean), VALUE._col22 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select + * +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +order by a.cint +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select + * +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +order by a.cint +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +11 NULL 1000828 1531084669 11.0 NULL wM316f6NqGIkoP388j3F6 poWQQo3Upvt3Wh 1969-12-31 16:00:02.351 NULL false true 11 NULL 1000828 1531084669 11.0 NULL wM316f6NqGIkoP388j3F6 poWQQo3Upvt3Wh 1969-12-31 16:00:02.351 NULL false true +NULL -3799 1248059 1864027286 NULL -3799.0 Uhps6mMh3IfHB3j7yH62K 4KWs6gw7lv2WYd66P NULL 1969-12-31 15:59:54.622 false true NULL -3799 1248059 1864027286 NULL -3799.0 Uhps6mMh3IfHB3j7yH62K 4KWs6gw7lv2WYd66P NULL 1969-12-31 15:59:54.622 false true +NULL 10782 1286921 1864027286 NULL 10782.0 ODLrXI8882q8LS8 
4KWs6gw7lv2WYd66P NULL 1969-12-31 15:59:52.138 true true NULL 10782 1286921 1864027286 NULL 10782.0 ODLrXI8882q8LS8 4KWs6gw7lv2WYd66P NULL 1969-12-31 15:59:52.138 true true +NULL -13036 1288927 -1645852809 NULL -13036.0 yinBY725P7V2 xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:00.763 true false NULL -13036 1288927 -1645852809 NULL -13036.0 yinBY725P7V2 xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:00.763 true false +11 NULL 1310786 -413875656 11.0 NULL W0rvA4H1xn0xMG4uk0 8yVVjG 1969-12-31 16:00:02.351 NULL false true 11 NULL 1310786 -413875656 11.0 NULL W0rvA4H1xn0xMG4uk0 8yVVjG 1969-12-31 16:00:02.351 NULL false true +-51 NULL 2089466 -240556350 -51.0 NULL cXX24dH7tblSj46j2g C31eea0wrHHqvj 1969-12-31 16:00:08.451 NULL true true -51 NULL 2089466 -240556350 -51.0 NULL cXX24dH7tblSj46j2g C31eea0wrHHqvj 1969-12-31 16:00:08.451 NULL true true +NULL -8915 2101183 1864027286 NULL -8915.0 x7By66525 4KWs6gw7lv2WYd66P NULL 1969-12-31 16:00:05.831 false true NULL -8915 2101183 1864027286 NULL -8915.0 x7By66525 4KWs6gw7lv2WYd66P NULL 1969-12-31 16:00:05.831 false true +8 NULL 2229621 -381406148 8.0 NULL q7onkS7QRPh5ghOK oKb0bi 1969-12-31 16:00:15.892 NULL true false 8 NULL 2229621 -381406148 8.0 NULL q7onkS7QRPh5ghOK oKb0bi 1969-12-31 16:00:15.892 NULL true false +8 NULL 2433892 -1611863517 8.0 NULL 674ILv3V2TxFqXP6wSbL VLprkK2XfX 1969-12-31 16:00:15.892 NULL false true 8 NULL 2433892 -1611863517 8.0 NULL 674ILv3V2TxFqXP6wSbL VLprkK2XfX 1969-12-31 16:00:15.892 NULL false true +-51 NULL 2949963 -1580871111 -51.0 NULL 0K68k3bdl7jO7 TPPAu 1969-12-31 16:00:08.451 NULL true false -51 NULL 2949963 -1580871111 -51.0 NULL 0K68k3bdl7jO7 TPPAu 1969-12-31 16:00:08.451 NULL true false +PREHOOK: query: explain +select + count(*) +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +PREHOOK: type: QUERY +POSTHOOK: query: explain +select + count(*) +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + 
a.cint between 1000000 and 3000000 and b.cbigint is not null +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (cint BETWEEN 1000000 AND 3000000 and cint is not null) (type: boolean) + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cint (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((cbigint is not null and cint BETWEEN 1000000 AND 3000000) and cint is not null) (type: boolean) + Statistics: Num rows: 1536 Data size: 330245 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cint (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1536 Data size: 330245 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1536 Data size: 330245 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 
KEY.reducesinkkey0 (type: int) + 1 KEY.reducesinkkey0 (type: int) + input vertices: + 1 Map 4 + Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select + count(*) +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select + count(*) +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +10 +PREHOOK: query: explain +select + a.csmallint, count(*) c1 +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +group by a.csmallint +order by c1 +PREHOOK: type: QUERY +POSTHOOK: 
query: explain +select + a.csmallint, count(*) c1 +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +group by a.csmallint +order by c1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 5 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (cint BETWEEN 1000000 AND 3000000 and cint is not null) (type: boolean) + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: csmallint (type: smallint), cint (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: smallint) + Execution mode: vectorized + Map 5 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((cbigint is not null and cint BETWEEN 1000000 AND 3000000) and cint is not null) (type: boolean) + Statistics: Num rows: 1536 Data size: 330245 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cint (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1536 Data size: 330245 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + 
Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1536 Data size: 330245 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: int) + 1 KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 5 + Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Group By Operator + aggregations: count() + keys: _col0 (type: smallint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: smallint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1689 Data size: 363162 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: bigint) + sort order: + + Statistics: Num rows: 1689 Data size: 363162 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: smallint) + Execution mode: vectorized + Reducer 4 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: smallint), KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1689 Data size: 363162 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1689 Data size: 363162 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select + a.csmallint, count(*) c1 +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +group by a.csmallint +order by c1 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select + a.csmallint, count(*) c1 +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +group by a.csmallint +order by c1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +10782 1 +-3799 1 +-8915 1 +-13036 1 +NULL 6 diff --git a/ql/src/test/results/clientpositive/tez/tez_vector_dynpart_hashjoin_2.q.out b/ql/src/test/results/clientpositive/tez/tez_vector_dynpart_hashjoin_2.q.out new file mode 100644 index 0000000..4c38f51 --- /dev/null +++ b/ql/src/test/results/clientpositive/tez/tez_vector_dynpart_hashjoin_2.q.out @@ -0,0 +1,645 @@ +PREHOOK: query: -- Multiple tables, and change the order of the big table (alltypesorc) +-- First try with regular mergejoin +explain +select + a.* +from + alltypesorc a, + src b, + src c +where + a.csmallint = cast(b.key as int) and a.csmallint = (cast(c.key as int) + 0) + and (a.csmallint < 100) +order by a.csmallint, a.ctinyint, a.cint +PREHOOK: type: QUERY +POSTHOOK: query: -- Multiple tables, and change the order of the big table (alltypesorc) +-- First try with regular mergejoin +explain +select + a.* +from + alltypesorc a, + src b, + src c +where + a.csmallint = cast(b.key as int) and a.csmallint = (cast(c.key as int) + 0) + and (a.csmallint < 100) +order by a.csmallint, a.ctinyint, a.cint +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + 
Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToInteger(key) + 0) is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: (UDFToInteger(_col0) + 0) (type: int) + sort order: + + Map-reduce partition columns: (UDFToInteger(_col0) + 0) (type: int) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((csmallint < 100) and UDFToInteger(csmallint) is not null) (type: boolean) + Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToInteger(_col1) (type: int) + sort order: + + Map-reduce partition 
columns: UDFToInteger(_col1) (type: int) + Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) + Map 6 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: UDFToInteger(key) is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToInteger(_col0) (type: int) + sort order: + + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 (UDFToInteger(_col0) + 0) (type: int) + 1 UDFToInteger(_col1) (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 2477 Data size: 532794 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: tinyint), _col2 (type: smallint), _col3 (type: int), _col4 (type: bigint), _col5 (type: float), _col6 (type: double), _col7 (type: string), _col8 (type: string), _col9 (type: timestamp), _col10 (type: timestamp), _col11 (type: boolean), _col12 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 2477 Data size: 532794 Basic stats: COMPLETE Column stats: 
NONE + Reduce Output Operator + key expressions: _col1 (type: smallint), _col0 (type: tinyint), _col2 (type: int) + sort order: +++ + Statistics: Num rows: 2477 Data size: 532794 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: tinyint), KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey2 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: float), VALUE._col2 (type: double), VALUE._col3 (type: string), VALUE._col4 (type: string), VALUE._col5 (type: timestamp), VALUE._col6 (type: timestamp), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 2477 Data size: 532794 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2477 Data size: 532794 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 UDFToInteger(_col1) (type: int) + 1 UDFToInteger(_col0) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 2252 Data size: 484359 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: tinyint), _col1 (type: smallint), _col10 (type: boolean), _col11 (type: boolean), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: 
string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp) + outputColumnNames: _col0, _col1, _col10, _col11, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 2252 Data size: 484359 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToInteger(_col1) (type: int) + sort order: + + Map-reduce partition columns: UDFToInteger(_col1) (type: int) + Statistics: Num rows: 2252 Data size: 484359 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select + a.* +from + alltypesorc a, + src b, + src c +where + a.csmallint = cast(b.key as int) and a.csmallint = (cast(c.key as int) + 0) + and (a.csmallint < 100) +order by a.csmallint, a.ctinyint, a.cint +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select + a.* +from + alltypesorc a, + src b, + src c +where + a.csmallint = cast(b.key as int) and a.csmallint = (cast(c.key as int) + 0) + and (a.csmallint < 100) +order by a.csmallint, a.ctinyint, a.cint +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Input: default@src +#### A masked pattern was here #### +-19 8 626923679 NULL -19.0 8.0 821UdmGbkEf4j NULL 1969-12-31 15:59:46.619 1969-12-31 15:59:46.95 true NULL +6 8 528534767 NULL 6.0 8.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:45.459 1969-12-31 16:00:00.236 true NULL +NULL 9 -470743566 -1887561756 NULL 9.0 swx5K33Sm5qcKR5B 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:07.318 true false +NULL 10 813877020 -1645852809 NULL 10.0 4QG23O2GKF6BUe13O7A2C 
xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.851 false false +-62 10 528534767 NULL -62.0 10.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:52.265 1969-12-31 15:59:56.584 true NULL +NULL 19 312515097 1864027286 NULL 19.0 ds5YqbRvhf3Sb2 4KWs6gw7lv2WYd66P NULL 1969-12-31 15:59:56.211 false true +-7 19 528534767 NULL -7.0 19.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:13.994 1969-12-31 15:59:55.362 true NULL +-45 20 253665376 NULL -45.0 20.0 1cGVWH7n1QU NULL 1969-12-31 16:00:09.949 1969-12-31 16:00:10.979 true NULL +NULL 34 510824788 -1887561756 NULL 34.0 nj1bXoh6k 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 15:59:46.017 true false +41 37 528534767 NULL 41.0 37.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:52.817 1969-12-31 15:59:53.672 true NULL +41 37 528534767 NULL 41.0 37.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:52.817 1969-12-31 15:59:53.672 true NULL +41 37 528534767 NULL 41.0 37.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:52.817 1969-12-31 15:59:53.672 true NULL +41 37 528534767 NULL 41.0 37.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:52.817 1969-12-31 15:59:53.672 true NULL +61 41 528534767 NULL 61.0 41.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:55.708 1969-12-31 16:00:14.412 true NULL +NULL 58 -144190833 -1645852809 NULL 58.0 122J3HlhqBW1D43 xH7445Rals48VOulSyR5F NULL 1969-12-31 15:59:46.315 true false +NULL 58 -144190833 -1645852809 NULL 58.0 122J3HlhqBW1D43 xH7445Rals48VOulSyR5F NULL 1969-12-31 15:59:46.315 true false +NULL 58 -144190833 -1645852809 NULL 58.0 122J3HlhqBW1D43 xH7445Rals48VOulSyR5F NULL 1969-12-31 15:59:46.315 true false +NULL 58 -144190833 -1645852809 NULL 58.0 122J3HlhqBW1D43 xH7445Rals48VOulSyR5F NULL 1969-12-31 15:59:46.315 true false +NULL 67 790444583 -1645852809 NULL 67.0 xptM81y xH7445Rals48VOulSyR5F NULL 1969-12-31 15:59:58.622 false false +NULL 67 790444583 -1645852809 NULL 67.0 xptM81y xH7445Rals48VOulSyR5F NULL 1969-12-31 15:59:58.622 false false +NULL 67 790444583 -1645852809 NULL 67.0 xptM81y xH7445Rals48VOulSyR5F NULL 1969-12-31 15:59:58.622 false 
false +NULL 67 790444583 -1645852809 NULL 67.0 xptM81y xH7445Rals48VOulSyR5F NULL 1969-12-31 15:59:58.622 false false +-46 72 626923679 NULL -46.0 72.0 821UdmGbkEf4j NULL 1969-12-31 16:00:11.411 1969-12-31 16:00:05.539 true NULL +-46 72 626923679 NULL -46.0 72.0 821UdmGbkEf4j NULL 1969-12-31 16:00:11.411 1969-12-31 16:00:05.539 true NULL +-46 72 626923679 NULL -46.0 72.0 821UdmGbkEf4j NULL 1969-12-31 16:00:11.411 1969-12-31 16:00:05.539 true NULL +-46 72 626923679 NULL -46.0 72.0 821UdmGbkEf4j NULL 1969-12-31 16:00:11.411 1969-12-31 16:00:05.539 true NULL +39 74 626923679 NULL 39.0 74.0 821UdmGbkEf4j NULL 1969-12-31 16:00:10.403 1969-12-31 16:00:12.52 true NULL +47 74 626923679 NULL 47.0 74.0 821UdmGbkEf4j NULL 1969-12-31 15:59:57.849 1969-12-31 15:59:57.569 true NULL +-22 77 528534767 NULL -22.0 77.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:45.928 1969-12-31 15:59:43.621 true NULL +PREHOOK: query: -- noconditionaltask.size needs to be low enough that entire filtered table results do not fit in one task's hash table +-- Try with dynamically partitioned hash join +explain +select + a.* +from + alltypesorc a, + src b, + src c +where + a.csmallint = cast(b.key as int) and a.csmallint = (cast(c.key as int) + 0) + and (a.csmallint < 100) +order by a.csmallint, a.ctinyint, a.cint +PREHOOK: type: QUERY +POSTHOOK: query: -- noconditionaltask.size needs to be low enough that entire filtered table results do not fit in one task's hash table +-- Try with dynamically partitioned hash join +explain +select + a.* +from + alltypesorc a, + src b, + src c +where + a.csmallint = cast(b.key as int) and a.csmallint = (cast(c.key as int) + 0) + and (a.csmallint < 100) +order by a.csmallint, a.ctinyint, a.cint +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE), Map 6 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 3 
(CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToInteger(key) + 0) is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: (UDFToInteger(_col0) + 0) (type: int) + sort order: + + Map-reduce partition columns: (UDFToInteger(_col0) + 0) (type: int) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map 2 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((csmallint < 100) and UDFToInteger(csmallint) is not null) (type: boolean) + Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToInteger(_col1) (type: int) + sort order: + + Map-reduce partition columns: UDFToInteger(_col1) (type: int) + Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: tinyint), _col1 
(type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) + Execution mode: vectorized + Map 6 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: UDFToInteger(key) is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToInteger(_col0) (type: int) + sort order: + + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: int) + 1 KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + input vertices: + 1 Map 6 + Statistics: Num rows: 2252 Data size: 484359 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Select Operator + expressions: _col0 (type: tinyint), _col1 (type: smallint), _col10 (type: boolean), _col11 (type: boolean), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp) + outputColumnNames: _col0, _col1, _col10, _col11, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 2252 Data size: 484359 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToInteger(_col1) (type: int) + sort order: + + 
Map-reduce partition columns: UDFToInteger(_col1) (type: int) + Statistics: Num rows: 2252 Data size: 484359 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) + Execution mode: vectorized + Reducer 4 + Reduce Operator Tree: + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: int) + 1 KEY.reducesinkkey0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + input vertices: + 0 Map 1 + Statistics: Num rows: 2477 Data size: 532794 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Select Operator + expressions: _col1 (type: tinyint), _col2 (type: smallint), _col3 (type: int), _col4 (type: bigint), _col5 (type: float), _col6 (type: double), _col7 (type: string), _col8 (type: string), _col9 (type: timestamp), _col10 (type: timestamp), _col11 (type: boolean), _col12 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 2477 Data size: 532794 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: smallint), _col0 (type: tinyint), _col2 (type: int) + sort order: +++ + Statistics: Num rows: 2477 Data size: 532794 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) + Execution mode: vectorized + Reducer 5 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: tinyint), KEY.reducesinkkey0 (type: 
smallint), KEY.reducesinkkey2 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: float), VALUE._col2 (type: double), VALUE._col3 (type: string), VALUE._col4 (type: string), VALUE._col5 (type: timestamp), VALUE._col6 (type: timestamp), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 2477 Data size: 532794 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2477 Data size: 532794 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select + a.* +from + alltypesorc a, + src b, + src c +where + a.csmallint = cast(b.key as int) and a.csmallint = (cast(c.key as int) + 0) + and (a.csmallint < 100) +order by a.csmallint, a.ctinyint, a.cint +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select + a.* +from + alltypesorc a, + src b, + src c +where + a.csmallint = cast(b.key as int) and a.csmallint = (cast(c.key as int) + 0) + and (a.csmallint < 100) +order by a.csmallint, a.ctinyint, a.cint +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Input: default@src +#### A masked pattern was here #### +-19 8 626923679 NULL -19.0 8.0 821UdmGbkEf4j NULL 1969-12-31 15:59:46.619 1969-12-31 15:59:46.95 true NULL +6 8 528534767 NULL 6.0 8.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:45.459 1969-12-31 16:00:00.236 true NULL +NULL 9 -470743566 -1887561756 NULL 9.0 swx5K33Sm5qcKR5B 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:07.318 true false +NULL 10 813877020 
-1645852809 NULL 10.0 4QG23O2GKF6BUe13O7A2C xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.851 false false +-62 10 528534767 NULL -62.0 10.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:52.265 1969-12-31 15:59:56.584 true NULL +NULL 19 312515097 1864027286 NULL 19.0 ds5YqbRvhf3Sb2 4KWs6gw7lv2WYd66P NULL 1969-12-31 15:59:56.211 false true +-7 19 528534767 NULL -7.0 19.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:13.994 1969-12-31 15:59:55.362 true NULL +-45 20 253665376 NULL -45.0 20.0 1cGVWH7n1QU NULL 1969-12-31 16:00:09.949 1969-12-31 16:00:10.979 true NULL +NULL 34 510824788 -1887561756 NULL 34.0 nj1bXoh6k 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 15:59:46.017 true false +41 37 528534767 NULL 41.0 37.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:52.817 1969-12-31 15:59:53.672 true NULL +41 37 528534767 NULL 41.0 37.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:52.817 1969-12-31 15:59:53.672 true NULL +41 37 528534767 NULL 41.0 37.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:52.817 1969-12-31 15:59:53.672 true NULL +41 37 528534767 NULL 41.0 37.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:52.817 1969-12-31 15:59:53.672 true NULL +61 41 528534767 NULL 61.0 41.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:55.708 1969-12-31 16:00:14.412 true NULL +NULL 58 -144190833 -1645852809 NULL 58.0 122J3HlhqBW1D43 xH7445Rals48VOulSyR5F NULL 1969-12-31 15:59:46.315 true false +NULL 58 -144190833 -1645852809 NULL 58.0 122J3HlhqBW1D43 xH7445Rals48VOulSyR5F NULL 1969-12-31 15:59:46.315 true false +NULL 58 -144190833 -1645852809 NULL 58.0 122J3HlhqBW1D43 xH7445Rals48VOulSyR5F NULL 1969-12-31 15:59:46.315 true false +NULL 58 -144190833 -1645852809 NULL 58.0 122J3HlhqBW1D43 xH7445Rals48VOulSyR5F NULL 1969-12-31 15:59:46.315 true false +NULL 67 790444583 -1645852809 NULL 67.0 xptM81y xH7445Rals48VOulSyR5F NULL 1969-12-31 15:59:58.622 false false +NULL 67 790444583 -1645852809 NULL 67.0 xptM81y xH7445Rals48VOulSyR5F NULL 1969-12-31 15:59:58.622 false false +NULL 67 790444583 -1645852809 NULL 67.0 xptM81y 
xH7445Rals48VOulSyR5F NULL 1969-12-31 15:59:58.622 false false +NULL 67 790444583 -1645852809 NULL 67.0 xptM81y xH7445Rals48VOulSyR5F NULL 1969-12-31 15:59:58.622 false false +-46 72 626923679 NULL -46.0 72.0 821UdmGbkEf4j NULL 1969-12-31 16:00:11.411 1969-12-31 16:00:05.539 true NULL +-46 72 626923679 NULL -46.0 72.0 821UdmGbkEf4j NULL 1969-12-31 16:00:11.411 1969-12-31 16:00:05.539 true NULL +-46 72 626923679 NULL -46.0 72.0 821UdmGbkEf4j NULL 1969-12-31 16:00:11.411 1969-12-31 16:00:05.539 true NULL +-46 72 626923679 NULL -46.0 72.0 821UdmGbkEf4j NULL 1969-12-31 16:00:11.411 1969-12-31 16:00:05.539 true NULL +39 74 626923679 NULL 39.0 74.0 821UdmGbkEf4j NULL 1969-12-31 16:00:10.403 1969-12-31 16:00:12.52 true NULL +47 74 626923679 NULL 47.0 74.0 821UdmGbkEf4j NULL 1969-12-31 15:59:57.849 1969-12-31 15:59:57.569 true NULL +-22 77 528534767 NULL -22.0 77.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:45.928 1969-12-31 15:59:43.621 true NULL +PREHOOK: query: -- Try different order of tables +explain +select + a.* +from + src b, + alltypesorc a, + src c +where + a.csmallint = cast(b.key as int) and a.csmallint = (cast(c.key as int) + 0) + and (a.csmallint < 100) +order by a.csmallint, a.ctinyint, a.cint +PREHOOK: type: QUERY +POSTHOOK: query: -- Try different order of tables +explain +select + a.* +from + src b, + alltypesorc a, + src c +where + a.csmallint = cast(b.key as int) and a.csmallint = (cast(c.key as int) + 0) + and (a.csmallint < 100) +order by a.csmallint, a.ctinyint, a.cint +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE), Map 6 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic 
stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToInteger(key) + 0) is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: (UDFToInteger(_col0) + 0) (type: int) + sort order: + + Map-reduce partition columns: (UDFToInteger(_col0) + 0) (type: int) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map 2 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((csmallint < 100) and UDFToInteger(csmallint) is not null) (type: boolean) + Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToInteger(_col1) (type: int) + sort order: + + Map-reduce partition columns: UDFToInteger(_col1) (type: int) + Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: 
boolean), _col11 (type: boolean) + Execution mode: vectorized + Map 6 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: UDFToInteger(key) is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToInteger(_col0) (type: int) + sort order: + + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: int) + 1 KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + input vertices: + 1 Map 6 + Statistics: Num rows: 2252 Data size: 484359 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Select Operator + expressions: _col0 (type: tinyint), _col1 (type: smallint), _col10 (type: boolean), _col11 (type: boolean), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp) + outputColumnNames: _col0, _col1, _col10, _col11, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 2252 Data size: 484359 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToInteger(_col1) (type: int) + sort order: + + Map-reduce partition columns: UDFToInteger(_col1) (type: int) + Statistics: Num rows: 2252 Data size: 484359 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: tinyint), _col1 (type: smallint), 
_col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) + Execution mode: vectorized + Reducer 4 + Reduce Operator Tree: + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: int) + 1 KEY.reducesinkkey0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + input vertices: + 0 Map 1 + Statistics: Num rows: 2477 Data size: 532794 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Select Operator + expressions: _col1 (type: tinyint), _col2 (type: smallint), _col3 (type: int), _col4 (type: bigint), _col5 (type: float), _col6 (type: double), _col7 (type: string), _col8 (type: string), _col9 (type: timestamp), _col10 (type: timestamp), _col11 (type: boolean), _col12 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 2477 Data size: 532794 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: smallint), _col0 (type: tinyint), _col2 (type: int) + sort order: +++ + Statistics: Num rows: 2477 Data size: 532794 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) + Execution mode: vectorized + Reducer 5 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: tinyint), KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey2 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: float), VALUE._col2 (type: double), VALUE._col3 (type: string), VALUE._col4 (type: string), VALUE._col5 (type: timestamp), 
VALUE._col6 (type: timestamp), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 2477 Data size: 532794 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2477 Data size: 532794 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select + a.* +from + src b, + alltypesorc a, + src c +where + a.csmallint = cast(b.key as int) and a.csmallint = (cast(c.key as int) + 0) + and (a.csmallint < 100) +order by a.csmallint, a.ctinyint, a.cint +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select + a.* +from + src b, + alltypesorc a, + src c +where + a.csmallint = cast(b.key as int) and a.csmallint = (cast(c.key as int) + 0) + and (a.csmallint < 100) +order by a.csmallint, a.ctinyint, a.cint +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Input: default@src +#### A masked pattern was here #### +-19 8 626923679 NULL -19.0 8.0 821UdmGbkEf4j NULL 1969-12-31 15:59:46.619 1969-12-31 15:59:46.95 true NULL +6 8 528534767 NULL 6.0 8.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:45.459 1969-12-31 16:00:00.236 true NULL +NULL 9 -470743566 -1887561756 NULL 9.0 swx5K33Sm5qcKR5B 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:07.318 true false +NULL 10 813877020 -1645852809 NULL 10.0 4QG23O2GKF6BUe13O7A2C xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.851 false false +-62 10 528534767 NULL -62.0 10.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:52.265 1969-12-31 15:59:56.584 true NULL 
+NULL 19 312515097 1864027286 NULL 19.0 ds5YqbRvhf3Sb2 4KWs6gw7lv2WYd66P NULL 1969-12-31 15:59:56.211 false true +-7 19 528534767 NULL -7.0 19.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 16:00:13.994 1969-12-31 15:59:55.362 true NULL +-45 20 253665376 NULL -45.0 20.0 1cGVWH7n1QU NULL 1969-12-31 16:00:09.949 1969-12-31 16:00:10.979 true NULL +NULL 34 510824788 -1887561756 NULL 34.0 nj1bXoh6k 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 15:59:46.017 true false +41 37 528534767 NULL 41.0 37.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:52.817 1969-12-31 15:59:53.672 true NULL +41 37 528534767 NULL 41.0 37.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:52.817 1969-12-31 15:59:53.672 true NULL +41 37 528534767 NULL 41.0 37.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:52.817 1969-12-31 15:59:53.672 true NULL +41 37 528534767 NULL 41.0 37.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:52.817 1969-12-31 15:59:53.672 true NULL +61 41 528534767 NULL 61.0 41.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:55.708 1969-12-31 16:00:14.412 true NULL +NULL 58 -144190833 -1645852809 NULL 58.0 122J3HlhqBW1D43 xH7445Rals48VOulSyR5F NULL 1969-12-31 15:59:46.315 true false +NULL 58 -144190833 -1645852809 NULL 58.0 122J3HlhqBW1D43 xH7445Rals48VOulSyR5F NULL 1969-12-31 15:59:46.315 true false +NULL 58 -144190833 -1645852809 NULL 58.0 122J3HlhqBW1D43 xH7445Rals48VOulSyR5F NULL 1969-12-31 15:59:46.315 true false +NULL 58 -144190833 -1645852809 NULL 58.0 122J3HlhqBW1D43 xH7445Rals48VOulSyR5F NULL 1969-12-31 15:59:46.315 true false +NULL 67 790444583 -1645852809 NULL 67.0 xptM81y xH7445Rals48VOulSyR5F NULL 1969-12-31 15:59:58.622 false false +NULL 67 790444583 -1645852809 NULL 67.0 xptM81y xH7445Rals48VOulSyR5F NULL 1969-12-31 15:59:58.622 false false +NULL 67 790444583 -1645852809 NULL 67.0 xptM81y xH7445Rals48VOulSyR5F NULL 1969-12-31 15:59:58.622 false false +NULL 67 790444583 -1645852809 NULL 67.0 xptM81y xH7445Rals48VOulSyR5F NULL 1969-12-31 15:59:58.622 false false +-46 72 626923679 NULL -46.0 72.0 821UdmGbkEf4j NULL 1969-12-31 
16:00:11.411 1969-12-31 16:00:05.539 true NULL +-46 72 626923679 NULL -46.0 72.0 821UdmGbkEf4j NULL 1969-12-31 16:00:11.411 1969-12-31 16:00:05.539 true NULL +-46 72 626923679 NULL -46.0 72.0 821UdmGbkEf4j NULL 1969-12-31 16:00:11.411 1969-12-31 16:00:05.539 true NULL +-46 72 626923679 NULL -46.0 72.0 821UdmGbkEf4j NULL 1969-12-31 16:00:11.411 1969-12-31 16:00:05.539 true NULL +39 74 626923679 NULL 39.0 74.0 821UdmGbkEf4j NULL 1969-12-31 16:00:10.403 1969-12-31 16:00:12.52 true NULL +47 74 626923679 NULL 47.0 74.0 821UdmGbkEf4j NULL 1969-12-31 15:59:57.849 1969-12-31 15:59:57.569 true NULL +-22 77 528534767 NULL -22.0 77.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:45.928 1969-12-31 15:59:43.621 true NULL