From f818e83ce722761f9bf8334cd9f6c14eeabe9276 Mon Sep 17 00:00:00 2001 From: Gopal V Date: Tue, 3 Nov 2015 00:12:05 -0800 Subject: [PATCH] BucketPruning simple DNF optimizer --- .../java/org/apache/hadoop/hive/conf/HiveConf.java | 5 + .../org/apache/hadoop/hive/ql/exec/Utilities.java | 17 + .../hadoop/hive/ql/exec/tez/BucketPrunerUtils.java | 37 + .../hive/ql/exec/tez/HiveSplitGenerator.java | 22 + .../ql/optimizer/FixedBucketPruningOptimizer.java | 320 +++ .../apache/hadoop/hive/ql/optimizer/Optimizer.java | 7 + .../apache/hadoop/hive/ql/parse/GenTezUtils.java | 4 + .../org/apache/hadoop/hive/ql/plan/MapWork.java | 14 + .../apache/hadoop/hive/ql/plan/TableScanDesc.java | 38 + .../test/queries/clientpositive/bucketpruning1.q | 97 + .../clientpositive/tez/bucketpruning1.q.out | 2360 ++++++++++++++++++++ 11 files changed, 2921 insertions(+) create mode 100644 ql/src/java/org/apache/hadoop/hive/ql/exec/tez/BucketPrunerUtils.java create mode 100644 ql/src/java/org/apache/hadoop/hive/ql/optimizer/FixedBucketPruningOptimizer.java create mode 100644 ql/src/test/queries/clientpositive/bucketpruning1.q create mode 100644 ql/src/test/results/clientpositive/tez/bucketpruning1.q.out diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 3ab73ad..2ca2870 100644 --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -2277,6 +2277,11 @@ public void setSparkConfigUpdated(boolean isSparkConfigUpdated) { TEZ_MIN_PARTITION_FACTOR("hive.tez.min.partition.factor", 0.25f, "When auto reducer parallelism is enabled this factor will be used to put a lower limit to the number\n" + "of reducers that tez specifies."), + TEZ_OPTIMIZE_BUCKET_PRUNING( + "hive.tez.bucket.pruning", false, + "When pruning is enabled, filters on bucket columns will be processed by \n" + + "filtering the splits against a bitset of included buckets. This needs predicates \n"+ + "produced by hive.optimize.ppd and hive.optimize.index.filter."), TEZ_DYNAMIC_PARTITION_PRUNING( "hive.tez.dynamic.partition.pruning", true, "When dynamic pruning is enabled, joins on partition keys will be processed by sending\n" + diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java index 665b3f7..f3c4a7e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java @@ -1714,6 +1714,11 @@ public static void renameOrMoveFiles(FileSystem fs, Path src, Path dst) throws I Pattern.compile("^(.*?\\(.*\\))?([0-9]+)$"); /** + * This breaks a prefixed bucket number out into a single integer + */ + private static final Pattern PREFIXED_BUCKET_ID_REGEX = + Pattern.compile("^(0*([0-9]+))_([0-9]+).*"); + /** * Get the task id from the filename. It is assumed that the filename is derived from the output * of getTaskId * @ @@ -2136,6 +2141,18 @@ public static String getBucketFileNameFromPathSubString(String bucketName) { } }
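+ /** + * Parses the bucket id out of a bucket file name: the digits before the first + * underscore, ignoring leading zeros (e.g. "000004_0" is bucket 4); returns -1 + * if no bucket id can be recovered from the name. + */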
+ public static int getBucketIdFromFile(String bucketName) { + Matcher m = PREFIXED_BUCKET_ID_REGEX.matcher(bucketName); + if (m.matches()) { + if (m.group(2).isEmpty()) { + // all zeros + return m.group(1).isEmpty() ? -1 : 0; + } + return Integer.parseInt(m.group(2)); + } + return -1; + } + public static String getNameMessage(Exception e) { return e.getClass().getName() + "(" + e.getMessage() + ")"; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/BucketPrunerUtils.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/BucketPrunerUtils.java new file mode 100644 index 0000000..bdbc01b --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/BucketPrunerUtils.java @@ -0,0 +1,37 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.tez; + +import org.apache.hadoop.hive.ql.exec.Utilities; +import org.apache.hadoop.mapred.InputSplit; +import org.apache.hadoop.mapred.FileSplit; + +/** + * BucketPrunerUtils contains helper functions used at runtime (split + * generation) to match each input split against the bucket filter + * conditions computed at compile time. + */ +public class BucketPrunerUtils { + public static int parseSplitBucket(InputSplit split) { + if (split instanceof FileSplit) { + return Utilities.getBucketIdFromFile(((FileSplit) split) + .getPath().getName()); + } + return -1; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HiveSplitGenerator.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HiveSplitGenerator.java index 2ab3328..9f3647e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HiveSplitGenerator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HiveSplitGenerator.java @@ -19,11 +19,14 @@ package org.apache.hadoop.hive.ql.exec.tez; import java.io.IOException; +import java.util.ArrayList; import java.util.Arrays; +import java.util.BitSet; import java.util.Comparator; import java.util.List; import com.google.common.base.Preconditions; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; @@ -163,6 +166,25 @@ public HiveSplitGenerator(InputInitializerContext initializerContext) throws IOE LOG.info("Number of input splits: " + splits.length + ". " + availableSlots + " available slots, " + waves + " waves. Input format is: " + realInputFormatName);
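+ // Prune the physical splits against the included-bucket bitset computed at + // compile time by FixedBucketPruningOptimizer; a split whose bucket id + // cannot be parsed from its file name is retained, since dropping an + // unknown split could silently drop rows.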
+ if (work.getIncludedBuckets() != null) { + final BitSet buckets = work.getIncludedBuckets(); + List<InputSplit> filteredSplits = new ArrayList<InputSplit>(splits.length / 2); + for (InputSplit split : splits) { + final int bucket = BucketPrunerUtils.parseSplitBucket(split); + if (bucket < 0 || buckets.get(bucket)) { + // match or UNKNOWN + filteredSplits.add(split); + } else { + LOG.info("Bucket pruning: removing split " + split); + } + } + if (filteredSplits.size() < splits.length) { + // reallocate only if any filters pruned + splits = filteredSplits.toArray(new InputSplit[filteredSplits.size()]); + } + } + Multimap<Integer, InputSplit> groupedSplits = splitGrouper.generateGroupedSplits(jobConf, conf, splits, waves, availableSlots); // And finally return them in a flat array diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/FixedBucketPruningOptimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/FixedBucketPruningOptimizer.java new file mode 100644 index 0000000..d2668f3 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/FixedBucketPruningOptimizer.java @@ -0,0 +1,320 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +package org.apache.hadoop.hive.ql.optimizer; + +import java.util.ArrayList; +import java.util.BitSet; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.Stack; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.ql.exec.FilterOperator; +import org.apache.hadoop.hive.ql.exec.TableScanOperator; +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.io.sarg.ConvertAstToSearchArg; +import org.apache.hadoop.hive.ql.io.sarg.ExpressionTree; +import org.apache.hadoop.hive.ql.io.sarg.ExpressionTree.Operator; +import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf; +import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; +import org.apache.hadoop.hive.ql.lib.Node; +import org.apache.hadoop.hive.ql.lib.NodeProcessor; +import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.metadata.Partition; +import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.optimizer.PrunerOperatorFactory.FilterPruner; +import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner; +import org.apache.hadoop.hive.ql.parse.ParseContext; +import org.apache.hadoop.hive.ql.parse.PrunedPartitionList; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; + +import com.google.common.base.Preconditions; + +/** + * Fixed bucket pruning optimizer goes through all the table scans and annotates them + * with a bucketing inclusion bit-set. + */ +public class FixedBucketPruningOptimizer implements Transform { + + private static final Log LOG = LogFactory + .getLog(FixedBucketPruningOptimizer.class.getName()); + + public class NoopWalker implements NodeProcessor { + @Override + public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, + Object... nodeOutputs) throws SemanticException { + // do nothing + return null; + } + }
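+ /** + * First pass: records the table's bucket count, bucket columns and schema, + * and disables the optimization (numBuckets = -1) when any surviving + * partition was written with a different bucket count. + */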
+ public class FixedBucketPartitionWalker extends FilterPruner { + + @Override + protected void generatePredicate(NodeProcessorCtx procCtx, + FilterOperator fop, TableScanOperator top) throws SemanticException, + UDFArgumentException { + FixedBucketPruningOptimizerCtxt ctxt = ((FixedBucketPruningOptimizerCtxt) procCtx); + Table tbl = top.getConf().getTableMetadata(); + if (tbl.getNumBuckets() > 0) { + final int nbuckets = tbl.getNumBuckets(); + ctxt.setNumBuckets(nbuckets); + ctxt.setBucketCols(tbl.getBucketCols()); + ctxt.setSchema(tbl.getFields()); + if (tbl.isPartitioned()) { + // Run partition pruner to get partitions + ParseContext parseCtx = ctxt.pctx; + PrunedPartitionList prunedPartList; + try { + String alias = (String) parseCtx.getTopOps().keySet().toArray()[0]; + prunedPartList = PartitionPruner.prune(top, parseCtx, alias); + } catch (HiveException e) { + // Has to use full name to make sure it does not conflict with + // org.apache.commons.lang.StringUtils + throw new SemanticException(e.getMessage(), e); + } + if (prunedPartList != null) { + ctxt.setPartitions(prunedPartList); + for (Partition p : prunedPartList.getPartitions()) { + if (nbuckets != p.getBucketCount()) { + // disable feature + ctxt.setNumBuckets(-1); + break; + } + } + } + } + } + } + } + + public static class BucketBitsetGenerator extends FilterPruner { + + @Override + protected void generatePredicate(NodeProcessorCtx procCtx, + FilterOperator fop, TableScanOperator top) throws SemanticException, + UDFArgumentException { + FixedBucketPruningOptimizerCtxt ctxt = ((FixedBucketPruningOptimizerCtxt) procCtx); + if (ctxt.getNumBuckets() <= 0 || ctxt.getBucketCols().size() != 1) { + // not bucketed, or bucketed on more than one column - nothing to prune + return; + } + ExprNodeGenericFuncDesc filter = top.getConf().getFilterExpr(); + if (filter == null) { + return; + } + // the sargs are closely tied to hive.optimize.index.filter + SearchArgument sarg = ConvertAstToSearchArg.create(filter); + if (sarg == null) { + return; + } + final String bucketCol = ctxt.getBucketCols().get(0); + StructField bucketField = null; + for (StructField fs : ctxt.getSchema()) { + if (fs.getFieldName().equals(bucketCol)) { + bucketField = fs; + } + } + Preconditions.checkArgument(bucketField != null); + List<Object> literals = new ArrayList<Object>(); + List<PredicateLeaf> leaves = sarg.getLeaves(); + Set<PredicateLeaf> bucketLeaves = new HashSet<PredicateLeaf>(); + for (PredicateLeaf l : leaves) { + if (bucketCol.equals(l.getColumnName())) { + switch (l.getOperator()) { + case EQUALS: + case IN: + // supported + break; + case IS_NULL: + // TODO: (a = 1) and NOT (a is NULL) can be potentially folded earlier + // fall through + case BETWEEN: + // TODO: for ordinal types you can produce a range (BETWEEN 1444442100 1444442107) + // fall through + default: + // cannot optimize any others + return; + } + bucketLeaves.add(l); + } + } + if (bucketLeaves.size() == 0) { + return; + } + // TODO: optimize properly by converting from CNF to DNF + // first-cut takes a known minimal tree and no others.
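+ // the accepted shapes are, writing "a" for the single bucket column and + // "*" for any other predicate: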
+ // $expr = (a=1) + // (a=1 or a=2) + // (a in (1,2)) + // ($expr and *) + // (* and $expr) + ExpressionTree expr = sarg.getExpression(); + if (expr.getOperator() == Operator.LEAF) { + PredicateLeaf l = leaves.get(expr.getLeaf()); + if (!addLiteral(literals, l)) { + return; + } + } else if (expr.getOperator() == Operator.AND) { + boolean found = false; + for (ExpressionTree subExpr : expr.getChildren()) { + if (subExpr.getOperator() != Operator.LEAF) { + return; + } + // at least one of the AND branches has to be a bucket-leaf + PredicateLeaf l = leaves.get(subExpr.getLeaf()); + if (bucketLeaves.contains(l)) { + if (!addLiteral(literals, l)) { + return; + } + found = true; + } + } + if (!found) { + return; + } + } else if (expr.getOperator() == Operator.OR) { + for (ExpressionTree subExpr : expr.getChildren()) { + if (subExpr.getOperator() != Operator.LEAF) { + return; + } + PredicateLeaf l = leaves.get(subExpr.getLeaf()); + if (bucketLeaves.contains(l)) { + if (!addLiteral(literals, l)) { + return; + } + } else { + // all of the OR branches need to be bucket-leaves + return; + } + } + } + // invariant: bucket-col IN literals of type bucketField + BitSet bs = new BitSet(ctxt.getNumBuckets()); + bs.clear(); + PrimitiveObjectInspector bucketOI = (PrimitiveObjectInspector)bucketField.getFieldObjectInspector(); + PrimitiveObjectInspector constOI = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(bucketOI.getPrimitiveCategory()); + for (Object literal: literals) { + PrimitiveObjectInspector origOI = PrimitiveObjectInspectorFactory.getPrimitiveObjectInspectorFromClass(literal.getClass()); + Converter conv = ObjectInspectorConverters.getConverter(origOI, constOI); + // exact type conversion or get out + if (conv == null) { + return; + } + Object[] convCols = new Object[] {conv.convert(literal)}; + int n = ObjectInspectorUtils.getBucketNumber(convCols, new ObjectInspector[]{constOI}, ctxt.getNumBuckets()); + bs.set(n); + } + if (bs.cardinality() < ctxt.getNumBuckets()) { + // there is a valid bucket pruning filter + top.getConf().setIncludedBuckets(bs); + top.getConf().setNumBuckets(ctxt.getNumBuckets()); + } + } + + private boolean addLiteral(List<Object> literals, PredicateLeaf leaf) { + switch (leaf.getOperator()) { + case EQUALS: + return literals.add(leaf.getLiteral()); + case IN: + return literals.addAll(leaf.getLiteralList()); + default: + return false; + } + } + } + + public final class FixedBucketPruningOptimizerCtxt implements + NodeProcessorCtx { + public final ParseContext pctx; + private int numBuckets; + private PrunedPartitionList partitions; + private List<String> bucketCols; + private List<StructField> schema; + + public FixedBucketPruningOptimizerCtxt(ParseContext pctx) { + this.pctx = pctx; + } + + public void setSchema(ArrayList<StructField> fields) { + this.schema = fields; + } + + public List<StructField> getSchema() { + return this.schema; + } + + public void setBucketCols(List<String> bucketCols) { + this.bucketCols = bucketCols; + } + + public List<String> getBucketCols() { + return this.bucketCols; + } + + public void setPartitions(PrunedPartitionList partitions) { + this.partitions = partitions; + } + + public PrunedPartitionList getPartitions() { + return this.partitions; + } + + public int getNumBuckets() { + return numBuckets; + } + + public void setNumBuckets(int numBuckets) { + this.numBuckets = numBuckets; + } + } + + @Override + public ParseContext transform(ParseContext pctx) throws SemanticException { + // create the context for walking operators + FixedBucketPruningOptimizerCtxt opPartWalkerCtx = new FixedBucketPruningOptimizerCtxt( + pctx);
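+ // two walks over the operator tree: the first collects the table and + // partition bucketing metadata, the second turns the filter sargs into + // an included-bucket bitset on each eligible table scan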
+ + // Retrieve all partitions generated from partition pruner and partition + // column pruner + PrunerUtils.walkOperatorTree(pctx, opPartWalkerCtx, + new FixedBucketPartitionWalker(), new NoopWalker()); + + if (opPartWalkerCtx.getNumBuckets() < 0) { + // bail out + return pctx; + } else { + // walk operator tree to create expression tree for filter buckets + PrunerUtils.walkOperatorTree(pctx, opPartWalkerCtx, + new BucketBitsetGenerator(), new NoopWalker()); + } + + return pctx; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java index 6347872..4a7fc0d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java @@ -166,6 +166,13 @@ public void initialize(HiveConf hiveConf) { transformations.add(new JoinReorder()); } + if (HiveConf.getBoolVar(hiveConf, + HiveConf.ConfVars.TEZ_OPTIMIZE_BUCKET_PRUNING) + && HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTPPD) + && HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTINDEXFILTER)) { + transformations.add(new FixedBucketPruningOptimizer()); + } + if(HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.DYNAMICPARTITIONING) && HiveConf.getVar(hiveConf, HiveConf.ConfVars.DYNAMICPARTITIONINGMODE).equals("nonstrict") && HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTSORTDYNAMICPARTITION) && diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java index c5f7426..27d7276 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java @@ -184,6 +184,10 @@ public MapWork createMapWork(GenTezProcContext context, Operator root, mapWork.setDummyTableScan(true); } + if (ts.getConf().getNumBuckets() > 0) { + mapWork.setIncludedBuckets(ts.getConf().getIncludedBuckets()); + } + // add new item to the tez work tezWork.add(mapWork); diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java index 87c15a2..6c34412 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java @@ -20,6 +20,7 @@ import java.util.ArrayList; import java.util.Arrays; +import java.util.BitSet; import java.util.Collection; import java.util.HashMap; import java.util.Iterator; @@ -131,6 +132,10 @@ private boolean doSplitsGrouping = true; + // BitSets can't be correctly serialized by Kryo's default serializer: + // BitSet::wordsInUse is transient, so the included buckets are stored as a byte array + private byte[] includedBuckets; + /** Whether LLAP IO will be used for inputs. */ private String llapIoDesc; @@ -617,4 +622,13 @@ public void setBaseSrc(String[] baseSrc) { public void setMapAliases(List<String> mapAliases) { this.mapAliases = mapAliases; } + + public BitSet getIncludedBuckets() { + return includedBuckets != null ?
BitSet.valueOf(includedBuckets) : null; + } + + public void setIncludedBuckets(BitSet includedBuckets) { + // see comment next to the field + this.includedBuckets = includedBuckets.toByteArray(); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java index 6661ce6..fb378f8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java @@ -20,6 +20,7 @@ import java.io.Serializable; import java.util.ArrayList; +import java.util.BitSet; import java.util.List; import java.util.Map; @@ -105,6 +106,10 @@ private transient Table tableMetadata; + private BitSet includedBuckets; + + private int numBuckets = -1; + public TableScanDesc() { this(null, null); } @@ -312,4 +317,37 @@ public String getSerializedFilterObject() { public void setSerializedFilterObject(String serializedFilterObject) { this.serializedFilterObject = serializedFilterObject; } + + public void setIncludedBuckets(BitSet bitset) { + this.includedBuckets = bitset; + } + + public BitSet getIncludedBuckets() { + return this.includedBuckets; + } + + @Explain(displayName = "buckets included", explainLevels = { Level.EXTENDED }) + public String getIncludedBucketExplain() { + if (this.includedBuckets == null) { + return null; + } + + StringBuilder sb = new StringBuilder(); + sb.append("["); + for (int i = 0; i < this.includedBuckets.size(); i++) { + if (this.includedBuckets.get(i)) { + sb.append(String.format("%d,", i)); + } + } + sb.append(String.format("] of %d", numBuckets)); + return sb.toString(); + } + + public int getNumBuckets() { + return numBuckets; + } + + public void setNumBuckets(int numBuckets) { + this.numBuckets = numBuckets; + } } diff --git ql/src/test/queries/clientpositive/bucketpruning1.q ql/src/test/queries/clientpositive/bucketpruning1.q new file mode 100644 index 0000000..be403a5 --- /dev/null +++ ql/src/test/queries/clientpositive/bucketpruning1.q @@ -0,0 +1,97 @@ +set hive.optimize.ppd=true; +set hive.optimize.index.filter=true; +set hive.tez.bucket.pruning=true; +set hive.explain.user=false; +set hive.fetch.task.conversion=none; + +CREATE TABLE srcbucket_pruned(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 16 BUCKETS STORED AS TEXTFILE; + +-- cannot prune 2-key scenarios without a smarter optimizer +CREATE TABLE srcbucket_unpruned(key int, value string) partitioned by (ds string) CLUSTERED BY (key,value) INTO 16 BUCKETS STORED AS TEXTFILE; + +-- good cases + +explain extended +select * from srcbucket_pruned where key = 1; + +explain extended +select * from srcbucket_pruned where key = 16; + +explain extended +select * from srcbucket_pruned where key = 17; + +explain extended +select * from srcbucket_pruned where key = 16+1; + +explain extended +select * from srcbucket_pruned where key = '11'; + +explain extended +select * from srcbucket_pruned where key = 1 and ds='2008-04-08'; + +explain extended +select * from srcbucket_pruned where key = 1 and ds='2008-04-08' and value='One'; + +explain extended +select * from srcbucket_pruned where value='One' and key = 1 and ds='2008-04-08'; + +explain extended +select * from srcbucket_pruned where key in (2,3); + +explain extended +select * from srcbucket_pruned where key in (2,3) and ds='2008-04-08'; + +explain extended +select * from srcbucket_pruned where key in (2,3) and ds='2008-04-08' and value='One'; + +explain extended +select * from srcbucket_pruned where value='One' and key in 
(2,3) and ds='2008-04-08'; + +explain extended +select * from srcbucket_pruned where (key=1 or key=2) and ds='2008-04-08'; + +explain extended +select * from srcbucket_pruned where (key=1 or key=2) and value = 'One' and ds='2008-04-08'; + +-- valid but irrelevant case (all buckets selected) + +explain extended +select * from srcbucket_pruned where key in (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17); + +explain extended +select * from srcbucket_pruned where key in (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17) and ds='2008-04-08'; + +explain extended +select * from srcbucket_pruned where key in (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17) and ds='2008-04-08' and value='One'; + +explain extended +select * from srcbucket_pruned where value='One' and key in (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17) and ds='2008-04-08'; + +-- valid, but unimplemented cases + +explain extended +select * from srcbucket_pruned where key = 1 and ds='2008-04-08' or key = 2; + +explain extended +select * from srcbucket_pruned where key = 1 and ds='2008-04-08' and (value='One' or value = 'Two'); + +explain extended +select * from srcbucket_pruned where key = 1 or value = "One" or key = 2 and value = "Two"; + +-- Invalid cases + +explain extended +select * from srcbucket_pruned where key = 'x11'; + +explain extended +select * from srcbucket_pruned where key = 1 or value = "One"; + +explain extended +select * from srcbucket_pruned where key = 1 or value = "One" or key = 2; + +explain extended +select * from srcbucket_unpruned where key in (3, 5); + +explain extended +select * from srcbucket_unpruned where key = 1; + diff --git ql/src/test/results/clientpositive/tez/bucketpruning1.q.out ql/src/test/results/clientpositive/tez/bucketpruning1.q.out new file mode 100644 index 0000000..3b90687 --- /dev/null +++ ql/src/test/results/clientpositive/tez/bucketpruning1.q.out @@ -0,0 +1,2360 @@ +PREHOOK: query: CREATE TABLE srcbucket_pruned(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 16 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcbucket_pruned +POSTHOOK: query: CREATE TABLE srcbucket_pruned(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 16 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcbucket_pruned +PREHOOK: query: -- cannot prune 2-key scenarios without a smarter optimizer +CREATE TABLE srcbucket_unpruned(key int, value string) partitioned by (ds string) CLUSTERED BY (key,value) INTO 16 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcbucket_unpruned +POSTHOOK: query: -- cannot prune 2-key scenarios without a smarter optimizer +CREATE TABLE srcbucket_unpruned(key int, value string) partitioned by (ds string) CLUSTERED BY (key,value) INTO 16 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcbucket_unpruned +PREHOOK: query: -- good cases + +explain extended +select * from srcbucket_pruned where key = 1 +PREHOOK: type: QUERY +POSTHOOK: query: -- good cases + +explain extended +select * from srcbucket_pruned where key = 1 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + srcbucket_pruned + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_WHERE + = + TOK_TABLE_OR_COL + key + 1 + + +STAGE 
DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcbucket_pruned + filterExpr: (key = 1) (type: boolean) + buckets included: [1,] of 16 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (key = 1) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: 1 (type: int), value (type: string), ds (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types int:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain extended +select * from srcbucket_pruned where key = 16 +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +select * from srcbucket_pruned where key = 16 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + srcbucket_pruned + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_WHERE + = + TOK_TABLE_OR_COL + key + 16 + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcbucket_pruned + filterExpr: (key = 16) (type: boolean) + buckets included: [0,] of 16 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (key = 16) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: 16 (type: int), value (type: string), ds (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types int:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain extended +select * from srcbucket_pruned where key = 17 +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +select * from srcbucket_pruned where key = 17 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + srcbucket_pruned + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_WHERE + = + TOK_TABLE_OR_COL + key + 17 + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcbucket_pruned + filterExpr: (key = 17) (type: boolean) + buckets included: [1,] of 16 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (key = 17) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: 17 (type: int), value (type: string), ds (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types int:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain extended +select * from srcbucket_pruned where key = 16+1 +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +select * from srcbucket_pruned where key = 16+1 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + srcbucket_pruned + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_WHERE + = + TOK_TABLE_OR_COL + key + + + 16 + 1 + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcbucket_pruned + filterExpr: (key = 17) (type: boolean) + buckets included: [1,] of 16 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (key = 17) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: 17 (type: int), value (type: string), ds (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + 
compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types int:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain extended +select * from srcbucket_pruned where key = '11' +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +select * from srcbucket_pruned where key = '11' +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + srcbucket_pruned + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_WHERE + = + TOK_TABLE_OR_COL + key + '11' + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcbucket_pruned + filterExpr: (key = 11) (type: boolean) + buckets included: [11,] of 16 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (key = 11) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: 11 (type: int), value (type: string), ds (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types int:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain extended +select * from srcbucket_pruned where key = 1 and ds='2008-04-08' +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +select * from srcbucket_pruned where key = 1 and ds='2008-04-08' +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + srcbucket_pruned + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_WHERE + and + = + TOK_TABLE_OR_COL + key + 1 + = + TOK_TABLE_OR_COL + ds + '2008-04-08' + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: 
Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcbucket_pruned + filterExpr: ((key = 1) and (ds = '2008-04-08')) (type: boolean) + buckets included: [1,] of 16 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((key = 1) and (ds = '2008-04-08')) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: 1 (type: int), value (type: string), '2008-04-08' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types int:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain extended +select * from srcbucket_pruned where key = 1 and ds='2008-04-08' and value='One' +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +select * from srcbucket_pruned where key = 1 and ds='2008-04-08' and value='One' +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + srcbucket_pruned + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_WHERE + and + and + = + TOK_TABLE_OR_COL + key + 1 + = + TOK_TABLE_OR_COL + ds + '2008-04-08' + = + TOK_TABLE_OR_COL + value + 'One' + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcbucket_pruned + filterExpr: (((key = 1) and (ds = '2008-04-08')) and (value = 'One')) (type: boolean) + buckets included: [1,] of 16 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (((key = 1) and (ds = '2008-04-08')) and (value = 'One')) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: 1 (type: int), 'One' (type: string), '2008-04-08' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2 + 
columns.types int:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain extended +select * from srcbucket_pruned where value='One' and key = 1 and ds='2008-04-08' +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +select * from srcbucket_pruned where value='One' and key = 1 and ds='2008-04-08' +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + srcbucket_pruned + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_WHERE + and + and + = + TOK_TABLE_OR_COL + value + 'One' + = + TOK_TABLE_OR_COL + key + 1 + = + TOK_TABLE_OR_COL + ds + '2008-04-08' + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcbucket_pruned + filterExpr: (((value = 'One') and (key = 1)) and (ds = '2008-04-08')) (type: boolean) + buckets included: [1,] of 16 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (((value = 'One') and (key = 1)) and (ds = '2008-04-08')) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: 1 (type: int), 'One' (type: string), '2008-04-08' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types int:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain extended +select * from srcbucket_pruned where key in (2,3) +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +select * from srcbucket_pruned where key in (2,3) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + srcbucket_pruned + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_WHERE + TOK_FUNCTION + in + TOK_TABLE_OR_COL + key + 2 + 3 + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcbucket_pruned + filterExpr: (key) IN (2, 3) (type: 
boolean) + buckets included: [2,3,] of 16 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (key) IN (2, 3) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string), ds (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types int:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain extended +select * from srcbucket_pruned where key in (2,3) and ds='2008-04-08' +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +select * from srcbucket_pruned where key in (2,3) and ds='2008-04-08' +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + srcbucket_pruned + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_WHERE + and + TOK_FUNCTION + in + TOK_TABLE_OR_COL + key + 2 + 3 + = + TOK_TABLE_OR_COL + ds + '2008-04-08' + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcbucket_pruned + filterExpr: ((key) IN (2, 3) and (ds = '2008-04-08')) (type: boolean) + buckets included: [2,3,] of 16 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((key) IN (2, 3) and (ds = '2008-04-08')) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string), '2008-04-08' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types int:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + 
GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain extended +select * from srcbucket_pruned where key in (2,3) and ds='2008-04-08' and value='One' +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +select * from srcbucket_pruned where key in (2,3) and ds='2008-04-08' and value='One' +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + srcbucket_pruned + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_WHERE + and + and + TOK_FUNCTION + in + TOK_TABLE_OR_COL + key + 2 + 3 + = + TOK_TABLE_OR_COL + ds + '2008-04-08' + = + TOK_TABLE_OR_COL + value + 'One' + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcbucket_pruned + filterExpr: (((key) IN (2, 3) and (ds = '2008-04-08')) and (value = 'One')) (type: boolean) + buckets included: [2,3,] of 16 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (((key) IN (2, 3) and (ds = '2008-04-08')) and (value = 'One')) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: key (type: int), 'One' (type: string), '2008-04-08' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types int:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain extended +select * from srcbucket_pruned where value='One' and key in (2,3) and ds='2008-04-08' +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +select * from srcbucket_pruned where value='One' and key in (2,3) and ds='2008-04-08' +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + srcbucket_pruned + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_WHERE + and + and + = + TOK_TABLE_OR_COL + value + 'One' + TOK_FUNCTION + in + TOK_TABLE_OR_COL + key + 2 + 3 + = + TOK_TABLE_OR_COL + ds + '2008-04-08' + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcbucket_pruned + filterExpr: (((value = 'One') and (key) IN (2, 3)) and (ds = '2008-04-08')) (type: boolean) + buckets included: [2,3,] of 16 + Statistics: Num 
rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (((value = 'One') and (key) IN (2, 3)) and (ds = '2008-04-08')) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: key (type: int), 'One' (type: string), '2008-04-08' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types int:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain extended +select * from srcbucket_pruned where (key=1 or key=2) and ds='2008-04-08' +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +select * from srcbucket_pruned where (key=1 or key=2) and ds='2008-04-08' +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + srcbucket_pruned + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_WHERE + and + or + = + TOK_TABLE_OR_COL + key + 1 + = + TOK_TABLE_OR_COL + key + 2 + = + TOK_TABLE_OR_COL + ds + '2008-04-08' + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcbucket_pruned + filterExpr: (((key = 1) or (key = 2)) and (ds = '2008-04-08')) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (((key = 1) or (key = 2)) and (ds = '2008-04-08')) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string), '2008-04-08' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types int:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + 
+                        GatherStats: false
+                        MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain extended
+select * from srcbucket_pruned where (key=1 or key=2) and value = 'One' and ds='2008-04-08'
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended
+select * from srcbucket_pruned where (key=1 or key=2) and value = 'One' and ds='2008-04-08'
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+
+TOK_QUERY
+   TOK_FROM
+      TOK_TABREF
+         TOK_TABNAME
+            srcbucket_pruned
+   TOK_INSERT
+      TOK_DESTINATION
+         TOK_DIR
+            TOK_TMP_FILE
+      TOK_SELECT
+         TOK_SELEXPR
+            TOK_ALLCOLREF
+      TOK_WHERE
+         and
+            and
+               or
+                  =
+                     TOK_TABLE_OR_COL
+                        key
+                     1
+                  =
+                     TOK_TABLE_OR_COL
+                        key
+                     2
+               =
+                  TOK_TABLE_OR_COL
+                     value
+                  'One'
+            =
+               TOK_TABLE_OR_COL
+                  ds
+               '2008-04-08'
+
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: srcbucket_pruned
+                  filterExpr: ((((key = 1) or (key = 2)) and (value = 'One')) and (ds = '2008-04-08')) (type: boolean)
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                  GatherStats: false
+                  Filter Operator
+                    isSamplingPred: false
+                    predicate: ((((key = 1) or (key = 2)) and (value = 'One')) and (ds = '2008-04-08')) (type: boolean)
+                    Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                    Select Operator
+                      expressions: key (type: int), 'One' (type: string), '2008-04-08' (type: string)
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        GlobalTableId: 0
+#### A masked pattern was here ####
+                        NumFilesPerFileSink: 1
+                        Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+#### A masked pattern was here ####
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            properties:
+                              columns _col0,_col1,_col2
+                              columns.types int:string:string
+                              escape.delim \
+                              hive.serialization.extend.additional.nesting.levels true
+                              serialization.escape.crlf true
+                              serialization.format 1
+                              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        TotalFiles: 1
+                        GatherStats: false
+                        MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: -- valid but irrelevant case (all buckets selected)
+
+explain extended
+select * from srcbucket_pruned where key in (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17)
+PREHOOK: type: QUERY
+POSTHOOK: query: -- valid but irrelevant case (all buckets selected)
+
+explain extended
+select * from srcbucket_pruned where key in (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17)
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+
+TOK_QUERY
+   TOK_FROM
+      TOK_TABREF
+         TOK_TABNAME
+            srcbucket_pruned
+   TOK_INSERT
+      TOK_DESTINATION
+         TOK_DIR
+            TOK_TMP_FILE
+      TOK_SELECT
+         TOK_SELEXPR
+            TOK_ALLCOLREF
+      TOK_WHERE
+         TOK_FUNCTION
+            in
+            TOK_TABLE_OR_COL
+               key
+            1
+            2
+            3
+            4
+            5
+            6
+            7
+            8
+            9
+            10
+            11
+            12
+            13
+            14
+            15
+            16
+            17
+
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: srcbucket_pruned
+                  filterExpr: (key) IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17) (type: boolean)
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                  GatherStats: false
+                  Filter Operator
+                    isSamplingPred: false
+                    predicate: (key) IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17) (type: boolean)
+                    Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                    Select Operator
+                      expressions: key (type: int), value (type: string), ds (type: string)
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        GlobalTableId: 0
+#### A masked pattern was here ####
+                        NumFilesPerFileSink: 1
+                        Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+#### A masked pattern was here ####
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            properties:
+                              columns _col0,_col1,_col2
+                              columns.types int:string:string
+                              escape.delim \
+                              hive.serialization.extend.additional.nesting.levels true
+                              serialization.escape.crlf true
+                              serialization.format 1
+                              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        TotalFiles: 1
+                        GatherStats: false
+                        MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain extended
+select * from srcbucket_pruned where key in (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17) and ds='2008-04-08'
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended
+select * from srcbucket_pruned where key in (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17) and ds='2008-04-08'
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+
+TOK_QUERY
+   TOK_FROM
+      TOK_TABREF
+         TOK_TABNAME
+            srcbucket_pruned
+   TOK_INSERT
+      TOK_DESTINATION
+         TOK_DIR
+            TOK_TMP_FILE
+      TOK_SELECT
+         TOK_SELEXPR
+            TOK_ALLCOLREF
+      TOK_WHERE
+         and
+            TOK_FUNCTION
+               in
+               TOK_TABLE_OR_COL
+                  key
+               1
+               2
+               3
+               4
+               5
+               6
+               7
+               8
+               9
+               10
+               11
+               12
+               13
+               14
+               15
+               16
+               17
+            =
+               TOK_TABLE_OR_COL
+                  ds
+               '2008-04-08'
+
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: srcbucket_pruned
+                  filterExpr: ((key) IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17) and (ds = '2008-04-08')) (type: boolean)
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                  GatherStats: false
+                  Filter Operator
+                    isSamplingPred: false
+                    predicate: ((key) IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17) and (ds = '2008-04-08')) (type: boolean)
+                    Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                    Select Operator
+                      expressions: key (type: int), value (type: string), '2008-04-08' (type: string)
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        GlobalTableId: 0
+#### A masked pattern was here ####
+                        NumFilesPerFileSink: 1
+                        Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+#### A masked pattern was here ####
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            properties:
+                              columns _col0,_col1,_col2
+                              columns.types int:string:string
+                              escape.delim \
+                              hive.serialization.extend.additional.nesting.levels true
+                              serialization.escape.crlf true
+                              serialization.format 1
+                              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        TotalFiles: 1
+                        GatherStats: false
+                        MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain extended
+select * from srcbucket_pruned where key in (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17) and ds='2008-04-08' and value='One'
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended
+select * from srcbucket_pruned where key in (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17) and ds='2008-04-08' and value='One'
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+
+TOK_QUERY
+   TOK_FROM
+      TOK_TABREF
+         TOK_TABNAME
+            srcbucket_pruned
+   TOK_INSERT
+      TOK_DESTINATION
+         TOK_DIR
+            TOK_TMP_FILE
+      TOK_SELECT
+         TOK_SELEXPR
+            TOK_ALLCOLREF
+      TOK_WHERE
+         and
+            and
+               TOK_FUNCTION
+                  in
+                  TOK_TABLE_OR_COL
+                     key
+                  1
+                  2
+                  3
+                  4
+                  5
+                  6
+                  7
+                  8
+                  9
+                  10
+                  11
+                  12
+                  13
+                  14
+                  15
+                  16
+                  17
+               =
+                  TOK_TABLE_OR_COL
+                     ds
+                  '2008-04-08'
+            =
+               TOK_TABLE_OR_COL
+                  value
+               'One'
+
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: srcbucket_pruned
+                  filterExpr: (((key) IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17) and (ds = '2008-04-08')) and (value = 'One')) (type: boolean)
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                  GatherStats: false
+                  Filter Operator
+                    isSamplingPred: false
+                    predicate: (((key) IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17) and (ds = '2008-04-08')) and (value = 'One')) (type: boolean)
+                    Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                    Select Operator
+                      expressions: key (type: int), 'One' (type: string), '2008-04-08' (type: string)
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        GlobalTableId: 0
+#### A masked pattern was here ####
+                        NumFilesPerFileSink: 1
+                        Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+#### A masked pattern was here ####
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            properties:
+                              columns _col0,_col1,_col2
+                              columns.types int:string:string
+                              escape.delim \
+                              hive.serialization.extend.additional.nesting.levels true
+                              serialization.escape.crlf true
+                              serialization.format 1
+                              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        TotalFiles: 1
+                        GatherStats: false
+                        MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain extended
+select * from srcbucket_pruned where value='One' and key in (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17) and ds='2008-04-08'
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended
+select * from srcbucket_pruned where value='One' and key in (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17) and ds='2008-04-08'
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+
+TOK_QUERY
+   TOK_FROM
+      TOK_TABREF
+         TOK_TABNAME
+            srcbucket_pruned
+   TOK_INSERT
+      TOK_DESTINATION
+         TOK_DIR
+            TOK_TMP_FILE
+      TOK_SELECT
+         TOK_SELEXPR
+            TOK_ALLCOLREF
+      TOK_WHERE
+         and
+            and
+               =
+                  TOK_TABLE_OR_COL
+                     value
+                  'One'
+               TOK_FUNCTION
+                  in
+                  TOK_TABLE_OR_COL
+                     key
+                  1
+                  2
+                  3
+                  4
+                  5
+                  6
+                  7
+                  8
+                  9
+                  10
+                  11
+                  12
+                  13
+                  14
+                  15
+                  16
+                  17
+            =
+               TOK_TABLE_OR_COL
+                  ds
+               '2008-04-08'
+
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: srcbucket_pruned
+                  filterExpr: (((value = 'One') and (key) IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17)) and (ds = '2008-04-08')) (type: boolean)
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                  GatherStats: false
+                  Filter Operator
+                    isSamplingPred: false
+                    predicate: (((value = 'One') and (key) IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17)) and (ds = '2008-04-08')) (type: boolean)
+                    Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                    Select Operator
+                      expressions: key (type: int), 'One' (type: string), '2008-04-08' (type: string)
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        GlobalTableId: 0
+#### A masked pattern was here ####
+                        NumFilesPerFileSink: 1
+                        Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+#### A masked pattern was here ####
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            properties:
+                              columns _col0,_col1,_col2
+                              columns.types int:string:string
+                              escape.delim \
+                              hive.serialization.extend.additional.nesting.levels true
+                              serialization.escape.crlf true
+                              serialization.format 1
+                              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        TotalFiles: 1
+                        GatherStats: false
+                        MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: -- valid, but unimplemented cases
+
+explain extended
+select * from srcbucket_pruned where key = 1 and ds='2008-04-08' or key = 2
+PREHOOK: type: QUERY
+POSTHOOK: query: -- valid, but unimplemented cases
+
+explain extended
+select * from srcbucket_pruned where key = 1 and ds='2008-04-08' or key = 2
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+
+TOK_QUERY
+   TOK_FROM
+      TOK_TABREF
+         TOK_TABNAME
+            srcbucket_pruned
+   TOK_INSERT
+      TOK_DESTINATION
+         TOK_DIR
+            TOK_TMP_FILE
+      TOK_SELECT
+         TOK_SELEXPR
+            TOK_ALLCOLREF
+      TOK_WHERE
+         or
+            and
+               =
+                  TOK_TABLE_OR_COL
+                     key
+                  1
+               =
+                  TOK_TABLE_OR_COL
+                     ds
+                  '2008-04-08'
+            =
+               TOK_TABLE_OR_COL
+                  key
+               2
+
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: srcbucket_pruned
+                  filterExpr: (((key = 1) and (ds = '2008-04-08')) or (key = 2)) (type: boolean)
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                  GatherStats: false
+                  Filter Operator
+                    isSamplingPred: false
+                    predicate: (((key = 1) and (ds = '2008-04-08')) or (key = 2)) (type: boolean)
+                    Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                    Select Operator
+                      expressions: key (type: int), value (type: string), ds (type: string)
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        GlobalTableId: 0
+#### A masked pattern was here ####
+                        NumFilesPerFileSink: 1
+                        Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+#### A masked pattern was here ####
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            properties:
+                              columns _col0,_col1,_col2
+                              columns.types int:string:string
+                              escape.delim \
+                              hive.serialization.extend.additional.nesting.levels true
+                              serialization.escape.crlf true
+                              serialization.format 1
+                              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        TotalFiles: 1
+                        GatherStats: false
+                        MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain extended
+select * from srcbucket_pruned where key = 1 and ds='2008-04-08' and (value='One' or value = 'Two')
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended
+select * from srcbucket_pruned where key = 1 and ds='2008-04-08' and (value='One' or value = 'Two')
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+
+TOK_QUERY
+   TOK_FROM
+      TOK_TABREF
+         TOK_TABNAME
+            srcbucket_pruned
+   TOK_INSERT
+      TOK_DESTINATION
+         TOK_DIR
+            TOK_TMP_FILE
+      TOK_SELECT
+         TOK_SELEXPR
+            TOK_ALLCOLREF
+      TOK_WHERE
+         and
+            and
+               =
+                  TOK_TABLE_OR_COL
+                     key
+                  1
+               =
+                  TOK_TABLE_OR_COL
+                     ds
+                  '2008-04-08'
+            or
+               =
+                  TOK_TABLE_OR_COL
+                     value
+                  'One'
+               =
+                  TOK_TABLE_OR_COL
+                     value
+                  'Two'
+
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: srcbucket_pruned
+                  filterExpr: (((key = 1) and (ds = '2008-04-08')) and ((value = 'One') or (value = 'Two'))) (type: boolean)
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                  GatherStats: false
+                  Filter Operator
+                    isSamplingPred: false
+                    predicate: (((key = 1) and (ds = '2008-04-08')) and ((value = 'One') or (value = 'Two'))) (type: boolean)
+                    Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                    Select Operator
+                      expressions: 1 (type: int), value (type: string), '2008-04-08' (type: string)
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        GlobalTableId: 0
+#### A masked pattern was here ####
+                        NumFilesPerFileSink: 1
+                        Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+#### A masked pattern was here ####
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            properties:
+                              columns _col0,_col1,_col2
+                              columns.types int:string:string
+                              escape.delim \
+                              hive.serialization.extend.additional.nesting.levels true
+                              serialization.escape.crlf true
+                              serialization.format 1
+                              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        TotalFiles: 1
+                        GatherStats: false
+                        MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain extended
+select * from srcbucket_pruned where key = 1 or value = "One" or key = 2 and value = "Two"
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended
+select * from srcbucket_pruned where key = 1 or value = "One" or key = 2 and value = "Two"
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+
+TOK_QUERY
+   TOK_FROM
+      TOK_TABREF
+         TOK_TABNAME
+            srcbucket_pruned
+   TOK_INSERT
+      TOK_DESTINATION
+         TOK_DIR
+            TOK_TMP_FILE
+      TOK_SELECT
+         TOK_SELEXPR
+            TOK_ALLCOLREF
+      TOK_WHERE
+         or
+            or
+               =
+                  TOK_TABLE_OR_COL
+                     key
+                  1
+               =
+                  TOK_TABLE_OR_COL
+                     value
+                  "One"
+            and
+               =
+                  TOK_TABLE_OR_COL
+                     key
+                  2
+               =
+                  TOK_TABLE_OR_COL
+                     value
+                  "Two"
+
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: srcbucket_pruned
+                  filterExpr: ((key = 1) or (value = 'One') or ((key = 2) and (value = 'Two'))) (type: boolean)
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                  GatherStats: false
+                  Filter Operator
+                    isSamplingPred: false
+                    predicate: ((key = 1) or (value = 'One') or ((key = 2) and (value = 'Two'))) (type: boolean)
+                    Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                    Select Operator
+                      expressions: key (type: int), value (type: string), ds (type: string)
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        GlobalTableId: 0
+#### A masked pattern was here ####
+                        NumFilesPerFileSink: 1
+                        Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+#### A masked pattern was here ####
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            properties:
+                              columns _col0,_col1,_col2
+                              columns.types int:string:string
+                              escape.delim \
+                              hive.serialization.extend.additional.nesting.levels true
+                              serialization.escape.crlf true
+                              serialization.format 1
+                              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        TotalFiles: 1
+                        GatherStats: false
+                        MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: -- Invalid cases
+
+explain extended
+select * from srcbucket_pruned where key = 'x11'
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Invalid cases
+
+explain extended
+select * from srcbucket_pruned where key = 'x11'
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+
+TOK_QUERY
+   TOK_FROM
+      TOK_TABREF
+         TOK_TABNAME
+            srcbucket_pruned
+   TOK_INSERT
+      TOK_DESTINATION
+         TOK_DIR
+            TOK_TMP_FILE
+      TOK_SELECT
+         TOK_SELEXPR
+            TOK_ALLCOLREF
+      TOK_WHERE
+         =
+            TOK_TABLE_OR_COL
+               key
+            'x11'
+
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: srcbucket_pruned
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                  GatherStats: false
+                  Filter Operator
+                    isSamplingPred: false
+                    predicate: false (type: boolean)
+                    Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                    Select Operator
+                      expressions: key (type: int), value (type: string), ds (type: string)
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        GlobalTableId: 0
+#### A masked pattern was here ####
+                        NumFilesPerFileSink: 1
+                        Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+#### A masked pattern was here ####
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            properties:
+                              columns _col0,_col1,_col2
+                              columns.types int:string:string
+                              escape.delim \
+                              hive.serialization.extend.additional.nesting.levels true
+                              serialization.escape.crlf true
+                              serialization.format 1
+                              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        TotalFiles: 1
+                        GatherStats: false
+                        MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain extended
+select * from srcbucket_pruned where key = 1 or value = "One"
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended
+select * from srcbucket_pruned where key = 1 or value = "One"
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+
+TOK_QUERY
+   TOK_FROM
+      TOK_TABREF
+         TOK_TABNAME
+            srcbucket_pruned
+   TOK_INSERT
+      TOK_DESTINATION
+         TOK_DIR
+            TOK_TMP_FILE
+      TOK_SELECT
+         TOK_SELEXPR
+            TOK_ALLCOLREF
+      TOK_WHERE
+         or
+            =
+               TOK_TABLE_OR_COL
+                  key
+               1
+            =
+               TOK_TABLE_OR_COL
+                  value
+               "One"
+
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: srcbucket_pruned
+                  filterExpr: ((key = 1) or (value = 'One')) (type: boolean)
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                  GatherStats: false
+                  Filter Operator
+                    isSamplingPred: false
+                    predicate: ((key = 1) or (value = 'One')) (type: boolean)
+                    Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                    Select Operator
+                      expressions: key (type: int), value (type: string), ds (type: string)
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        GlobalTableId: 0
+#### A masked pattern was here ####
+                        NumFilesPerFileSink: 1
+                        Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+#### A masked pattern was here ####
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            properties:
+                              columns _col0,_col1,_col2
+                              columns.types int:string:string
+                              escape.delim \
+                              hive.serialization.extend.additional.nesting.levels true
+                              serialization.escape.crlf true
+                              serialization.format 1
+                              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        TotalFiles: 1
+                        GatherStats: false
+                        MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain extended
+select * from srcbucket_pruned where key = 1 or value = "One" or key = 2
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended
+select * from srcbucket_pruned where key = 1 or value = "One" or key = 2
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+
+TOK_QUERY
+   TOK_FROM
+      TOK_TABREF
+         TOK_TABNAME
+            srcbucket_pruned
+   TOK_INSERT
+      TOK_DESTINATION
+         TOK_DIR
+            TOK_TMP_FILE
+      TOK_SELECT
+         TOK_SELEXPR
+            TOK_ALLCOLREF
+      TOK_WHERE
+         or
+            or
+               =
+                  TOK_TABLE_OR_COL
+                     key
+                  1
+               =
+                  TOK_TABLE_OR_COL
+                     value
+                  "One"
+            =
+               TOK_TABLE_OR_COL
+                  key
+               2
+
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: srcbucket_pruned
+                  filterExpr: ((key = 1) or (value = 'One') or (key = 2)) (type: boolean)
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                  GatherStats: false
+                  Filter Operator
+                    isSamplingPred: false
+                    predicate: ((key = 1) or (value = 'One') or (key = 2)) (type: boolean)
+                    Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                    Select Operator
+                      expressions: key (type: int), value (type: string), ds (type: string)
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        GlobalTableId: 0
+#### A masked pattern was here ####
+                        NumFilesPerFileSink: 1
+                        Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+#### A masked pattern was here ####
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            properties:
+                              columns _col0,_col1,_col2
+                              columns.types int:string:string
+                              escape.delim \
+                              hive.serialization.extend.additional.nesting.levels true
+                              serialization.escape.crlf true
+                              serialization.format 1
+                              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        TotalFiles: 1
+                        GatherStats: false
+                        MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain extended
+select * from srcbucket_unpruned where key in (3, 5)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended
+select * from srcbucket_unpruned where key in (3, 5)
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+
+TOK_QUERY
+   TOK_FROM
+      TOK_TABREF
+         TOK_TABNAME
+            srcbucket_unpruned
+   TOK_INSERT
+      TOK_DESTINATION
+         TOK_DIR
+            TOK_TMP_FILE
+      TOK_SELECT
+         TOK_SELEXPR
+            TOK_ALLCOLREF
+      TOK_WHERE
+         TOK_FUNCTION
+            in
+            TOK_TABLE_OR_COL
+               key
+            3
+            5
+
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: srcbucket_unpruned
+                  filterExpr: (key) IN (3, 5) (type: boolean)
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                  GatherStats: false
+                  Filter Operator
+                    isSamplingPred: false
+                    predicate: (key) IN (3, 5) (type: boolean)
+                    Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                    Select Operator
+                      expressions: key (type: int), value (type: string), ds (type: string)
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        GlobalTableId: 0
+#### A masked pattern was here ####
+                        NumFilesPerFileSink: 1
+                        Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+#### A masked pattern was here ####
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            properties:
+                              columns _col0,_col1,_col2
+                              columns.types int:string:string
+                              escape.delim \
+                              hive.serialization.extend.additional.nesting.levels true
+                              serialization.escape.crlf true
+                              serialization.format 1
+                              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        TotalFiles: 1
+                        GatherStats: false
+                        MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain extended
+select * from srcbucket_unpruned where key = 1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended
+select * from srcbucket_unpruned where key = 1
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+
+TOK_QUERY
+   TOK_FROM
+      TOK_TABREF
+         TOK_TABNAME
+            srcbucket_unpruned
+   TOK_INSERT
+      TOK_DESTINATION
+         TOK_DIR
+            TOK_TMP_FILE
+      TOK_SELECT
+         TOK_SELEXPR
+            TOK_ALLCOLREF
+      TOK_WHERE
+         =
+            TOK_TABLE_OR_COL
+               key
+            1
+
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: srcbucket_unpruned
+                  filterExpr: (key = 1) (type: boolean)
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                  GatherStats: false
+                  Filter Operator
+                    isSamplingPred: false
+                    predicate: (key = 1) (type: boolean)
+                    Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                    Select Operator
+                      expressions: 1 (type: int), value (type: string), ds (type: string)
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        GlobalTableId: 0
+#### A masked pattern was here ####
+                        NumFilesPerFileSink: 1
+                        Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+#### A masked pattern was here ####
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            properties:
+                              columns _col0,_col1,_col2
+                              columns.types int:string:string
+                              escape.delim \
+                              hive.serialization.extend.additional.nesting.levels true
+                              serialization.escape.crlf true
+                              serialization.format 1
+                              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        TotalFiles: 1
+                        GatherStats: false
+                        MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
-- 
2.4.0
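Reviewer note (not part of the patch): the "-- valid but irrelevant case (all buckets selected)" queries above exercise the situation where pruning succeeds but cannot skip any split. A minimal sketch of why, assuming srcbucket_pruned was created with 16 buckets and Hive's default integer bucketing, where the hash of an int key is the value itself and the bucket is (hash & Integer.MAX_VALUE) % numBuckets. BucketBitSetDemo and bucketsForInClause are hypothetical names for illustration only, not APIs from this patch.

import java.util.BitSet;

public class BucketBitSetDemo {

  // Collect the set of buckets an IN-list of int literals can touch,
  // under the assumed default int bucketing described above.
  static BitSet bucketsForInClause(int[] keys, int numBuckets) {
    BitSet buckets = new BitSet(numBuckets);
    for (int key : keys) {
      buckets.set((key & Integer.MAX_VALUE) % numBuckets);
    }
    return buckets;
  }

  public static void main(String[] args) {
    int numBuckets = 16; // assumed bucket count of srcbucket_pruned
    int[] keys = new int[17];
    for (int i = 0; i < keys.length; i++) {
      keys[i] = i + 1; // the literals 1..17 from the test query
    }
    BitSet buckets = bucketsForInClause(keys, numBuckets);
    // Prints "16 of 16 buckets selected": 1..15 hit buckets 1..15,
    // 16 hits bucket 0, and 17 wraps back to bucket 1.
    System.out.println(buckets.cardinality() + " of " + numBuckets
        + " buckets selected");
  }
}

With 17 consecutive key literals over 16 buckets, every bit ends up set, so a split-side pruner that intersects file bucket ids with this BitSet retains every split; that is why the plans for these queries are expected to be unchanged, while cases like key = 'x11' (non-int literal) or disjunctions involving non-bucket columns are rejected outright.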