diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index da171b1..66c5560 100644
--- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -1190,6 +1190,10 @@ public void setSparkConfigUpdated(boolean isSparkConfigUpdated) {
         "Whether to push predicates down into storage handlers. Ignored when hive.optimize.ppd is false."),
     HIVEPOINTLOOKUPOPTIMIZER("hive.optimize.point.lookup", true,
          "Whether to transform OR clauses in Filter operators into IN clauses"),
+    HIVEPOINTLOOKUPOPTIMIZERMIN("hive.optimize.point.lookup.min", 31,
+         "Minimum number of OR clauses needed to transform into IN clauses"),
+    HIVEPOINTLOOKUPOPTIMIZEREXTRACT("hive.optimize.point.lookup.extract", true,
+         "Extract partial expressions when optimizing point lookup IN clauses"),
     // Constant propagation optimizer
     HIVEOPTCONSTANTPROPAGATION("hive.optimize.constant.propagation", true, "Whether to enable constant propagation optimizer"),
     HIVEIDENTITYPROJECTREMOVER("hive.optimize.remove.identity.project", true, "Removes identity project from operator tree"),
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
index 14f362f..439f616 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
@@ -68,6 +68,18 @@ public void initialize(HiveConf hiveConf) {
 
     // Add the transformation that computes the lineage information.
     transformations.add(new Generator());
+
+    // Try to transform OR predicates in Filter into simpler IN clauses first
+    if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZER)) {
+      final int min = HiveConf.getIntVar(hiveConf,
+          HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZERMIN);
+      final boolean extract = HiveConf.getBoolVar(hiveConf,
+          HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZEREXTRACT);
+      final boolean testMode = HiveConf.getBoolVar(hiveConf,
+          HiveConf.ConfVars.HIVE_IN_TEST);
+      transformations.add(new PointLookupOptimizer(min, extract, testMode));
+    }
+
     if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTPPD)) {
       transformations.add(new PredicateTransitivePropagate());
       if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTCONSTANTPROPAGATION)) {
@@ -82,11 +94,6 @@ public void initialize(HiveConf hiveConf) {
       transformations.add(new ConstantPropagate());
     }
 
-    // Try to transform OR predicates in Filter into IN clauses.
-    if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZER)) {
-      transformations.add(new PointLookupOptimizer());
-    }
-
     if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTPPD)) {
      transformations.add(new PartitionPruner());
      transformations.add(new PartitionConditionRemover());
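Note (illustration, not part of the patch): the reorder matters because the rewrite now runs before PredicateTransitivePropagate and the partition-pruning transforms, so a partial IN clause synthesized over a partition column can still drive pruning. A sketch of the intended effect, assuming the pcr_t1 table that pointlookup2.q creates further down:

set hive.optimize.point.lookup.min=2;
set hive.optimize.point.lookup.extract=true;

explain extended
select key, value, ds
from pcr_t1
where (ds='2000-04-08' and key=1) or (ds='2000-04-09' and key=2);

-- expected rewrite, roughly:
--   (ds) IN ('2000-04-08', '2000-04-09')
--     and (struct(key,ds)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09'))
-- The (ds) IN pre-condition is consumed by partition pruning, which is why the
-- residual Filter Operator in pointlookup2.q.out below shows only the struct IN
-- while scanning just the 2000-04-08 and 2000-04-09 partitions.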
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/PointLookupOptimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/PointLookupOptimizer.java
index 6a8acec..8414086 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/PointLookupOptimizer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/PointLookupOptimizer.java
@@ -18,10 +18,14 @@ package org.apache.hadoop.hive.ql.optimizer;
 import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Comparator;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 import java.util.Stack;
 
 import org.apache.calcite.util.Pair;
@@ -46,15 +50,18 @@
 import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc.ExprNodeDescEqualityWrapper;
 import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFStruct;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 
 import com.google.common.collect.ArrayListMultimap;
+import com.google.common.collect.ImmutableSortedSet;
 import com.google.common.collect.ListMultimap;
 
 /**
@@ -71,7 +78,49 @@
       GenericUDFIn.class.getAnnotation(Description.class).name();
   private static final String STRUCT_UDF =
       GenericUDFStruct.class.getAnnotation(Description.class).name();
+  private static final String AND_UDF =
+      GenericUDFOPAnd.class.getAnnotation(Description.class).name();
+
+  // these are closure-bound for all the walkers in context
+  public final int minOrExpr;
+  public final boolean extract;
+  public final boolean testMode;
+
+  /*
+   * Pass in the configs once; the walkers created in transform() read them
+   */
+  public PointLookupOptimizer(final int min, final boolean extract, final boolean testMode) {
+    this.minOrExpr = min;
+    this.extract = extract;
+    this.testMode = testMode;
+  }
+
+  // HashSet iteration isn't ordered, so force string-sorted order
+  // to get a consistent test run.
+  private Collection<ExprNodeDescEqualityWrapper> sortForTests(
+      Set<ExprNodeDescEqualityWrapper> valuesExpr) {
+    if (!testMode) {
+      // normal case - sorting is wasted for an IN()
+      return valuesExpr;
+    }
+    final Collection<ExprNodeDescEqualityWrapper> sortedValues;
+
+    sortedValues = ImmutableSortedSet.copyOf(
+        new Comparator<ExprNodeDescEqualityWrapper>() {
+          @Override
+          public int compare(ExprNodeDescEqualityWrapper w1,
+              ExprNodeDescEqualityWrapper w2) {
+            // fail if you find nulls (this is a test-code section)
+            if (w1.equals(w2)) {
+              return 0;
+            }
+            return w1.getExprNodeDesc().getExprString()
+                .compareTo(w2.getExprNodeDesc().getExprString());
+          }
+        }, valuesExpr);
+
+    return sortedValues;
+  }
 
   @Override
   public ParseContext transform(ParseContext pctx) throws SemanticException {
@@ -140,8 +189,11 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
         return null;
       }
 
-      // 2. It is an OR operator
+      // 2. It is an OR operator with enough children
       List<ExprNodeDesc> children = fd.getChildren();
+      if (children.size() < minOrExpr) {
+        return null;
+      }
       ListMultimap<String, Pair<ExprNodeColumnDesc, ExprNodeConstantDesc>> columnConstantsMap =
           ArrayListMultimap.create();
       boolean modeAnd = false;
@@ -272,6 +324,50 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
       newPredicate = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo,
           FunctionRegistry.getFunctionInfo(IN_UDF).getGenericUDF(), newChildren);
 
+      if (extract && columns.size() > 1) {
+        final List<ExprNodeDesc> subExpr = new ArrayList<ExprNodeDesc>(columns.size() + 1);
+
+        // extract pre-conditions for the tuple expressions
+        // (a,b) IN ((1,2),(2,3)) ->
+        //          ((a) IN (1,2) and (b) IN (2,3)) and (a,b) IN ((1,2),(2,3))
+
+        for (String keyString : columnConstantsMap.keySet()) {
+          final Set<ExprNodeDescEqualityWrapper> valuesExpr =
+              new HashSet<ExprNodeDescEqualityWrapper>(children.size());
+          final List<Pair<ExprNodeColumnDesc, ExprNodeConstantDesc>> partial =
+              columnConstantsMap.get(keyString);
+          for (int i = 0; i < children.size(); i++) {
+            Pair<ExprNodeColumnDesc, ExprNodeConstantDesc> columnConstant = partial
+                .get(i);
+            valuesExpr
+                .add(new ExprNodeDescEqualityWrapper(columnConstant.right));
+          }
+          ExprNodeColumnDesc lookupCol = partial.get(0).left;
+          // generate a partial IN clause, if the column is a partition column
+          if (lookupCol.getIsPartitionColOrVirtualCol()
+              || valuesExpr.size() < children.size()) {
+            // optimize only nDV reductions
+            final List<ExprNodeDesc> inExpr = new ArrayList<ExprNodeDesc>();
+            inExpr.add(lookupCol);
+            for (ExprNodeDescEqualityWrapper value : sortForTests(valuesExpr)) {
+              inExpr.add(value.getExprNodeDesc());
+            }
+            subExpr.add(new ExprNodeGenericFuncDesc(
+                TypeInfoFactory.booleanTypeInfo, FunctionRegistry
+                    .getFunctionInfo(IN_UDF).getGenericUDF(), inExpr));
+          }
+        }
+        // loop complete, inspect the sub expressions generated
+        if (subExpr.size() > 0) {
+          // add the newPredicate to the end & produce an AND clause
+          subExpr.add(newPredicate);
+          newPredicate = new ExprNodeGenericFuncDesc(
+              TypeInfoFactory.booleanTypeInfo, FunctionRegistry
+                  .getFunctionInfo(AND_UDF).getGenericUDF(), subExpr);
+        }
+        // else, newPredicate is unmodified
+      }
+
       return newPredicate;
     }
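For reference (again not part of the patch), a minimal sketch of what the extract branch produces, assuming the standard src table: each struct column gets its own partial IN clause only when it is a partition column or its distinct constants are fewer than the OR branches, and the partial clauses are ANDed in front of the original struct IN:

set hive.optimize.point.lookup.min=2;
set hive.optimize.point.lookup.extract=true;

explain
SELECT key FROM src
WHERE (key = '0' AND value = '8')
   OR (key = '1' AND value = '5')
   OR (key = '2' AND value = '8');

-- expected predicate, roughly:
--   (value) IN ('5', '8') and (struct(key,value)) IN
--     (const struct('0','8'), const struct('1','5'), const struct('2','8'))
-- value qualifies (2 distinct constants for 3 OR branches); key does not
-- (3 distinct constants for 3 branches, and it is not a partition column).
-- Outside of HIVE_IN_TEST the order of the IN values is unspecified.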
diff --git ql/src/test/queries/clientpositive/flatten_and_or.q ql/src/test/queries/clientpositive/flatten_and_or.q
index 6d65225..6c6e0f9 100644
--- ql/src/test/queries/clientpositive/flatten_and_or.q
+++ ql/src/test/queries/clientpositive/flatten_and_or.q
@@ -1,3 +1,5 @@
+set hive.optimize.point.lookup=false;
+
 explain
 SELECT key
 FROM src
@@ -14,4 +16,4 @@ WHERE
   AND value = '1') OR (key = '9'
   AND value = '1') OR (key = '10'
   AND value = '3'))
-;
\ No newline at end of file
+;
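The new pointlookup.q below exercises all three regimes: with the default hive.optimize.point.lookup.min of 31 the 11-branch OR is left untouched; with min=3 and extract=false it collapses into the single (struct(key,value)) IN (...) predicate; and with extract=true a partial (value) IN ('1', '3', '5', '6', '8') pre-condition is added, while key gets none because its 11 distinct constants match the 11 branches (no nDV reduction) and it is not a partition column. The golden file pointlookup.q.out further down records exactly these three plans.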
diff --git ql/src/test/queries/clientpositive/pointlookup.q ql/src/test/queries/clientpositive/pointlookup.q
new file mode 100644
index 0000000..1aef2ef
--- /dev/null
+++ ql/src/test/queries/clientpositive/pointlookup.q
@@ -0,0 +1,59 @@
+explain
+SELECT key
+FROM src
+WHERE
+   ((key = '0'
+   AND value = '8') OR (key = '1'
+   AND value = '5') OR (key = '2'
+   AND value = '6') OR (key = '3'
+   AND value = '8') OR (key = '4'
+   AND value = '1') OR (key = '5'
+   AND value = '6') OR (key = '6'
+   AND value = '1') OR (key = '7'
+   AND value = '1') OR (key = '8'
+   AND value = '1') OR (key = '9'
+   AND value = '1') OR (key = '10'
+   AND value = '3'))
+;
+
+
+set hive.optimize.point.lookup.min=3;
+set hive.optimize.point.lookup.extract=false;
+
+explain
+SELECT key
+FROM src
+WHERE
+   ((key = '0'
+   AND value = '8') OR (key = '1'
+   AND value = '5') OR (key = '2'
+   AND value = '6') OR (key = '3'
+   AND value = '8') OR (key = '4'
+   AND value = '1') OR (key = '5'
+   AND value = '6') OR (key = '6'
+   AND value = '1') OR (key = '7'
+   AND value = '1') OR (key = '8'
+   AND value = '1') OR (key = '9'
+   AND value = '1') OR (key = '10'
+   AND value = '3'))
+;
+
+set hive.optimize.point.lookup.extract=true;
+
+explain
+SELECT key
+FROM src
+WHERE
+   ((key = '0'
+   AND value = '8') OR (key = '1'
+   AND value = '5') OR (key = '2'
+   AND value = '6') OR (key = '3'
+   AND value = '8') OR (key = '4'
+   AND value = '1') OR (key = '5'
+   AND value = '6') OR (key = '6'
+   AND value = '1') OR (key = '7'
+   AND value = '1') OR (key = '8'
+   AND value = '1') OR (key = '9'
+   AND value = '1') OR (key = '10'
+   AND value = '3'))
+;
diff --git ql/src/test/queries/clientpositive/pointlookup2.q ql/src/test/queries/clientpositive/pointlookup2.q
new file mode 100644
index 0000000..31bebbb
--- /dev/null
+++ ql/src/test/queries/clientpositive/pointlookup2.q
@@ -0,0 +1,51 @@
+drop table pcr_t1;
+drop table pcr_t2;
+drop table pcr_t3;
+
+create table pcr_t1 (key int, value string) partitioned by (ds string);
+insert overwrite table pcr_t1 partition (ds='2000-04-08') select * from src where key < 20 order by key;
+insert overwrite table pcr_t1 partition (ds='2000-04-09') select * from src where key < 20 order by key;
+insert overwrite table pcr_t1 partition (ds='2000-04-10') select * from src where key < 20 order by key;
+
+create table pcr_t2 (ds string, key int, value string);
+from pcr_t1
+insert overwrite table pcr_t2 select ds, key, value where ds='2000-04-08';
+from pcr_t1
+insert overwrite table pcr_t2 select ds, key, value where ds='2000-04-08' and key=2;
+
+set hive.optimize.point.lookup.min=2;
+set hive.optimize.point.lookup.extract=true;
+
+explain extended
+select key, value, ds
+from pcr_t1
+where (ds='2000-04-08' and key=1) or (ds='2000-04-09' and key=2)
+order by key, value, ds;
+
+explain extended
+select *
+from pcr_t1 t1 join pcr_t1 t2
+on t1.key=t2.key and t1.ds='2000-04-08' and t2.ds='2000-04-08'
+order by t1.key;
+
+explain extended
+select *
+from pcr_t1 t1 join pcr_t1 t2
+on t1.key=t2.key and t1.ds='2000-04-08' and t2.ds='2000-04-09'
+order by t1.key;
+
+explain extended
+select *
+from pcr_t1 t1 join pcr_t2 t2
+where (t1.ds='2000-04-08' and t2.key=1) or (t1.ds='2000-04-09' and t2.key=2)
+order by t2.key, t2.value, t1.ds;
+
+explain extended
+select *
+from pcr_t1 t1 join pcr_t2 t2
+where (t2.ds='2000-04-08' and t1.key=1) or (t2.ds='2000-04-09' and t1.key=2)
+order by t1.key, t1.value, t2.ds;
+
+drop table pcr_t1;
+drop table pcr_t2;
+drop table pcr_t3;
\ No newline at end of file
diff --git ql/src/test/results/clientpositive/alter_partition_coltype.q.out ql/src/test/results/clientpositive/alter_partition_coltype.q.out
index 06515da..9fc3c8d 100644
--- ql/src/test/results/clientpositive/alter_partition_coltype.q.out
+++ ql/src/test/results/clientpositive/alter_partition_coltype.q.out
@@ -1134,15 +1134,11 @@ STAGE PLANS:
           alias: alterdynamic_part_table
           Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
           GatherStats: false
-          Filter Operator
-            isSamplingPred: false
-            predicate: (struct(partcol1,partcol2)) IN (const struct(2,'1'), const struct(1,'__HIVE_DEFAULT_PARTITION__')) (type: boolean)
+          Select Operator
+            expressions: intcol (type: string)
+            outputColumnNames: _col0
             Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
-            Select Operator
-              expressions: intcol (type: string)
-              outputColumnNames: _col0
-              Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
-              ListSink
+            ListSink
PREHOOK: query: select intcol from pt.alterdynamic_part_table where (partcol1='2' and partcol2='1')or
(partcol1='1' and partcol2='__HIVE_DEFAULT_PARTITION__')
PREHOOK: type: QUERY
diff --git ql/src/test/results/clientpositive/annotate_stats_filter.q.out ql/src/test/results/clientpositive/annotate_stats_filter.q.out
index 9e0e78a..054b573 100644
--- ql/src/test/results/clientpositive/annotate_stats_filter.q.out
+++ ql/src/test/results/clientpositive/annotate_stats_filter.q.out
@@ -678,15 +678,15 @@ STAGE PLANS:
             alias: loc_orc
             Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
             Filter Operator
-              predicate: (state) IN ('OH', 'CA') (type: boolean)
-              Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
+              predicate: ((state = 'OH') or (state = 'CA')) (type: boolean)
+              Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
               Select Operator
                 expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int)
                 outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.TextInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
diff --git ql/src/test/results/clientpositive/flatten_and_or.q.out ql/src/test/results/clientpositive/flatten_and_or.q.out
index 5f25daa..9c51ff3 100644
--- ql/src/test/results/clientpositive/flatten_and_or.q.out
+++ ql/src/test/results/clientpositive/flatten_and_or.q.out
@@ -44,15 +44,15 @@ STAGE PLANS:
             alias: src
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
-              predicate: (struct(key,value)) IN (const struct('0','8'), const struct('1','5'), const struct('2','6'), const struct('3','8'), const struct('4','1'), const struct('5','6'), const struct('6','1'), const struct('7','1'), const struct('8','1'), const struct('9','1'), const struct('10','3')) (type: boolean)
-              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              predicate: (((key = '0') and (value = '8')) or ((key = '1') and (value = '5')) or ((key = '2') and (value = '6')) or ((key = '3') and (value = '8')) or ((key = '4') and (value = '1')) or ((key = '5') and (value = '6')) or ((key = '6') and (value = '1')) or ((key = '7') and (value = '1')) or ((key = '8') and (value = '1')) or ((key = '9') and (value = '1')) or ((key = '10') and (value = '3'))) (type: boolean)
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: key (type: string)
                 outputColumnNames: _col0
-                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.TextInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
diff --git ql/src/test/results/clientpositive/pcr.q.out ql/src/test/results/clientpositive/pcr.q.out
index 4c9ea77..d7c40a3 100644
--- 
ql/src/test/results/clientpositive/pcr.q.out +++ ql/src/test/results/clientpositive/pcr.q.out @@ -2475,16 +2475,16 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: (struct(key,ds)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) (type: boolean) - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + predicate: (((ds = '2000-04-08') and (key = 1)) or ((ds = '2000-04-09') and (key = 2))) (type: boolean) + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string), ds (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) sort order: +++ - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE tag: -1 auto parallelism: false Path -> Alias: @@ -2588,13 +2588,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/pointlookup.q.out ql/src/test/results/clientpositive/pointlookup.q.out new file mode 100644 index 0000000..7e19be4 --- /dev/null +++ ql/src/test/results/clientpositive/pointlookup.q.out @@ -0,0 +1,198 @@ +PREHOOK: query: explain +SELECT key +FROM src +WHERE + ((key = '0' + AND value = '8') OR (key = '1' + AND value = '5') OR (key = '2' + AND value = '6') OR (key = '3' + AND value = '8') OR (key = '4' + AND value = '1') OR (key = '5' + AND value = '6') OR (key = '6' + AND value = '1') OR (key = '7' + AND value = '1') OR (key = '8' + AND value = '1') OR (key = '9' + AND value = '1') OR (key = '10' + AND value = '3')) +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT key +FROM src +WHERE + ((key = '0' + AND value = '8') OR (key = '1' + AND value = '5') OR (key = '2' + AND value = '6') OR (key = '3' + AND value = '8') OR (key = '4' + AND value = '1') OR (key = '5' + AND value = '6') OR (key = '6' + AND value = '1') OR (key = '7' + AND value = '1') OR (key = '8' + AND value = '1') OR (key = '9' + AND value = '1') OR (key = '10' + AND value = '3')) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((key = '0') and (value = '8')) or ((key = '1') and (value = '5')) or ((key = '2') and (value = '6')) or ((key = '3') and (value = '8')) or ((key = '4') and (value = '1')) or ((key = 
'5') and (value = '6')) or ((key = '6') and (value = '1')) or ((key = '7') and (value = '1')) or ((key = '8') and (value = '1')) or ((key = '9') and (value = '1')) or ((key = '10') and (value = '3'))) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +SELECT key +FROM src +WHERE + ((key = '0' + AND value = '8') OR (key = '1' + AND value = '5') OR (key = '2' + AND value = '6') OR (key = '3' + AND value = '8') OR (key = '4' + AND value = '1') OR (key = '5' + AND value = '6') OR (key = '6' + AND value = '1') OR (key = '7' + AND value = '1') OR (key = '8' + AND value = '1') OR (key = '9' + AND value = '1') OR (key = '10' + AND value = '3')) +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT key +FROM src +WHERE + ((key = '0' + AND value = '8') OR (key = '1' + AND value = '5') OR (key = '2' + AND value = '6') OR (key = '3' + AND value = '8') OR (key = '4' + AND value = '1') OR (key = '5' + AND value = '6') OR (key = '6' + AND value = '1') OR (key = '7' + AND value = '1') OR (key = '8' + AND value = '1') OR (key = '9' + AND value = '1') OR (key = '10' + AND value = '3')) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (struct(key,value)) IN (const struct('0','8'), const struct('1','5'), const struct('2','6'), const struct('3','8'), const struct('4','1'), const struct('5','6'), const struct('6','1'), const struct('7','1'), const struct('8','1'), const struct('9','1'), const struct('10','3')) (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +SELECT key +FROM src +WHERE + ((key = '0' + AND value = '8') OR (key = '1' + AND value = '5') OR (key = '2' + AND value = '6') OR (key = '3' + AND value = '8') OR (key = '4' + AND value = '1') OR (key = '5' + AND value = '6') OR (key = '6' + AND value = '1') OR (key = '7' + AND value = '1') OR (key = '8' + AND value = '1') OR (key = '9' + AND value = '1') OR (key = '10' + AND value = '3')) +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT key +FROM src +WHERE + ((key = '0' + AND value = '8') OR (key = '1' + AND 
value = '5') OR (key = '2' + AND value = '6') OR (key = '3' + AND value = '8') OR (key = '4' + AND value = '1') OR (key = '5' + AND value = '6') OR (key = '6' + AND value = '1') OR (key = '7' + AND value = '1') OR (key = '8' + AND value = '1') OR (key = '9' + AND value = '1') OR (key = '10' + AND value = '3')) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((value) IN ('1', '3', '5', '6', '8') and (struct(key,value)) IN (const struct('0','8'), const struct('1','5'), const struct('2','6'), const struct('3','8'), const struct('4','1'), const struct('5','6'), const struct('6','1'), const struct('7','1'), const struct('8','1'), const struct('9','1'), const struct('10','3'))) (type: boolean) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git ql/src/test/results/clientpositive/pointlookup2.q.out ql/src/test/results/clientpositive/pointlookup2.q.out new file mode 100644 index 0000000..55edd90 --- /dev/null +++ ql/src/test/results/clientpositive/pointlookup2.q.out @@ -0,0 +1,1647 @@ +PREHOOK: query: drop table pcr_t1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table pcr_t1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table pcr_t2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table pcr_t2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table pcr_t3 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table pcr_t3 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table pcr_t1 (key int, value string) partitioned by (ds string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@pcr_t1 +POSTHOOK: query: create table pcr_t1 (key int, value string) partitioned by (ds string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@pcr_t1 +PREHOOK: query: insert overwrite table pcr_t1 partition (ds='2000-04-08') select * from src where key < 20 order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@pcr_t1@ds=2000-04-08 +POSTHOOK: query: insert overwrite table pcr_t1 partition (ds='2000-04-08') select * from src where key < 20 order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@pcr_t1@ds=2000-04-08 +POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-08).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-08).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table pcr_t1 partition (ds='2000-04-09') select * from src where key < 20 order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@pcr_t1@ds=2000-04-09 
+POSTHOOK: query: insert overwrite table pcr_t1 partition (ds='2000-04-09') select * from src where key < 20 order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@pcr_t1@ds=2000-04-09 +POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-09).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-09).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table pcr_t1 partition (ds='2000-04-10') select * from src where key < 20 order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@pcr_t1@ds=2000-04-10 +POSTHOOK: query: insert overwrite table pcr_t1 partition (ds='2000-04-10') select * from src where key < 20 order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@pcr_t1@ds=2000-04-10 +POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-10).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-10).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: create table pcr_t2 (ds string, key int, value string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@pcr_t2 +POSTHOOK: query: create table pcr_t2 (ds string, key int, value string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@pcr_t2 +PREHOOK: query: from pcr_t1 +insert overwrite table pcr_t2 select ds, key, value where ds='2000-04-08' +PREHOOK: type: QUERY +PREHOOK: Input: default@pcr_t1 +PREHOOK: Input: default@pcr_t1@ds=2000-04-08 +PREHOOK: Output: default@pcr_t2 +POSTHOOK: query: from pcr_t1 +insert overwrite table pcr_t2 select ds, key, value where ds='2000-04-08' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@pcr_t1 +POSTHOOK: Input: default@pcr_t1@ds=2000-04-08 +POSTHOOK: Output: default@pcr_t2 +POSTHOOK: Lineage: pcr_t2.ds SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:ds, type:string, comment:null), ] +POSTHOOK: Lineage: pcr_t2.key SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: pcr_t2.value SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: from pcr_t1 +insert overwrite table pcr_t2 select ds, key, value where ds='2000-04-08' and key=2 +PREHOOK: type: QUERY +PREHOOK: Input: default@pcr_t1 +PREHOOK: Input: default@pcr_t1@ds=2000-04-08 +PREHOOK: Output: default@pcr_t2 +POSTHOOK: query: from pcr_t1 +insert overwrite table pcr_t2 select ds, key, value where ds='2000-04-08' and key=2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@pcr_t1 +POSTHOOK: Input: default@pcr_t1@ds=2000-04-08 +POSTHOOK: Output: default@pcr_t2 +POSTHOOK: Lineage: pcr_t2.ds SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:ds, type:string, comment:null), ] +POSTHOOK: Lineage: pcr_t2.key SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: pcr_t2.value SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: explain extended +select key, value, ds +from pcr_t1 +where (ds='2000-04-08' and key=1) or (ds='2000-04-09' and key=2) +order by key, value, ds +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +select key, value, ds +from pcr_t1 +where (ds='2000-04-08' and key=1) or (ds='2000-04-09' and key=2) +order by key, value, ds +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + 
TOK_FROM + TOK_TABREF + TOK_TABNAME + pcr_t1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_TABLE_OR_COL + value + TOK_SELEXPR + TOK_TABLE_OR_COL + ds + TOK_WHERE + or + and + = + TOK_TABLE_OR_COL + ds + '2000-04-08' + = + TOK_TABLE_OR_COL + key + 1 + and + = + TOK_TABLE_OR_COL + ds + '2000-04-09' + = + TOK_TABLE_OR_COL + key + 2 + TOK_ORDERBY + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + key + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + value + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + ds + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: pcr_t1 + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (struct(key,ds)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) (type: boolean) + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string), ds (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + sort order: +++ + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + tag: -1 + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2000-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t1 + numFiles 1 + numRows 20 + partition_columns ds + partition_columns.types string + rawDataSize 160 + serialization.ddl struct pcr_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcr_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcr_t1 + name: default.pcr_t1 +#### A masked pattern was here #### + Partition + base file name: ds=2000-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t1 + numFiles 1 + numRows 20 + 
partition_columns ds + partition_columns.types string + rawDataSize 160 + serialization.ddl struct pcr_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcr_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcr_t1 + name: default.pcr_t1 + Truncated Path -> Alias: + /pcr_t1/ds=2000-04-08 [pcr_t1] + /pcr_t1/ds=2000-04-09 [pcr_t1] + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types int:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain extended +select * +from pcr_t1 t1 join pcr_t1 t2 +on t1.key=t2.key and t1.ds='2000-04-08' and t2.ds='2000-04-08' +order by t1.key +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +select * +from pcr_t1 t1 join pcr_t1 t2 +on t1.key=t2.key and t1.ds='2000-04-08' and t2.ds='2000-04-08' +order by t1.key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + pcr_t1 + t1 + TOK_TABREF + TOK_TABNAME + pcr_t1 + t2 + and + and + = + . + TOK_TABLE_OR_COL + t1 + key + . + TOK_TABLE_OR_COL + t2 + key + = + . + TOK_TABLE_OR_COL + t1 + ds + '2000-04-08' + = + . + TOK_TABLE_OR_COL + t2 + ds + '2000-04-08' + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_ORDERBY + TOK_TABSORTCOLNAMEASC + . 
+ TOK_TABLE_OR_COL + t1 + key + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string) + auto parallelism: false + TableScan + alias: t2 + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2000-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t1 + numFiles 1 + numRows 20 + partition_columns ds + partition_columns.types string + rawDataSize 160 + serialization.ddl struct pcr_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcr_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcr_t1 + name: default.pcr_t1 + Truncated Path -> Alias: + /pcr_t1/ds=2000-04-08 [t1, t2] + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col6, _col7 + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col6 (type: int), _col7 (type: string) + outputColumnNames: _col0, _col1, _col3, _col4 + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 
+#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col3,_col4 + columns.types int,string,int,string + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: string), _col3 (type: int), _col4 (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10003 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col3,_col4 + columns.types int,string,int,string + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col3,_col4 + columns.types int,string,int,string + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), '2000-04-08' (type: string), VALUE._col2 (type: int), VALUE._col3 (type: string), '2000-04-08' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5 + columns.types int:string:string:int:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain extended +select * +from pcr_t1 t1 join pcr_t1 t2 +on t1.key=t2.key and t1.ds='2000-04-08' and t2.ds='2000-04-09' +order by t1.key +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +select * +from pcr_t1 t1 join pcr_t1 t2 +on t1.key=t2.key and t1.ds='2000-04-08' and t2.ds='2000-04-09' +order by t1.key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + 
TOK_TABREF + TOK_TABNAME + pcr_t1 + t1 + TOK_TABREF + TOK_TABNAME + pcr_t1 + t2 + and + and + = + . + TOK_TABLE_OR_COL + t1 + key + . + TOK_TABLE_OR_COL + t2 + key + = + . + TOK_TABLE_OR_COL + t1 + ds + '2000-04-08' + = + . + TOK_TABLE_OR_COL + t2 + ds + '2000-04-09' + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_ORDERBY + TOK_TABSORTCOLNAMEASC + . + TOK_TABLE_OR_COL + t1 + key + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string) + auto parallelism: false + TableScan + alias: t2 + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2000-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t1 + numFiles 1 + numRows 20 + partition_columns ds + partition_columns.types string + rawDataSize 160 + serialization.ddl struct pcr_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcr_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcr_t1 + name: default.pcr_t1 +#### A masked pattern was here #### + Partition + base file name: ds=2000-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 
2000-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t1 + numFiles 1 + numRows 20 + partition_columns ds + partition_columns.types string + rawDataSize 160 + serialization.ddl struct pcr_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcr_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcr_t1 + name: default.pcr_t1 + Truncated Path -> Alias: + /pcr_t1/ds=2000-04-08 [t1] + /pcr_t1/ds=2000-04-09 [t2] + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col6, _col7 + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col6 (type: int), _col7 (type: string) + outputColumnNames: _col0, _col1, _col3, _col4 + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col3,_col4 + columns.types int,string,int,string + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: string), _col3 (type: int), _col4 (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10003 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col3,_col4 + columns.types int,string,int,string + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col3,_col4 + columns.types int,string,int,string + escape.delim \ + 
serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), '2000-04-08' (type: string), VALUE._col2 (type: int), VALUE._col3 (type: string), '2000-04-09' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5 + columns.types int:string:string:int:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join JOIN[4][tables = [t1, t2]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: explain extended +select * +from pcr_t1 t1 join pcr_t2 t2 +where (t1.ds='2000-04-08' and t2.key=1) or (t1.ds='2000-04-09' and t2.key=2) +order by t2.key, t2.value, t1.ds +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +select * +from pcr_t1 t1 join pcr_t2 t2 +where (t1.ds='2000-04-08' and t2.key=1) or (t1.ds='2000-04-09' and t2.key=2) +order by t2.key, t2.value, t1.ds +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + pcr_t1 + t1 + TOK_TABREF + TOK_TABNAME + pcr_t2 + t2 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_WHERE + or + and + = + . + TOK_TABLE_OR_COL + t1 + ds + '2000-04-08' + = + . + TOK_TABLE_OR_COL + t2 + key + 1 + and + = + . + TOK_TABLE_OR_COL + t1 + ds + '2000-04-09' + = + . + TOK_TABLE_OR_COL + t2 + key + 2 + TOK_ORDERBY + TOK_TABSORTCOLNAMEASC + . + TOK_TABLE_OR_COL + t2 + key + TOK_TABSORTCOLNAMEASC + . + TOK_TABLE_OR_COL + t2 + value + TOK_TABSORTCOLNAMEASC + . 
+ TOK_TABLE_OR_COL + t1 + ds + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Reduce Output Operator + sort order: + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: key (type: int), value (type: string), ds (type: string) + auto parallelism: false + TableScan + alias: t2 + Statistics: Num rows: 1 Data size: 18 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 18 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: ds (type: string), key (type: int), value (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2000-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t1 + numFiles 1 + numRows 20 + partition_columns ds + partition_columns.types string + rawDataSize 160 + serialization.ddl struct pcr_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcr_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcr_t1 + name: default.pcr_t1 +#### A masked pattern was here #### + Partition + base file name: ds=2000-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t1 + numFiles 1 + numRows 20 + partition_columns ds + partition_columns.types string + rawDataSize 160 + serialization.ddl struct pcr_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name 
default.pcr_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcr_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcr_t1 + name: default.pcr_t1 +#### A masked pattern was here #### + Partition + base file name: pcr_t2 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns ds,key,value + columns.comments + columns.types string:int:string +#### A masked pattern was here #### + name default.pcr_t2 + numFiles 1 + numRows 1 + rawDataSize 18 + serialization.ddl struct pcr_t2 { string ds, i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 19 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns ds,key,value + columns.comments + columns.types string:int:string +#### A masked pattern was here #### + name default.pcr_t2 + numFiles 1 + numRows 1 + rawDataSize 18 + serialization.ddl struct pcr_t2 { string ds, i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 19 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcr_t2 + name: default.pcr_t2 + Truncated Path -> Alias: + /pcr_t1/ds=2000-04-08 [t1] + /pcr_t1/ds=2000-04-09 [t1] + /pcr_t2 [t2] + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col6, _col7, _col8 + Statistics: Num rows: 44 Data size: 352 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: ((_col2) IN ('2000-04-08', '2000-04-09') and (struct(_col7,_col2)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09'))) (type: boolean) + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col6 (type: string), _col7 (type: int), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5 + columns.types int,string,string,string,int,string + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + key expressions: _col4 (type: int), _col5 (type: string), _col2 (type: string) + sort order: +++ + 
Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10003 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5 + columns.types int,string,string,string,int,string + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5 + columns.types int,string,string,string,int,string + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col2 (type: string), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5 + columns.types int:string:string:string:int:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join JOIN[4][tables = [t1, t2]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: explain extended +select * +from pcr_t1 t1 join pcr_t2 t2 +where (t2.ds='2000-04-08' and t1.key=1) or (t2.ds='2000-04-09' and t1.key=2) +order by t1.key, t1.value, t2.ds +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +select * +from pcr_t1 t1 join pcr_t2 t2 +where (t2.ds='2000-04-08' and t1.key=1) or (t2.ds='2000-04-09' and t1.key=2) +order by t1.key, t1.value, t2.ds +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + pcr_t1 + t1 + TOK_TABREF + TOK_TABNAME + pcr_t2 + t2 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_WHERE + or + and + = + . + TOK_TABLE_OR_COL + t2 + ds + '2000-04-08' + = + . + TOK_TABLE_OR_COL + t1 + key + 1 + and + = + . + TOK_TABLE_OR_COL + t2 + ds + '2000-04-09' + = + . + TOK_TABLE_OR_COL + t1 + key + 2 + TOK_ORDERBY + TOK_TABSORTCOLNAMEASC + . 
+ TOK_TABLE_OR_COL + t1 + key + TOK_TABSORTCOLNAMEASC + . + TOK_TABLE_OR_COL + t1 + value + TOK_TABSORTCOLNAMEASC + . + TOK_TABLE_OR_COL + t2 + ds + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 60 Data size: 480 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Reduce Output Operator + sort order: + Statistics: Num rows: 60 Data size: 480 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: key (type: int), value (type: string), ds (type: string) + auto parallelism: false + TableScan + alias: t2 + Statistics: Num rows: 1 Data size: 18 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 18 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: ds (type: string), key (type: int), value (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2000-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t1 + numFiles 1 + numRows 20 + partition_columns ds + partition_columns.types string + rawDataSize 160 + serialization.ddl struct pcr_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcr_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcr_t1 + name: default.pcr_t1 +#### A masked pattern was here #### + Partition + base file name: ds=2000-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t1 + numFiles 1 + numRows 20 + partition_columns ds + partition_columns.types string + rawDataSize 160 + serialization.ddl struct pcr_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 
-1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcr_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcr_t1 + name: default.pcr_t1 +#### A masked pattern was here #### + Partition + base file name: ds=2000-04-10 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-10 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t1 + numFiles 1 + numRows 20 + partition_columns ds + partition_columns.types string + rawDataSize 160 + serialization.ddl struct pcr_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcr_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcr_t1 + name: default.pcr_t1 +#### A masked pattern was here #### + Partition + base file name: pcr_t2 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns ds,key,value + columns.comments + columns.types string:int:string +#### A masked pattern was here #### + name default.pcr_t2 + numFiles 1 + numRows 1 + rawDataSize 18 + serialization.ddl struct pcr_t2 { string ds, i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 19 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns ds,key,value + columns.comments + columns.types string:int:string +#### A masked pattern was here #### + name default.pcr_t2 + numFiles 1 + numRows 1 + rawDataSize 18 + serialization.ddl struct pcr_t2 { string ds, i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 19 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcr_t2 + name: default.pcr_t2 + Truncated Path -> Alias: + /pcr_t1/ds=2000-04-08 [t1] + /pcr_t1/ds=2000-04-09 [t1] + /pcr_t1/ds=2000-04-10 [t1] + /pcr_t2 [t2] + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition 
map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col6, _col7, _col8 + Statistics: Num rows: 66 Data size: 528 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: (struct(_col0,_col6)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) (type: boolean) + Statistics: Num rows: 33 Data size: 264 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col6 (type: string), _col7 (type: int), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 33 Data size: 264 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5 + columns.types int,string,string,string,int,string + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + sort order: +++ + Statistics: Num rows: 33 Data size: 264 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: string), _col4 (type: int), _col5 (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10003 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5 + columns.types int,string,string,string,int,string + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5 + columns.types int,string,string,string,int,string + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 33 Data size: 264 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 33 Data size: 264 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5 + columns.types int:string:string:string:int:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: drop table pcr_t1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@pcr_t1 +PREHOOK: Output: default@pcr_t1 +POSTHOOK: query: drop table pcr_t1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@pcr_t1 +POSTHOOK: Output: default@pcr_t1 +PREHOOK: query: drop table pcr_t2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@pcr_t2 +PREHOOK: Output: default@pcr_t2 +POSTHOOK: query: drop table pcr_t2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@pcr_t2 +POSTHOOK: Output: default@pcr_t2 +PREHOOK: query: drop table pcr_t3 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table pcr_t3 +POSTHOOK: type: DROPTABLE diff --git ql/src/test/results/clientpositive/ppd_transform.q.out ql/src/test/results/clientpositive/ppd_transform.q.out index f536767..17248e4 100644 --- ql/src/test/results/clientpositive/ppd_transform.q.out +++ ql/src/test/results/clientpositive/ppd_transform.q.out @@ -390,21 +390,21 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col0) IN ('a', 'b') (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + predicate: ((_col0 = 'a') or (_col0 = 'b')) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Filter Operator - predicate: (_col0) IN ('c', 'd') (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + predicate: ((_col0 = 'c') or (_col0 = 'd')) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/spark/pcr.q.out ql/src/test/results/clientpositive/spark/pcr.q.out index 5aa0df8..fb08f10 100644 --- ql/src/test/results/clientpositive/spark/pcr.q.out +++ ql/src/test/results/clientpositive/spark/pcr.q.out @@ -2534,16 +2534,16 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: (struct(key,ds)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) (type: boolean) - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + predicate: (((ds = '2000-04-08') and (key 
= 1)) or ((ds = '2000-04-09') and (key = 2))) (type: boolean) + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string), ds (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) sort order: +++ - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE tag: -1 auto parallelism: false Path -> Alias: @@ -2648,13 +2648,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/spark/ppd_transform.q.out ql/src/test/results/clientpositive/spark/ppd_transform.q.out index a6e6e38..52a847a 100644 --- ql/src/test/results/clientpositive/spark/ppd_transform.q.out +++ ql/src/test/results/clientpositive/spark/ppd_transform.q.out @@ -405,21 +405,21 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col0) IN ('a', 'b') (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + predicate: ((_col0 = 'a') or (_col0 = 'b')) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Filter Operator - predicate: (_col0) IN ('c', 'd') (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + predicate: ((_col0 = 'c') or (_col0 = 'd')) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/spark/vectorized_case.q.out ql/src/test/results/clientpositive/spark/vectorized_case.q.out index 54003c3..c2250e6 100644 --- 
ql/src/test/results/clientpositive/spark/vectorized_case.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_case.q.out @@ -45,7 +45,7 @@ STAGE PLANS: TableScan alias: alltypesorc Filter Operator - predicate: (csmallint) IN (418, 12205, 10583) (type: boolean) + predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean) Select Operator expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string), CASE (csmallint) WHEN (418) THEN ('a') WHEN (12205) THEN ('b') ELSE ('c') END (type: string) outputColumnNames: _col0, _col1, _col2 diff --git ql/src/test/results/clientpositive/tez/explainuser_1.q.out ql/src/test/results/clientpositive/tez/explainuser_1.q.out index e8a9786..9756b0c 100644 --- ql/src/test/results/clientpositive/tez/explainuser_1.q.out +++ ql/src/test/results/clientpositive/tez/explainuser_1.q.out @@ -2909,7 +2909,7 @@ Stage-0 Select Operator [SEL_2] outputColumnNames:["_col0"] Filter Operator [FIL_4] - predicate:(c_int) IN (-6, 6) (type: boolean) + predicate:((c_int = -6) or (c_int = 6)) (type: boolean) TableScan [TS_0] alias:cbo_t1 diff --git ql/src/test/results/clientpositive/tez/vectorized_case.q.out ql/src/test/results/clientpositive/tez/vectorized_case.q.out index 54003c3..c2250e6 100644 --- ql/src/test/results/clientpositive/tez/vectorized_case.q.out +++ ql/src/test/results/clientpositive/tez/vectorized_case.q.out @@ -45,7 +45,7 @@ STAGE PLANS: TableScan alias: alltypesorc Filter Operator - predicate: (csmallint) IN (418, 12205, 10583) (type: boolean) + predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean) Select Operator expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string), CASE (csmallint) WHEN (418) THEN ('a') WHEN (12205) THEN ('b') ELSE ('c') END (type: string) outputColumnNames: _col0, _col1, _col2 diff --git ql/src/test/results/clientpositive/vectorized_case.q.out ql/src/test/results/clientpositive/vectorized_case.q.out index 9e47014..73bf12d 100644 --- ql/src/test/results/clientpositive/vectorized_case.q.out +++ ql/src/test/results/clientpositive/vectorized_case.q.out @@ -46,19 +46,20 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (csmallint) IN (418, 12205, 10583) (type: boolean) - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean) + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string), CASE (csmallint) WHEN (418) THEN ('a') WHEN (12205) THEN ('b') ELSE ('c') END (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output 
format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized Stage: Stage-0 Fetch Operator
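
Note on the plan changes above: every q.out hunk in this patch falls out of one rule — an OR chain is now rewritten to IN only when it has at least hive.optimize.point.lookup.min disjuncts, and when hive.optimize.point.lookup.extract is on, a redundant single-column IN is additionally pulled out of the multi-column struct IN for each column that appears in every disjunct (that is where the extra (_col2) IN ('2000-04-08', '2000-04-09') conjunct in the pcr.q.out join plans comes from). The sketch below is a minimal, self-contained model of those two behaviors, not the Hive optimizer classes; the predicate representation (one map per OR branch) and the output formatting are invented for illustration.

    import java.util.*;

    /**
     * Standalone sketch of the point-lookup rewrite, under the assumption
     * that each OR branch is a conjunction of column = literal tests and
     * that all branches constrain exactly the same columns.
     */
    public class PointLookupSketch {

      /** One OR branch, e.g. {ds='2000-04-08', key=1}: column -> literal. */
      static List<Map<String, Object>> branches = Arrays.asList(
          Map.of("ds", "2000-04-08", "key", 1),
          Map.of("ds", "2000-04-09", "key", 2));

      static String rewrite(List<Map<String, Object>> ors, int minOrExpr,
                            boolean extract) {
        if (ors.size() < minOrExpr) {
          return null; // below the threshold: leave the OR untouched
        }
        // TreeSet gives a deterministic column order for the output.
        Set<String> cols = new TreeSet<>(ors.get(0).keySet());
        for (Map<String, Object> b : ors) {
          if (!b.keySet().equals(cols)) {
            return null; // branches must constrain identical columns
          }
        }
        List<String> conjuncts = new ArrayList<>();
        if (extract) {
          // Pull out one single-column IN per column; redundant with the
          // struct IN below, but a cheap, independently pushable predicate.
          for (String col : cols) {
            Set<Object> vals = new LinkedHashSet<>();
            for (Map<String, Object> b : ors) {
              vals.add(b.get(col));
            }
            conjuncts.add("(" + col + ") IN " + vals);
          }
        }
        // The exact multi-column form: struct(cols) IN (struct(vals), ...).
        List<String> tuples = new ArrayList<>();
        for (Map<String, Object> b : ors) {
          List<Object> tuple = new ArrayList<>();
          for (String col : cols) {
            tuple.add(b.get(col));
          }
          tuples.add("struct" + tuple);
        }
        conjuncts.add("(struct" + new ArrayList<>(cols) + ") IN " + tuples);
        return String.join(" and ", conjuncts);
      }

      public static void main(String[] args) {
        // With min=2 the rewrite fires; with the default of 31 it does not.
        System.out.println(rewrite(branches, 2, true));
        System.out.println(rewrite(branches, 31, true)); // null: kept as OR
      }
    }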
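A note on the defaults, as reflected in the diffs above: with the minimum left at 31, the two- and three-way ORs in ppd_transform.q.out and vectorized_case.q.out revert from IN back to plain OR chains. That gives up the IN form's halved row estimate (250 -> 500 and 6144 -> 12288 in the Statistics lines), but the vectorized_case plan now carries "Execution mode: vectorized", presumably because the vectorizer handles a flat OR of equality tests where it did not handle the struct-IN form. In the pcr.q.out join plans, the extracted single-column IN on the partition column is the piece that remains usable for pruning even though the full struct predicate spans both sides of the join.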