diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveOrToInClauseRule.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveOrToInClauseRule.java
new file mode 100644
index 0000000..e44b549
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveOrToInClauseRule.java
@@ -0,0 +1,226 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.plan.RelOptUtil;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Filter;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.rex.RexCall;
+import org.apache.calcite.rex.RexInputRef;
+import org.apache.calcite.rex.RexLiteral;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexUtil;
+import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.sql.fun.SqlStdOperatorTable;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveIn;
+import org.apache.hadoop.hive.ql.optimizer.calcite.translator.TypeConverter;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+
+import com.google.common.collect.ArrayListMultimap;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ListMultimap;
+
+
+/**
+ * Planner rule that rewrites a disjunction of equality predicates over the
+ * same columns into a single IN clause. For instance, {@code a = 1 OR a = 2}
+ * becomes {@code a IN (1, 2)}, and
+ * {@code (a = 1 AND b = 2) OR (a = 3 AND b = 4)} becomes
+ * {@code ROW(a, b) IN (ROW(1, 2), ROW(3, 4))}.
+ *
+ * <p>The rule matches any {@link Filter}. A top-level OR is rewritten as a
+ * whole; within a top-level AND, every OR conjunct is rewritten on its own.
+ */
+public class HiveOrToInClauseRule extends RelOptRule {
+
+  protected static final Log LOG = LogFactory
+      .getLog(HiveOrToInClauseRule.class.getName());
+
+  /** Singleton instance of the rule. */
+  public static final HiveOrToInClauseRule INSTANCE =
+      new HiveOrToInClauseRule();
+
+  /** Registers the rule on every {@link Filter}, whatever its input is. */
+  private HiveOrToInClauseRule() {
+    super(operand(Filter.class, any()));
+  }
+
+  /**
+   * Replaces the matched filter by one whose OR predicates have been turned
+   * into IN clauses. Nothing is registered with the planner when the
+   * condition is left unchanged.
+   *
+   * @param call rule call whose first relational expression is the filter
+   */
+  @Override
+  public void onMatch(RelOptRuleCall call) {
+    final Filter filter = call.rel(0);
+    final RexBuilder rexBuilder = filter.getCluster().getRexBuilder();
+    final RexNode condition = RexUtil.pullFactors(rexBuilder, filter.getCondition());
+
+    // 1. We try to transform possible candidates
+    RexNode newCondition;
+    try {
+      switch (condition.getKind()) {
+      case AND:
+        ImmutableList<RexNode> operands =
+            RexUtil.flattenAnd(((RexCall) condition).getOperands());
+        List<RexNode> newOperands = new ArrayList<RexNode>(operands.size());
+        for (RexNode operand : operands) {
+          RexNode newOperand = operand;
+          if (operand.getKind() == SqlKind.OR) {
+            newOperand = transformIntoInClauseCondition(rexBuilder,
+                filter.getRowType(), operand);
+            if (newOperand == null) {
+              // NOTE(review): a single OR that cannot be converted aborts the
+              // whole rewrite, even if its siblings could be converted
+              return;
+            }
+          }
+          newOperands.add(newOperand);
+        }
+        newCondition = RexUtil.composeConjunction(rexBuilder, newOperands, false);
+        break;
+      case OR:
+        newCondition = transformIntoInClauseCondition(rexBuilder,
+            filter.getRowType(), condition);
+        if (newCondition == null) {
+          return;
+        }
+        break;
+      default:
+        return;
+      }
+    } catch (SemanticException e) {
+      LOG.error("Exception in HiveOrToInClauseRule", e);
+      return;
+    }
+
+    // 2. If we could not transform anything, we bail out
+    if (newCondition.toString().equals(condition.toString())) {
+      return;
+    }
+
+    // 3. We create the filter with the new condition
+    RelNode newFilter = filter.copy(filter.getTraitSet(), filter.getInput(), newCondition);
+    call.transformTo(newFilter);
+  }
+
+  /**
+   * Converts a disjunction into an equivalent IN clause.
+   *
+   * <p>The conversion only succeeds if every disjunct is a conjunction of
+   * {@code column = literal} (or {@code literal = column}) comparisons, and
+   * every disjunct constrains exactly the same columns, each of them once.
+   *
+   * @param rexBuilder builder used to create the new expressions
+   * @param inputSchema row type of the filter; not consulted at the moment,
+   *          kept so that the signature stays stable for callers
+   * @param condition the OR expression to convert
+   * @return the IN clause, or {@code null} if the condition cannot be converted
+   * @throws SemanticException not thrown by the current implementation; the
+   *           declaration is kept so that existing handlers keep compiling
+   */
+  private static RexNode transformIntoInClauseCondition(RexBuilder rexBuilder,
+      RelDataType inputSchema, RexNode condition) throws SemanticException {
+    assert condition.getKind() == SqlKind.OR;
+
+    // 1. We extract the information necessary to create the predicate for the new
+    //    filter: for each column, the constant it is compared with in each disjunct
+    ListMultimap<RexInputRef, RexLiteral> columnConstantsMap = ArrayListMultimap.create();
+    ImmutableList<RexNode> operands = RexUtil.flattenOr(((RexCall) condition).getOperands());
+    for (int i = 0; i < operands.size(); i++) {
+      final RexNode operandCNF = RexUtil.toCnf(rexBuilder, operands.get(i));
+      for (RexNode conjunction : RelOptUtil.conjunctions(operandCNF)) {
+        // 1.1. If it is not an equality comparison, we bail out
+        if (!(conjunction instanceof RexCall)
+            || ((RexCall) conjunction).getOperator().getKind() != SqlKind.EQUALS) {
+          return null;
+        }
+        // 1.2. We extract the column and the constant, whichever side they are on
+        final RexCall conjCall = (RexCall) conjunction;
+        final RexNode left = conjCall.operands.get(0);
+        final RexNode right = conjCall.operands.get(1);
+        final RexInputRef ref;
+        final RexLiteral literal;
+        if (left instanceof RexInputRef && right instanceof RexLiteral) {
+          ref = (RexInputRef) left;
+          literal = (RexLiteral) right;
+        } else if (right instanceof RexInputRef && left instanceof RexLiteral) {
+          ref = (RexInputRef) right;
+          literal = (RexLiteral) left;
+        } else {
+          return null;
+        }
+        // 1.3. The column must now hold exactly one constant per disjunct seen so
+        //      far; otherwise it was missing from an earlier disjunct, or it is
+        //      constrained twice by this one, and we bail out
+        columnConstantsMap.put(ref, literal);
+        if (columnConstantsMap.get(ref).size() != i + 1) {
+          return null;
+        }
+      }
+    }
+
+    // 2. If there is no column at all, or any of the columns was not referenced
+    //    by every operand, we bail out
+    if (columnConstantsMap.isEmpty()) {
+      return null;
+    }
+    final List<RexInputRef> columns = new ArrayList<RexInputRef>(columnConstantsMap.keySet());
+    for (RexInputRef ref : columns) {
+      if (columnConstantsMap.get(ref).size() != operands.size()) {
+        return null;
+      }
+    }
+
+    // 3. We build the operands of the new predicate
+    final List<RexNode> newOperands = new ArrayList<RexNode>(operands.size() + 1);
+    // 3.1. The column, or a STRUCT clause if there are several columns
+    newOperands.add(asSingleOrRow(rexBuilder, new ArrayList<RexNode>(columns)));
+    // 3.2. For each disjunct its constants, in the same order as the columns
+    for (int i = 0; i < operands.size(); i++) {
+      final List<RexNode> constantFields = new ArrayList<RexNode>(columns.size());
+      for (RexInputRef ref : columns) {
+        constantFields.add(columnConstantsMap.get(ref).get(i));
+      }
+      newOperands.add(asSingleOrRow(rexBuilder, constantFields));
+    }
+
+    // 4. Create and return IN clause
+    return rexBuilder.makeCall(HiveIn.INSTANCE, newOperands);
+  }
+
+  /** Returns the only expression of the list, or a ROW call wrapping all of them. */
+  private static RexNode asSingleOrRow(RexBuilder rexBuilder, List<RexNode> exprs) {
+    return exprs.size() == 1
+        ? exprs.get(0)
+        : rexBuilder.makeCall(SqlStdOperatorTable.ROW, exprs);
+  }
+
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index f8860b7..c4dca9d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -153,6 +153,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinProjectTransposeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinPushTransitivePredicatesRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinToMultiJoinRule; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveOrToInClauseRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePartitionPruneRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePreFilteringRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveProjectMergeRule; @@ -1152,6 +1153,7 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv HiveReduceExpressionsRule.PROJECT_INSTANCE, HiveReduceExpressionsRule.FILTER_INSTANCE, HiveReduceExpressionsRule.JOIN_INSTANCE, + HiveOrToInClauseRule.INSTANCE, HiveJoinAddNotNullRule.INSTANCE_JOIN, HiveJoinAddNotNullRule.INSTANCE_SEMIJOIN, HiveJoinPushTransitivePredicatesRule.INSTANCE_JOIN, diff --git ql/src/test/queries/clientpositive/filter_in_or_dup.q ql/src/test/queries/clientpositive/filter_in_or_dup.q new file mode 100644 index 0000000..c512b92 --- /dev/null +++ ql/src/test/queries/clientpositive/filter_in_or_dup.q @@ -0,0 +1,6 @@ +EXPLAIN +SELECT f.key +FROM cbo_t1 f +WHERE (f.key = '1' OR f.key='2') +AND f.key IN ('1', '2'); + diff --git ql/src/test/results/clientpositive/annotate_stats_deep_filters.q.out ql/src/test/results/clientpositive/annotate_stats_deep_filters.q.out index b7a87fd..396d67e 100644 --- 
ql/src/test/results/clientpositive/annotate_stats_deep_filters.q.out +++ ql/src/test/results/clientpositive/annotate_stats_deep_filters.q.out @@ -120,10 +120,10 @@ STAGE PLANS: alias: over1k Statistics: Num rows: 2098 Data size: 16736 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (((t = 1) and (si = 2)) or ((t = 2) and (si = 3)) or ((t = 3) and (si = 4)) or ((t = 4) and (si = 5)) or ((t = 5) and (si = 6)) or ((t = 6) and (si = 7)) or ((t = 7) and (si = 8)) or ((t = 9) and (si = 10)) or ((t = 10) and (si = 11)) or ((t = 11) and (si = 12)) or ((t = 12) and (si = 13)) or ((t = 13) and (si = 14)) or ((t = 14) and (si = 15)) or ((t = 15) and (si = 16)) or ((t = 16) and (si = 17)) or ((t = 17) and (si = 18)) or ((t = 27) and (si = 28)) or ((t = 37) and (si = 38)) or ((t = 47) and (si = 48)) or ((t = 52) and (si = 53))) (type: boolean) - Statistics: Num rows: 300 Data size: 2392 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (struct(t,si)) IN (const struct(1,2), const struct(2,3), const struct(3,4), const struct(4,5), const struct(5,6), const struct(6,7), const struct(7,8), const struct(9,10), const struct(10,11), const struct(11,12), const struct(12,13), const struct(13,14), const struct(14,15), const struct(15,16), const struct(16,17), const struct(17,18), const struct(27,28), const struct(37,38), const struct(47,48), const struct(52,53)) (type: boolean) + Statistics: Num rows: 1049 Data size: 8368 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 300 Data size: 2392 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1049 Data size: 8368 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash @@ -209,10 +209,10 @@ STAGE PLANS: alias: over1k Statistics: Num rows: 2098 Data size: 211174 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((t = 1) and (si = 2)) or ((t = 2) and (si = 3)) or ((t = 3) and (si = 4)) or ((t = 4) 
and (si = 5)) or ((t = 5) and (si = 6)) or ((t = 6) and (si = 7)) or ((t = 7) and (si = 8)) or ((t = 9) and (si = 10)) or ((t = 10) and (si = 11)) or ((t = 11) and (si = 12)) or ((t = 12) and (si = 13)) or ((t = 13) and (si = 14)) or ((t = 14) and (si = 15)) or ((t = 15) and (si = 16)) or ((t = 16) and (si = 17)) or ((t = 17) and (si = 18)) or ((t = 27) and (si = 28)) or ((t = 37) and (si = 38)) or ((t = 47) and (si = 48)) or ((t = 52) and (si = 53))) (type: boolean) - Statistics: Num rows: 2098 Data size: 211174 Basic stats: COMPLETE Column stats: NONE + predicate: (struct(t,si)) IN (const struct(1,2), const struct(2,3), const struct(3,4), const struct(4,5), const struct(5,6), const struct(6,7), const struct(7,8), const struct(9,10), const struct(10,11), const struct(11,12), const struct(12,13), const struct(13,14), const struct(14,15), const struct(15,16), const struct(16,17), const struct(17,18), const struct(27,28), const struct(37,38), const struct(47,48), const struct(52,53)) (type: boolean) + Statistics: Num rows: 1049 Data size: 105587 Basic stats: COMPLETE Column stats: NONE Select Operator - Statistics: Num rows: 2098 Data size: 211174 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 105587 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash diff --git ql/src/test/results/clientpositive/annotate_stats_filter.q.out ql/src/test/results/clientpositive/annotate_stats_filter.q.out index b09ad03..00fce88 100644 --- ql/src/test/results/clientpositive/annotate_stats_filter.q.out +++ ql/src/test/results/clientpositive/annotate_stats_filter.q.out @@ -678,15 +678,15 @@ STAGE PLANS: alias: loc_orc Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((state = 'OH') or (state = 'CA')) (type: boolean) - Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (state) IN ('OH', 'CA') (type: boolean) + 
Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/annotate_stats_part.q.out ql/src/test/results/clientpositive/annotate_stats_part.q.out index 186f7af..2d3643f 100644 --- ql/src/test/results/clientpositive/annotate_stats_part.q.out +++ ql/src/test/results/clientpositive/annotate_stats_part.q.out @@ -263,11 +263,14 @@ STAGE PLANS: TableScan alias: loc_orc Statistics: Num rows: 8 Data size: 774 Basic stats: COMPLETE Column stats: PARTIAL - Select Operator - expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 8 Data size: 1472 Basic stats: COMPLETE Column stats: PARTIAL - ListSink + Filter Operator + predicate: (year) IN ('2001', '__HIVE_DEFAULT_PARTITION__') (type: boolean) + Statistics: Num rows: 4 Data size: 736 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 736 Basic stats: COMPLETE Column stats: PARTIAL + ListSink PREHOOK: query: -- both partitions will be pruned -- basicStatState: NONE colStatState: NONE diff 
--git ql/src/test/results/clientpositive/auto_join19.q.out ql/src/test/results/clientpositive/auto_join19.q.out index 3f70055..1c70a72 100644 --- ql/src/test/results/clientpositive/auto_join19.q.out +++ ql/src/test/results/clientpositive/auto_join19.q.out @@ -53,12 +53,12 @@ STAGE PLANS: alias: src1 Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + predicate: (((ds) IN ('2008-04-08', '2008-04-09') and (hr) IN ('12', '11')) and key is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -66,14 +66,14 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col4 - Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToInteger(_col0) (type: int), _col4 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git 
ql/src/test/results/clientpositive/bucketsortoptimize_insert_7.q.out ql/src/test/results/clientpositive/bucketsortoptimize_insert_7.q.out index fa73acf..ec3878b 100644 --- ql/src/test/results/clientpositive/bucketsortoptimize_insert_7.q.out +++ ql/src/test/results/clientpositive/bucketsortoptimize_insert_7.q.out @@ -85,12 +85,12 @@ STAGE PLANS: alias: a Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((key = 0) or (key = 5)) and key is not null) (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + predicate: (key) IN (0, 5) (type: boolean) + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -219,12 +219,12 @@ STAGE PLANS: alias: test_table1 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((key = 0) or (key = 5)) and key is not null) (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + predicate: (key) IN (0, 5) (type: boolean) + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -359,12 +359,12 @@ STAGE PLANS: alias: test_table1 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: 
((key < 8) and ((key = 0) or (key = 5))) (type: boolean) - Statistics: Num rows: 2 Data size: 14 Basic stats: COMPLETE Column stats: NONE + predicate: ((key < 8) and (key) IN (0, 5)) (type: boolean) + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 14 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 diff --git ql/src/test/results/clientpositive/dynamic_partition_skip_default.q.out ql/src/test/results/clientpositive/dynamic_partition_skip_default.q.out index e0168b3..6cde4b0 100644 --- ql/src/test/results/clientpositive/dynamic_partition_skip_default.q.out +++ ql/src/test/results/clientpositive/dynamic_partition_skip_default.q.out @@ -372,9 +372,13 @@ STAGE PLANS: alias: dynamic_part_table Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE GatherStats: false - Select Operator - expressions: intcol (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE - ListSink + Filter Operator + isSamplingPred: false + predicate: (partcol2) IN ('1', '__HIVE_DEFAULT_PARTITION__') (type: boolean) + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: intcol (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + ListSink diff --git ql/src/test/results/clientpositive/filter_cond_pushdown.q.out ql/src/test/results/clientpositive/filter_cond_pushdown.q.out index 738286e..cb56e8e 100644 --- ql/src/test/results/clientpositive/filter_cond_pushdown.q.out +++ ql/src/test/results/clientpositive/filter_cond_pushdown.q.out @@ -21,17 +21,17 @@ STAGE PLANS: alias: f 
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((value = '2008-04-08') or (value = '2008-04-09')) and key is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + predicate: ((value) IN ('2008-04-08', '2008-04-09') and key is not null) (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) TableScan alias: f @@ -419,17 +419,17 @@ STAGE PLANS: alias: f Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((value = '2008-04-10') or (value = '2008-04-08')) and (value <> '')) and key is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + predicate: (((value) IN ('2008-04-10', '2008-04-08') and (value <> '')) and key is not null) (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce 
partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator diff --git ql/src/test/results/clientpositive/filter_in_or_dup.q.out ql/src/test/results/clientpositive/filter_in_or_dup.q.out new file mode 100644 index 0000000..57d5bc8 --- /dev/null +++ ql/src/test/results/clientpositive/filter_in_or_dup.q.out @@ -0,0 +1,32 @@ +PREHOOK: query: EXPLAIN +SELECT f.key +FROM cbo_t1 f +WHERE (f.key = '1' OR f.key='2') +AND f.key IN ('1', '2') +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT f.key +FROM cbo_t1 f +WHERE (f.key = '1' OR f.key='2') +AND f.key IN ('1', '2') +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: f + Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key) IN ('1', '2') (type: boolean) + Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE + ListSink + diff --git ql/src/test/results/clientpositive/flatten_and_or.q.out ql/src/test/results/clientpositive/flatten_and_or.q.out index 1d88fc9..e0e33a3 100644 --- ql/src/test/results/clientpositive/flatten_and_or.q.out +++ ql/src/test/results/clientpositive/flatten_and_or.q.out @@ -44,15 +44,15 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((key = '0') and (value = '8')) or ((key = '1') and (value = '5')) or ((key = '2') and (value = '6')) or ((key = '3') and (value = '8')) or ((key = '4') and (value = '1')) or ((key = '5') 
and (value = '6')) or ((key = '6') and (value = '1')) or ((key = '7') and (value = '1')) or ((key = '8') and (value = '1')) or ((key = '9') and (value = '1')) or ((key = '10') and (value = '3'))) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + predicate: (struct(key,value)) IN (const struct('0','8'), const struct('1','5'), const struct('2','6'), const struct('3','8'), const struct('4','1'), const struct('5','6'), const struct('6','1'), const struct('7','1'), const struct('8','1'), const struct('9','1'), const struct('10','3')) (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/fold_eq_with_case_when.q.out ql/src/test/results/clientpositive/fold_eq_with_case_when.q.out index 106ad6b..524c6cf 100644 --- ql/src/test/results/clientpositive/fold_eq_with_case_when.q.out +++ ql/src/test/results/clientpositive/fold_eq_with_case_when.q.out @@ -137,15 +137,15 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key = '238') or (key = '94')) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + predicate: (key) IN ('238', '94') (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out index 83576f1..2945090 100644 --- ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out +++ ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out @@ -471,7 +471,7 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: (((key = '484') and (value = 'val_484')) or ((key = '238') and (value = 'val_238'))) (type: boolean) + predicate: (struct(key,value)) IN (const struct('484','val_484'), const struct('238','val_238')) (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) diff --git ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out index 05f05aa..416b07e 100644 --- ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out +++ ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out @@ -5910,49 +5910,49 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart_date_hour - filterExpr: (((((date = '2008-04-08') or (date = '2008-04-09')) and (UDFToDouble(hour) = 11.0)) and ds is not null) and hr is not null) (type: 
boolean) + filterExpr: ((((date) IN ('2008-04-08', '2008-04-09') and (UDFToDouble(hour) = 11.0)) and ds is not null) and hr is not null) (type: boolean) Statistics: Num rows: 4 Data size: 108 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((((date = '2008-04-08') or (date = '2008-04-09')) and (UDFToDouble(hour) = 11.0)) and ds is not null) and hr is not null) (type: boolean) - Statistics: Num rows: 2 Data size: 54 Basic stats: COMPLETE Column stats: NONE + predicate: ((((date) IN ('2008-04-08', '2008-04-09') and (UDFToDouble(hour) = 11.0)) and ds is not null) and hr is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ds (type: string), hr (type: string) outputColumnNames: _col0, _col2 - Statistics: Num rows: 2 Data size: 54 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), UDFToDouble(_col2) (type: double) sort order: ++ Map-reduce partition columns: _col0 (type: string), UDFToDouble(_col2) (type: double) - Statistics: Num rows: 2 Data size: 54 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 54 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 54 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE Dynamic Partitioning Event Operator Target column: ds (string) Target Input: srcpart_orc Partition key expr: ds - Statistics: Num rows: 2 Data size: 54 Basic stats: COMPLETE Column stats: NONE 
+ Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE Target Vertex: Map 1 Select Operator expressions: UDFToDouble(_col2) (type: double) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 54 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: double) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 54 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE Dynamic Partitioning Event Operator Target column: hr (int) Target Input: srcpart_orc Partition key expr: UDFToDouble(hr) - Statistics: Num rows: 2 Data size: 54 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE Target Vertex: Map 1 Execution mode: llap Reducer 2 diff --git ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out index 3f159a3..ad8f709 100644 --- ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out @@ -5545,49 +5545,49 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart_date_hour - filterExpr: (((((date = '2008-04-08') or (date = '2008-04-09')) and (UDFToDouble(hour) = 11.0)) and ds is not null) and hr is not null) (type: boolean) + filterExpr: ((((date) IN ('2008-04-08', '2008-04-09') and (UDFToDouble(hour) = 11.0)) and ds is not null) and hr is not null) (type: boolean) Statistics: Num rows: 4 Data size: 1440 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((((date = '2008-04-08') or (date = '2008-04-09')) and (UDFToDouble(hour) = 11.0)) and ds is not null) and hr is not null) (type: boolean) - Statistics: Num rows: 2 Data size: 720 Basic 
stats: COMPLETE Column stats: NONE + predicate: ((((date) IN ('2008-04-08', '2008-04-09') and (UDFToDouble(hour) = 11.0)) and ds is not null) and hr is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ds (type: string), hr (type: string) outputColumnNames: _col0, _col2 - Statistics: Num rows: 2 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), UDFToDouble(_col2) (type: double) sort order: ++ Map-reduce partition columns: _col0 (type: string), UDFToDouble(_col2) (type: double) - Statistics: Num rows: 2 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE Dynamic Partitioning Event Operator Target column: ds (string) Target Input: srcpart_orc Partition key expr: ds - Statistics: Num rows: 2 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE Target Vertex: Map 1 Select Operator expressions: UDFToDouble(_col2) (type: double) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: double) mode: hash outputColumnNames: _col0 - 
Statistics: Num rows: 2 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE Dynamic Partitioning Event Operator Target column: hr (int) Target Input: srcpart_orc Partition key expr: UDFToDouble(hr) - Statistics: Num rows: 2 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE Target Vertex: Map 1 Execution mode: llap Reducer 2 diff --git ql/src/test/results/clientpositive/pcr.q.out ql/src/test/results/clientpositive/pcr.q.out index 684d4d7..2458e3d 100644 --- ql/src/test/results/clientpositive/pcr.q.out +++ ql/src/test/results/clientpositive/pcr.q.out @@ -1717,17 +1717,17 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: (key = 14) (type: boolean) - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + predicate: ((ds) IN ('2000-04-08', '2000-04-09') and (key = 14)) (type: boolean) + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: value (type: string) outputColumnNames: _col1 - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: 14 (type: int), _col1 (type: string) null sort order: aa sort order: ++ - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE tag: -1 auto parallelism: false Path -> Alias: @@ -1831,13 +1831,13 @@ STAGE PLANS: Select Operator expressions: 14 (type: int), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE File Output 
Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1928,17 +1928,21 @@ STAGE PLANS: alias: pcr_t1 Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE GatherStats: false - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - null sort order: aa - sort order: ++ - Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE - tag: -1 - auto parallelism: false + Filter Operator + isSamplingPred: false + predicate: (ds) IN ('2000-04-08', '2000-04-09') (type: boolean) + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + null sort order: aa + sort order: ++ + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + tag: -1 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2040,13 +2044,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE File Output Operator 
compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -2513,17 +2517,17 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: (((ds = '2000-04-08') and (key = 1)) or ((ds = '2000-04-09') and (key = 2))) (type: boolean) - Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + predicate: (struct(key,ds)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) (type: boolean) + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string), ds (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) null sort order: aaa sort order: +++ - Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE tag: -1 auto parallelism: false Path -> Alias: @@ -2627,13 +2631,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 40 
Data size: 320 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -5521,17 +5525,17 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: (UDFToDouble(key) = 11.0) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + predicate: ((hr) IN ('11', '12') and (UDFToDouble(key) = 11.0)) (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), '2008-04-08' (type: string), _col3 (type: string) null sort order: aaa sort order: +++ - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE tag: -1 value expressions: _col1 (type: string) auto parallelism: false @@ -5638,13 +5642,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), '2008-04-08' (type: string), KEY.reducesinkkey2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/perf/query13.q.out ql/src/test/results/clientpositive/perf/query13.q.out index cc40e79..15d8e5d 100644 --- ql/src/test/results/clientpositive/perf/query13.q.out +++ ql/src/test/results/clientpositive/perf/query13.q.out @@ -160,33 +160,33 @@ Stage-0 <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_26] PartitionCols:_col3 - Select Operator [SEL_22] (rows=4491 width=362) + Select Operator [SEL_22] (rows=1122 width=362) Output:["_col0","_col3","_col5","_col7","_col8","_col9"] - Filter Operator [FIL_21] (rows=4491 width=362) + Filter Operator [FIL_21] (rows=1122 width=362) predicate:(((_col12 = 'M') and (_col13 = '4 yr Degree') and _col6 BETWEEN 100.0 AND 150.0 and (_col15 = 3)) or ((_col12 = 'D') and (_col13 = 'Primary') and _col6 BETWEEN 50.0 AND 100.0 and (_col15 = 1)) or ((_col12 = 'U') and (_col13 = 'Advanced Degree') and _col6 BETWEEN 150.0 AND 200.0 and (_col15 = 1))) - Merge Join Operator [MERGEJOIN_71] (rows=23958 width=362) + Merge Join Operator [MERGEJOIN_71] (rows=5989 width=362) Conds:RS_18._col2=RS_19._col0(Inner),Output:["_col0","_col3","_col5","_col6","_col7","_col8","_col9","_col12","_col13","_col15"] <-Map 10 [SIMPLE_EDGE] SHUFFLE [RS_19] PartitionCols:_col0 - Select Operator [SEL_11] (rows=7200 width=107) + Select Operator [SEL_11] (rows=3600 width=107) Output:["_col0","_col1"] - Filter Operator [FIL_66] (rows=7200 width=107) - predicate:(((hd_dep_count = 3) or (hd_dep_count = 1)) and hd_demo_sk is not null) + Filter Operator [FIL_66] (rows=3600 width=107) + predicate:((hd_dep_count) IN (3, 1) and hd_demo_sk is not null) TableScan [TS_9] (rows=7200 width=107) default@household_demographics,household_demographics,Tbl:COMPLETE,Col:NONE,Output:["hd_demo_sk","hd_dep_count"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_18] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_70] (rows=21780 
width=362) + Merge Join Operator [MERGEJOIN_70] (rows=5445 width=362) Conds:RS_15._col1=RS_16._col0(Inner),Output:["_col0","_col2","_col3","_col5","_col6","_col7","_col8","_col9","_col12","_col13"] <-Map 9 [SIMPLE_EDGE] SHUFFLE [RS_16] PartitionCols:_col0 - Select Operator [SEL_8] (rows=19800 width=362) + Select Operator [SEL_8] (rows=4950 width=362) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_65] (rows=19800 width=362) - predicate:((((cd_marital_status = 'M') or (cd_marital_status = 'D') or (cd_marital_status = 'U')) and ((cd_education_status = '4 yr Degree') or (cd_education_status = 'Primary') or (cd_education_status = 'Advanced Degree'))) and cd_demo_sk is not null) + Filter Operator [FIL_65] (rows=4950 width=362) + predicate:(((cd_marital_status) IN ('M', 'D', 'U') and (cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree')) and cd_demo_sk is not null) TableScan [TS_6] (rows=19800 width=362) default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"] <-Reducer 2 [SIMPLE_EDGE] diff --git ql/src/test/results/clientpositive/perf/query34.q.out ql/src/test/results/clientpositive/perf/query34.q.out index 6fa6985..f90356b 100644 --- ql/src/test/results/clientpositive/perf/query34.q.out +++ ql/src/test/results/clientpositive/perf/query34.q.out @@ -54,10 +54,10 @@ Stage-0 <-Map 10 [SIMPLE_EDGE] SHUFFLE [RS_19] PartitionCols:_col0 - Select Operator [SEL_11] (rows=1200 width=107) + Select Operator [SEL_11] (rows=600 width=107) Output:["_col0"] - Filter Operator [FIL_55] (rows=1200 width=107) - predicate:(((((hd_buy_potential = '1001-5000') or (hd_buy_potential = '5001-10000')) and (hd_vehicle_count > 0)) and CASE WHEN ((hd_vehicle_count > 0)) THEN (((UDFToDouble(hd_dep_count) / UDFToDouble(hd_vehicle_count)) > 1.2)) ELSE (null) END) and hd_demo_sk is not null) + Filter Operator [FIL_55] (rows=600 width=107) + predicate:((((hd_buy_potential) IN ('1001-5000', 
'5001-10000') and (hd_vehicle_count > 0)) and CASE WHEN ((hd_vehicle_count > 0)) THEN (((UDFToDouble(hd_dep_count) / UDFToDouble(hd_vehicle_count)) > 1.2)) ELSE (null) END) and hd_demo_sk is not null) TableScan [TS_9] (rows=7200 width=107) default@household_demographics,household_demographics,Tbl:COMPLETE,Col:NONE,Output:["hd_demo_sk","hd_buy_potential","hd_dep_count","hd_vehicle_count"] <-Reducer 3 [SIMPLE_EDGE] diff --git ql/src/test/results/clientpositive/perf/query71.q.out ql/src/test/results/clientpositive/perf/query71.q.out index 504705b..b2860bf 100644 --- ql/src/test/results/clientpositive/perf/query71.q.out +++ ql/src/test/results/clientpositive/perf/query71.q.out @@ -37,10 +37,10 @@ Stage-0 <-Map 16 [SIMPLE_EDGE] SHUFFLE [RS_43] PartitionCols:_col0 - Select Operator [SEL_38] (rows=86400 width=471) + Select Operator [SEL_38] (rows=43200 width=471) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_82] (rows=86400 width=471) - predicate:(((t_meal_time = 'breakfast') or (t_meal_time = 'dinner')) and t_time_sk is not null) + Filter Operator [FIL_82] (rows=43200 width=471) + predicate:((t_meal_time) IN ('breakfast', 'dinner') and t_time_sk is not null) TableScan [TS_36] (rows=86400 width=471) default@time_dim,time_dim,Tbl:COMPLETE,Col:NONE,Output:["t_time_sk","t_hour","t_minute","t_meal_time"] <-Reducer 2 [SIMPLE_EDGE] diff --git ql/src/test/results/clientpositive/perf/query73.q.out ql/src/test/results/clientpositive/perf/query73.q.out index e367f51..0db9365 100644 --- ql/src/test/results/clientpositive/perf/query73.q.out +++ ql/src/test/results/clientpositive/perf/query73.q.out @@ -54,10 +54,10 @@ Stage-0 <-Map 10 [SIMPLE_EDGE] SHUFFLE [RS_19] PartitionCols:_col0 - Select Operator [SEL_11] (rows=1200 width=107) + Select Operator [SEL_11] (rows=600 width=107) Output:["_col0"] - Filter Operator [FIL_55] (rows=1200 width=107) - predicate:(((((hd_buy_potential = '1001-5000') or (hd_buy_potential = '5001-10000')) and (hd_vehicle_count > 0)) and CASE WHEN 
((hd_vehicle_count > 0)) THEN (((UDFToDouble(hd_dep_count) / UDFToDouble(hd_vehicle_count)) > 1.0)) ELSE (null) END) and hd_demo_sk is not null) + Filter Operator [FIL_55] (rows=600 width=107) + predicate:((((hd_buy_potential) IN ('1001-5000', '5001-10000') and (hd_vehicle_count > 0)) and CASE WHEN ((hd_vehicle_count > 0)) THEN (((UDFToDouble(hd_dep_count) / UDFToDouble(hd_vehicle_count)) > 1.0)) ELSE (null) END) and hd_demo_sk is not null) TableScan [TS_9] (rows=7200 width=107) default@household_demographics,household_demographics,Tbl:COMPLETE,Col:NONE,Output:["hd_demo_sk","hd_buy_potential","hd_dep_count","hd_vehicle_count"] <-Reducer 3 [SIMPLE_EDGE] diff --git ql/src/test/results/clientpositive/perf/query85.q.out ql/src/test/results/clientpositive/perf/query85.q.out index 72ac500..72f6507 100644 --- ql/src/test/results/clientpositive/perf/query85.q.out +++ ql/src/test/results/clientpositive/perf/query85.q.out @@ -84,33 +84,33 @@ Stage-0 <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_32] PartitionCols:_col9 - Merge Join Operator [MERGEJOIN_104] (rows=21780 width=362) + Merge Join Operator [MERGEJOIN_104] (rows=5445 width=362) Conds:RS_29._col10, _col17, _col18=RS_30._col0, _col1, _col2(Inner),Output:["_col0","_col4","_col6","_col9","_col11","_col13","_col14"] <-Map 14 [SIMPLE_EDGE] SHUFFLE [RS_30] PartitionCols:_col0, _col1, _col2 - Select Operator [SEL_25] (rows=19800 width=362) + Select Operator [SEL_25] (rows=4950 width=362) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_97] (rows=19800 width=362) - predicate:((((((cd_education_status = '4 yr Degree') or (cd_education_status = 'Primary') or (cd_education_status = 'Advanced Degree')) and ((cd_marital_status = 'M') or (cd_marital_status = 'D') or (cd_marital_status = 'U'))) and cd_demo_sk is not null) and cd_marital_status is not null) and cd_education_status is not null) + Filter Operator [FIL_97] (rows=4950 width=362) + predicate:(((((cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree') and 
(cd_marital_status) IN ('M', 'D', 'U')) and cd_demo_sk is not null) and cd_marital_status is not null) and cd_education_status is not null) TableScan [TS_23] (rows=19800 width=362) default@customer_demographics,cd1,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_29] PartitionCols:_col10, _col17, _col18 - Select Operator [SEL_22] (rows=8166 width=362) + Select Operator [SEL_22] (rows=2088 width=585) Output:["_col0","_col10","_col11","_col13","_col14","_col17","_col18","_col4","_col6","_col9"] - Filter Operator [FIL_21] (rows=8166 width=362) + Filter Operator [FIL_21] (rows=2088 width=585) predicate:(((_col17 = 'M') and (_col18 = '4 yr Degree') and _col5 BETWEEN 100.0 AND 150.0) or ((_col17 = 'D') and (_col18 = 'Primary') and _col5 BETWEEN 50.0 AND 100.0) or ((_col17 = 'U') and (_col18 = 'Advanced Degree') and _col5 BETWEEN 150.0 AND 200.0)) - Merge Join Operator [MERGEJOIN_103] (rows=21780 width=362) + Merge Join Operator [MERGEJOIN_103] (rows=5568 width=585) Conds:RS_18._col8=RS_19._col0(Inner),Output:["_col0","_col4","_col5","_col6","_col9","_col10","_col11","_col13","_col14","_col17","_col18"] <-Map 13 [SIMPLE_EDGE] SHUFFLE [RS_19] PartitionCols:_col0 - Select Operator [SEL_11] (rows=19800 width=362) + Select Operator [SEL_11] (rows=4950 width=362) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_96] (rows=19800 width=362) - predicate:((((((cd_education_status = '4 yr Degree') or (cd_education_status = 'Primary') or (cd_education_status = 'Advanced Degree')) and ((cd_marital_status = 'M') or (cd_marital_status = 'D') or (cd_marital_status = 'U'))) and cd_demo_sk is not null) and cd_education_status is not null) and cd_marital_status is not null) + Filter Operator [FIL_96] (rows=4950 width=362) + predicate:(((cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree') and (cd_marital_status) IN ('M', 'D', 'U')) and cd_demo_sk is not null) TableScan [TS_9] (rows=19800 
width=362) default@customer_demographics,cd1,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"] <-Reducer 3 [SIMPLE_EDGE] diff --git ql/src/test/results/clientpositive/perf/query91.q.out ql/src/test/results/clientpositive/perf/query91.q.out index 66d8056..71b1af1 100644 --- ql/src/test/results/clientpositive/perf/query91.q.out +++ ql/src/test/results/clientpositive/perf/query91.q.out @@ -54,10 +54,10 @@ Stage-0 <-Map 14 [SIMPLE_EDGE] SHUFFLE [RS_34] PartitionCols:_col0 - Select Operator [SEL_17] (rows=9900 width=362) + Select Operator [SEL_17] (rows=2475 width=362) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_79] (rows=9900 width=362) - predicate:(((((cd_marital_status = 'M') or (cd_marital_status = 'W')) and ((cd_education_status = 'Unknown') or (cd_education_status = 'Advanced Degree'))) and (((cd_marital_status = 'M') and (cd_education_status = 'Unknown')) or ((cd_marital_status = 'W') and (cd_education_status = 'Advanced Degree')))) and cd_demo_sk is not null) + Filter Operator [FIL_79] (rows=2475 width=362) + predicate:((((cd_marital_status) IN ('M', 'W') and (cd_education_status) IN ('Unknown', 'Advanced Degree')) and (struct(cd_marital_status,cd_education_status)) IN (const struct('M','Unknown'), const struct('W','Advanced Degree'))) and cd_demo_sk is not null) TableScan [TS_15] (rows=19800 width=362) default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"] <-Reducer 5 [SIMPLE_EDGE] diff --git ql/src/test/results/clientpositive/pointlookup.q.out ql/src/test/results/clientpositive/pointlookup.q.out index 460cc74..c9bd064 100644 --- ql/src/test/results/clientpositive/pointlookup.q.out +++ ql/src/test/results/clientpositive/pointlookup.q.out @@ -44,15 +44,15 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((key = '0') and (value = '8')) or ((key 
= '1') and (value = '5')) or ((key = '2') and (value = '6')) or ((key = '3') and (value = '8')) or ((key = '4') and (value = '1')) or ((key = '5') and (value = '6')) or ((key = '6') and (value = '1')) or ((key = '7') and (value = '1')) or ((key = '8') and (value = '1')) or ((key = '9') and (value = '1')) or ((key = '10') and (value = '3'))) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + predicate: (struct(key,value)) IN (const struct('0','8'), const struct('1','5'), const struct('2','6'), const struct('3','8'), const struct('4','1'), const struct('5','6'), const struct('6','1'), const struct('7','1'), const struct('8','1'), const struct('9','1'), const struct('10','3')) (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/pointlookup2.q.out ql/src/test/results/clientpositive/pointlookup2.q.out index fb17e72..1bda037 100644 --- ql/src/test/results/clientpositive/pointlookup2.q.out +++ ql/src/test/results/clientpositive/pointlookup2.q.out @@ -1172,7 +1172,7 @@ STAGE PLANS: Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE Filter Operator isSamplingPred: false - predicate: (struct(_col4,_col2)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) (type: boolean) + predicate: (struct(_col2,_col4)) 
IN (const struct('2000-04-08',1), const struct('2000-04-09',2)) (type: boolean) Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false diff --git ql/src/test/results/clientpositive/pointlookup3.q.out ql/src/test/results/clientpositive/pointlookup3.q.out index d5c4157..75ab080 100644 --- ql/src/test/results/clientpositive/pointlookup3.q.out +++ ql/src/test/results/clientpositive/pointlookup3.q.out @@ -129,7 +129,7 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: (struct(ds1,key)) IN (const struct('2000-04-08',1), const struct('2000-04-09',2)) (type: boolean) + predicate: (struct(key,ds1)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) (type: boolean) Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string), ds1 (type: string), ds2 (type: string) @@ -374,14 +374,14 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: (key = 1) (type: boolean) + predicate: (struct(key,ds1)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) (type: boolean) Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string), ds1 (type: string) - outputColumnNames: _col1, _col2 + expressions: key (type: int), value (type: string), ds1 (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: 1 (type: int), _col1 (type: string), _col2 (type: string), '2001-04-08' (type: string) + key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), '2001-04-08' (type: string) null sort order: aaaa sort order: ++++ Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE @@ -441,7 +441,7 @@ STAGE PLANS: Needs Tagging: false 
Reduce Operator Tree: Select Operator - expressions: 1 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), '2001-04-08' (type: string) + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), '2001-04-08' (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1340,7 +1340,7 @@ STAGE PLANS: Statistics: Num rows: 33 Data size: 264 Basic stats: COMPLETE Column stats: NONE Filter Operator isSamplingPred: false - predicate: (struct(_col4,_col2)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) (type: boolean) + predicate: (struct(_col2,_col4)) IN (const struct('2000-04-08',1), const struct('2000-04-09',2)) (type: boolean) Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false diff --git ql/src/test/results/clientpositive/pointlookup4.q.out ql/src/test/results/clientpositive/pointlookup4.q.out index 0a9bd3e..84f3e00 100644 --- ql/src/test/results/clientpositive/pointlookup4.q.out +++ ql/src/test/results/clientpositive/pointlookup4.q.out @@ -135,21 +135,21 @@ STAGE PLANS: Map Operator Tree: TableScan alias: pcr_t1 - Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 60 Data size: 480 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: (((ds1 = '2000-04-08') and (ds2 = '2001-04-08') and (key = 1)) or ((ds1 = '2000-04-09') and (ds2 = '2001-04-09') and (key = 2))) (type: boolean) - Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + predicate: (struct(key,ds1,ds2)) IN (const struct(1,'2000-04-08','2001-04-08'), const struct(2,'2000-04-09','2001-04-09')) (type: boolean) + Statistics: Num rows: 30 Data size: 240 Basic stats: COMPLETE Column stats: NONE Select 
Operator expressions: key (type: int), value (type: string), ds1 (type: string), ds2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 240 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string) null sort order: aaaa sort order: ++++ - Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 240 Basic stats: COMPLETE Column stats: NONE tag: -1 auto parallelism: false Path -> Alias: @@ -247,21 +247,68 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.pcr_t1 name: default.pcr_t1 +#### A masked pattern was here #### + Partition + base file name: ds2=2001-04-10 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds1 2000-04-10 + ds2 2001-04-10 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t1 + numFiles 1 + numRows 20 + partition_columns ds1/ds2 + partition_columns.types string:string + rawDataSize 160 + serialization.ddl struct pcr_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t1 + partition_columns ds1/ds2 + 
partition_columns.types string:string + serialization.ddl struct pcr_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcr_t1 + name: default.pcr_t1 Truncated Path -> Alias: /pcr_t1/ds1=2000-04-08/ds2=2001-04-08 [pcr_t1] /pcr_t1/ds1=2000-04-09/ds2=2001-04-09 [pcr_t1] + /pcr_t1/ds1=2000-04-10/ds2=2001-04-10 [pcr_t1] Needs Tagging: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 240 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 240 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -384,7 +431,7 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: (struct(ds1,key,ds2)) IN (const struct('2000-04-08',1,'2001-04-08'), const struct('2000-04-09',2,'2001-04-09')) (type: boolean) + predicate: (struct(key,ds1,ds2)) IN (const struct(1,'2000-04-08','2001-04-08'), const struct(2,'2000-04-09','2001-04-09')) (type: boolean) Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string), ds1 (type: string), ds2 (type: string) diff --git ql/src/test/results/clientpositive/spark/auto_join19.q.out ql/src/test/results/clientpositive/spark/auto_join19.q.out 
index 88ef3f1..576077b 100644 --- ql/src/test/results/clientpositive/spark/auto_join19.q.out +++ ql/src/test/results/clientpositive/spark/auto_join19.q.out @@ -56,12 +56,12 @@ STAGE PLANS: alias: src1 Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + predicate: (((ds) IN ('2008-04-08', '2008-04-09') and (hr) IN ('12', '11')) and key is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -71,14 +71,14 @@ STAGE PLANS: outputColumnNames: _col0, _col4 input vertices: 1 Map 2 - Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToInteger(_col0) (type: int), _col4 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_7.q.out ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_7.q.out index 
0b64a87..02d552f 100644 --- ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_7.q.out +++ ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_7.q.out @@ -89,12 +89,12 @@ STAGE PLANS: alias: a Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((key = 0) or (key = 5)) and key is not null) (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + predicate: (key) IN (0, 5) (type: boolean) + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col0 (type: int) @@ -114,12 +114,12 @@ STAGE PLANS: alias: b Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((key = 0) or (key = 5)) and key is not null) (type: boolean) - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE + predicate: (key) IN (0, 5) (type: boolean) + Statistics: Num rows: 42 Data size: 368 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 42 Data size: 368 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -129,16 +129,16 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col4 input vertices: 0 Map 1 - Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), concat(_col1, _col4) 
(type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Local Work: Map Reduce Local Work @@ -147,10 +147,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -271,12 +271,12 @@ STAGE PLANS: alias: test_table1 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((key = 0) or (key = 5)) and key is not null) (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + predicate: (key) IN (0, 5) (type: boolean) + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col0 (type: int) @@ -296,12 
+296,12 @@ STAGE PLANS: alias: test_table2 Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((key = 0) or (key = 5)) and key is not null) (type: boolean) - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE + predicate: (key) IN (0, 5) (type: boolean) + Statistics: Num rows: 42 Data size: 368 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 42 Data size: 368 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -311,16 +311,16 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col3 input vertices: 0 Map 1 - Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), concat(_col1, _col3) (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Local Work: Map Reduce Local Work @@ -329,10 +329,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE File Output Operator 
compressed: false - Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -459,12 +459,12 @@ STAGE PLANS: alias: test_table1 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key < 8) and ((key = 0) or (key = 5))) (type: boolean) - Statistics: Num rows: 2 Data size: 14 Basic stats: COMPLETE Column stats: NONE + predicate: ((key < 8) and (key) IN (0, 5)) (type: boolean) + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 14 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col0 (type: int) @@ -484,12 +484,12 @@ STAGE PLANS: alias: test_table2 Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key < 8) and ((key = 0) or (key = 5))) (type: boolean) - Statistics: Num rows: 28 Data size: 245 Basic stats: COMPLETE Column stats: NONE + predicate: ((key < 8) and (key) IN (0, 5)) (type: boolean) + Statistics: Num rows: 14 Data size: 122 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 28 Data size: 245 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 122 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -499,16 +499,16 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col3 input vertices: 0 Map 1 - Statistics: Num rows: 30 Data size: 269 Basic stats: 
COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 134 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), concat(_col1, _col3) (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 30 Data size: 269 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 134 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 30 Data size: 269 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 134 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Local Work: Map Reduce Local Work @@ -517,10 +517,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 30 Data size: 269 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 134 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 30 Data size: 269 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 134 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/spark/pcr.q.out ql/src/test/results/clientpositive/spark/pcr.q.out index 6345eff..6d848c0 100644 --- ql/src/test/results/clientpositive/spark/pcr.q.out +++ ql/src/test/results/clientpositive/spark/pcr.q.out @@ -1758,17 +1758,17 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: (key = 14) (type: boolean) - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + predicate: ((ds) IN ('2000-04-08', '2000-04-09') and (key = 14)) (type: boolean) + Statistics: Num rows: 10 Data 
size: 80 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: value (type: string) outputColumnNames: _col1 - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: 14 (type: int), _col1 (type: string) null sort order: aa sort order: ++ - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE tag: -1 auto parallelism: false Path -> Alias: @@ -1873,13 +1873,13 @@ STAGE PLANS: Select Operator expressions: 14 (type: int), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1975,17 +1975,21 @@ STAGE PLANS: alias: pcr_t1 Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE GatherStats: false - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - null sort order: aa - sort order: ++ - Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE - tag: -1 - auto parallelism: false + Filter Operator + isSamplingPred: false + predicate: (ds) IN ('2000-04-08', '2000-04-09') (type: 
boolean) + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + null sort order: aa + sort order: ++ + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + tag: -1 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2088,13 +2092,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -2572,17 +2576,17 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: (((ds = '2000-04-08') and (key = 1)) or ((ds = '2000-04-09') and (key = 2))) (type: boolean) - Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + predicate: (struct(key,ds)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) (type: boolean) + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string), ds (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + 
Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) null sort order: aaa sort order: +++ - Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE tag: -1 auto parallelism: false Path -> Alias: @@ -2687,13 +2691,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -4894,17 +4898,17 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: (UDFToDouble(key) = 11.0) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + predicate: ((hr) IN ('11', '12') and (UDFToDouble(key) = 11.0)) (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), '2008-04-08' (type: string), _col3 (type: 
string) null sort order: aaa sort order: +++ - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE tag: -1 value expressions: _col1 (type: string) auto parallelism: false @@ -5012,13 +5016,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), '2008-04-08' (type: string), KEY.reducesinkkey2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/spark/vectorized_case.q.out ql/src/test/results/clientpositive/spark/vectorized_case.q.out index c06ea94..48fb5df 100644 --- ql/src/test/results/clientpositive/spark/vectorized_case.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_case.q.out @@ -49,15 +49,15 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean) - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + predicate: (csmallint) IN (418, 12205, 10583) (type: boolean) + Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 
12205)) THEN ('b') ELSE ('c') END (type: string), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -163,15 +163,15 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean) - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + predicate: (csmallint) IN (418, 12205, 10583) (type: boolean) + Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE (null) END (type: string), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN (null) ELSE ('c') END (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE table: input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/tez/bucketpruning1.q.out ql/src/test/results/clientpositive/tez/bucketpruning1.q.out index 5315f2c..38d718b 100644 --- ql/src/test/results/clientpositive/tez/bucketpruning1.q.out +++ ql/src/test/results/clientpositive/tez/bucketpruning1.q.out @@ -1102,12 +1102,13 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcbucket_pruned - filterExpr: (((key = 1) or (key = 2)) and (ds = '2008-04-08')) (type: boolean) + filterExpr: ((key) IN (1, 2) and (ds = '2008-04-08')) (type: boolean) + buckets included: [1,2,] of 16 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: (((key = 1) or (key = 2)) and (ds = '2008-04-08')) (type: boolean) + predicate: ((key) IN (1, 2) and (ds = '2008-04-08')) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: key (type: int), value (type: string), '2008-04-08' (type: string) @@ -1197,12 +1198,13 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcbucket_pruned - filterExpr: ((((key = 1) or (key = 2)) and (value = 'One')) and (ds = '2008-04-08')) (type: boolean) + filterExpr: (((key) IN (1, 2) and (value = 'One')) and (ds = '2008-04-08')) (type: boolean) + buckets included: [1,2,] of 16 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: ((((key = 1) or (key = 2)) and (value = 'One')) and (ds = '2008-04-08')) (type: boolean) + predicate: (((key) IN (1, 2) and (value = 'One')) and (ds = '2008-04-08')) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: key (type: int), 'One' (type: string), '2008-04-08' (type: string) @@ -1889,12 +1891,13 @@ STAGE 
PLANS: Map Operator Tree: TableScan alias: srcbucket_pruned - filterExpr: (((key = 1) and (ds = '2008-04-08')) and ((value = 'One') or (value = 'Two'))) (type: boolean) + filterExpr: (((key = 1) and (ds = '2008-04-08')) and (value) IN ('One', 'Two')) (type: boolean) + buckets included: [1,] of 16 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: (((key = 1) and (ds = '2008-04-08')) and ((value = 'One') or (value = 'Two'))) (type: boolean) + predicate: (((key = 1) and (ds = '2008-04-08')) and (value) IN ('One', 'Two')) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: 1 (type: int), value (type: string), '2008-04-08' (type: string) diff --git ql/src/test/results/clientpositive/tez/dynamic_partition_pruning.q.out ql/src/test/results/clientpositive/tez/dynamic_partition_pruning.q.out index 159415d..5958e9f 100644 --- ql/src/test/results/clientpositive/tez/dynamic_partition_pruning.q.out +++ ql/src/test/results/clientpositive/tez/dynamic_partition_pruning.q.out @@ -5734,49 +5734,49 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart_date_hour - filterExpr: (((((date = '2008-04-08') or (date = '2008-04-09')) and (UDFToDouble(hour) = 11.0)) and ds is not null) and hr is not null) (type: boolean) + filterExpr: ((((date) IN ('2008-04-08', '2008-04-09') and (UDFToDouble(hour) = 11.0)) and ds is not null) and hr is not null) (type: boolean) Statistics: Num rows: 4 Data size: 108 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((((date = '2008-04-08') or (date = '2008-04-09')) and (UDFToDouble(hour) = 11.0)) and ds is not null) and hr is not null) (type: boolean) - Statistics: Num rows: 2 Data size: 54 Basic stats: COMPLETE Column stats: NONE + predicate: ((((date) IN ('2008-04-08', '2008-04-09') and (UDFToDouble(hour) = 11.0)) and ds is not null) and hr is not null) (type: 
boolean) + Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ds (type: string), hr (type: string) outputColumnNames: _col0, _col2 - Statistics: Num rows: 2 Data size: 54 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), UDFToDouble(_col2) (type: double) sort order: ++ Map-reduce partition columns: _col0 (type: string), UDFToDouble(_col2) (type: double) - Statistics: Num rows: 2 Data size: 54 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 54 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 54 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE Dynamic Partitioning Event Operator Target column: ds (string) Target Input: srcpart_orc Partition key expr: ds - Statistics: Num rows: 2 Data size: 54 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE Target Vertex: Map 1 Select Operator expressions: UDFToDouble(_col2) (type: double) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 54 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: double) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 54 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE Dynamic Partitioning 
Event Operator Target column: hr (int) Target Input: srcpart_orc Partition key expr: UDFToDouble(hr) - Statistics: Num rows: 2 Data size: 54 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE Target Vertex: Map 1 Reducer 2 Reduce Operator Tree: diff --git ql/src/test/results/clientpositive/tez/explainuser_1.q.out ql/src/test/results/clientpositive/tez/explainuser_1.q.out index b7a8174..4cd5826 100644 --- ql/src/test/results/clientpositive/tez/explainuser_1.q.out +++ ql/src/test/results/clientpositive/tez/explainuser_1.q.out @@ -1782,7 +1782,7 @@ Stage-0 Select Operator [SEL_2] Output:["_col0"] Filter Operator [FIL_4] - predicate:((c_int = -6) or (c_int = 6)) + predicate:(c_int) IN (-6, 6) TableScan [TS_0] Output:["key","c_int"] diff --git ql/src/test/results/clientpositive/tez/vectorized_case.q.out ql/src/test/results/clientpositive/tez/vectorized_case.q.out index 28b7f9b..7bcdf6e 100644 --- ql/src/test/results/clientpositive/tez/vectorized_case.q.out +++ ql/src/test/results/clientpositive/tez/vectorized_case.q.out @@ -49,15 +49,15 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean) - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + predicate: (csmallint) IN (418, 12205, 10583) (type: boolean) + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: 
NONE + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -163,15 +163,15 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean) - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + predicate: (csmallint) IN (418, 12205, 10583) (type: boolean) + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE (null) END (type: string), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN (null) ELSE ('c') END (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/tez/vectorized_dynamic_partition_pruning.q.out ql/src/test/results/clientpositive/tez/vectorized_dynamic_partition_pruning.q.out index 
f3e31d4..36a5711 100644 --- ql/src/test/results/clientpositive/tez/vectorized_dynamic_partition_pruning.q.out +++ ql/src/test/results/clientpositive/tez/vectorized_dynamic_partition_pruning.q.out @@ -5469,49 +5469,49 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart_date_hour - filterExpr: (((((date = '2008-04-08') or (date = '2008-04-09')) and (UDFToDouble(hour) = 11.0)) and ds is not null) and hr is not null) (type: boolean) + filterExpr: ((((date) IN ('2008-04-08', '2008-04-09') and (UDFToDouble(hour) = 11.0)) and ds is not null) and hr is not null) (type: boolean) Statistics: Num rows: 4 Data size: 1440 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((((date = '2008-04-08') or (date = '2008-04-09')) and (UDFToDouble(hour) = 11.0)) and ds is not null) and hr is not null) (type: boolean) - Statistics: Num rows: 2 Data size: 720 Basic stats: COMPLETE Column stats: NONE + predicate: ((((date) IN ('2008-04-08', '2008-04-09') and (UDFToDouble(hour) = 11.0)) and ds is not null) and hr is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ds (type: string), hr (type: string) outputColumnNames: _col0, _col2 - Statistics: Num rows: 2 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), UDFToDouble(_col2) (type: double) sort order: ++ Map-reduce partition columns: _col0 (type: string), UDFToDouble(_col2) (type: double) - Statistics: Num rows: 2 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: 
NONE Group By Operator keys: _col0 (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE Dynamic Partitioning Event Operator Target column: ds (string) Target Input: srcpart_orc Partition key expr: ds - Statistics: Num rows: 2 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE Target Vertex: Map 1 Select Operator expressions: UDFToDouble(_col2) (type: double) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: double) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE Dynamic Partitioning Event Operator Target column: hr (int) Target Input: srcpart_orc Partition key expr: UDFToDouble(hr) - Statistics: Num rows: 2 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE Target Vertex: Map 1 Reducer 2 Reduce Operator Tree: diff --git ql/src/test/results/clientpositive/vectorized_case.q.out ql/src/test/results/clientpositive/vectorized_case.q.out index 08c1412..e6d00bf 100644 --- ql/src/test/results/clientpositive/vectorized_case.q.out +++ ql/src/test/results/clientpositive/vectorized_case.q.out @@ -46,15 +46,15 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean) - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: 
NONE + predicate: (csmallint) IN (418, 12205, 10583) (type: boolean) + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -157,15 +157,15 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean) - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + predicate: (csmallint) IN (418, 12205, 10583) (type: boolean) + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE (null) END (type: string), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN (null) ELSE ('c') END (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: 
NONE File Output Operator compressed: false - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat