diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java index 033fc8d..cd700f0 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java @@ -102,16 +102,15 @@ public void initialize(HiveConf hiveConf) { transformations.add(new PredicatePushDown()); } else if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTPPD) && pctx.getContext().isCboSucceeded()) { - if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTCONSTANTPROPAGATION)) { - transformations.add(new ConstantPropagate()); - } transformations.add(new SyntheticJoinPredicate()); transformations.add(new SimplePredicatePushDown()); } - if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTCONSTANTPROPAGATION)) { - // We run constant propagation twice because after predicate pushdown, filter expressions - // are combined and may become eligible for reduction (like is not null filter). - transformations.add(new ConstantPropagate()); + + if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTCONSTANTPROPAGATION) && + !pctx.getContext().isCboSucceeded()) { + // We run constant propagation twice because after predicate pushdown, filter expressions + // are combined and may become eligible for reduction (like is not null filter). + transformations.add(new ConstantPropagate()); } if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTPPD)) { @@ -121,10 +120,13 @@ public void initialize(HiveConf hiveConf) { /* Add list bucketing pruner. */ transformations.add(new ListBucketingPruner()); } - if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTCONSTANTPROPAGATION)) { - // PartitionPruner may create more folding opportunities, run ConstantPropagate again. 
- transformations.add(new ConstantPropagate()); - } + } + if ((HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTPPD) + && HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTCONSTANTPROPAGATION)) + || (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTCONSTANTPROPAGATION) + && pctx.getContext().isCboSucceeded())) { + // PartitionPruner may create more folding opportunities, run ConstantPropagate again. + transformations.add(new ConstantPropagate()); } if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTGROUPBY) || diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRexUtil.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRexUtil.java new file mode 100644 index 0000000..72fef7f --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRexUtil.java @@ -0,0 +1,263 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.optimizer.calcite; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +import org.apache.calcite.linq4j.Ord; +import org.apache.calcite.plan.RelOptUtil; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexLiteral; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexUtil; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.fun.SqlStdOperatorTable; +import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.calcite.util.Pair; +import org.apache.calcite.util.Util; + +import com.google.common.collect.ImmutableList; + + +public class HiveRexUtil { + + /** + * Simplifies a boolean expression. + * + *

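In particular: {@code AND}, {@code OR} and {@code CASE} calls are handed to {@code simplifyAnd}, {@code simplifyOr} and {@code simplifyCase} respectively; {@code IS NULL} and {@code IS NOT NULL} applied to an operand whose type is not nullable are folded to the literals {@code FALSE} and {@code TRUE}; every other expression is returned unchanged.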

+ * + */ + public static RexNode simplify(RexBuilder rexBuilder, RexNode e) { + switch (e.getKind()) { + case AND: + return simplifyAnd(rexBuilder, (RexCall) e); + case OR: + return simplifyOr(rexBuilder, (RexCall) e); + case CASE: + return simplifyCase(rexBuilder, (RexCall) e); + case IS_NULL: + return ((RexCall) e).getOperands().get(0).getType().isNullable() + ? e : rexBuilder.makeLiteral(false); + case IS_NOT_NULL: + return ((RexCall) e).getOperands().get(0).getType().isNullable() + ? e : rexBuilder.makeLiteral(true); + default: + return e; + } + } + + private static RexNode simplifyCase(RexBuilder rexBuilder, RexCall call) { + final List operands = call.getOperands(); + final List newOperands = new ArrayList<>(); + for (int i = 0; i < operands.size(); i++) { + RexNode operand = operands.get(i); + if (RexUtil.isCasePredicate(call, i)) { + if (operand.isAlwaysTrue()) { + // Predicate is always TRUE. Make value the ELSE and quit. + newOperands.add(operands.get(i + 1)); + break; + } + if (operand.isAlwaysFalse()) { + // Predicate is always FALSE. Skip predicate and value. 
+ ++i; + continue; + } + } + newOperands.add(operand); + } + assert newOperands.size() % 2 == 1; + switch (newOperands.size()) { + case 1: + return newOperands.get(0); + } + trueFalse: + if (call.getType().getSqlTypeName() == SqlTypeName.BOOLEAN) { + // Optimize CASE where every branch returns constant true or constant + // false: + // CASE + // WHEN p1 THEN TRUE + // WHEN p2 THEN FALSE + // WHEN p3 THEN TRUE + // ELSE FALSE + // END + final List> pairs = + casePairs(rexBuilder, newOperands); + for (Ord> pair : Ord.zip(pairs)) { + if (!pair.e.getValue().isAlwaysTrue() + && !pair.e.getValue().isAlwaysFalse()) { + break trueFalse; + } + } + final List terms = new ArrayList<>(); + final List notTerms = new ArrayList<>(); + for (Ord> pair : Ord.zip(pairs)) { + if (pair.e.getValue().isAlwaysTrue()) { + terms.add(RexUtil.andNot(rexBuilder, pair.e.getKey(), notTerms)); + } else { + notTerms.add(pair.e.getKey()); + } + } + return RexUtil.composeDisjunction(rexBuilder, terms, false); + } + if (newOperands.equals(operands)) { + return call; + } + return call.clone(call.getType(), newOperands); + } + + /** Given "CASE WHEN p1 THEN v1 ... ELSE e END" + * returns [(p1, v1), ..., (true, e)]. 
*/ + private static List> casePairs(RexBuilder rexBuilder, + List operands) { + final ImmutableList.Builder> builder = + ImmutableList.builder(); + for (int i = 0; i < operands.size() - 1; i += 2) { + builder.add(Pair.of(operands.get(i), operands.get(i + 1))); + } + builder.add( + Pair.of((RexNode) rexBuilder.makeLiteral(true), Util.last(operands))); + return builder.build(); + } + + public static RexNode simplifyAnd(RexBuilder rexBuilder, RexCall e) { + final List terms = RelOptUtil.conjunctions(e); + final List notTerms = new ArrayList<>(); + final List nullOperands = new ArrayList<>(); + final List notNullOperands = new ArrayList<>(); + final List comparedOperands = new ArrayList<>(); + for (int i = 0; i < terms.size(); i++) { + final RexNode term = terms.get(i); + switch (term.getKind()) { + case NOT: + notTerms.add( + ((RexCall) term).getOperands().get(0)); + terms.remove(i); + --i; + break; + case LITERAL: + if (!RexLiteral.booleanValue(term)) { + return term; // false + } else { + terms.remove(i); + --i; + } + break; + case EQUALS: + case NOT_EQUALS: + case LESS_THAN: + case GREATER_THAN: + case LESS_THAN_OR_EQUAL: + case GREATER_THAN_OR_EQUAL: + RexCall call = (RexCall) term; + RexNode left = call.getOperands().get(0); + comparedOperands.add(left); + // if it is a cast, we include the inner reference + if (left.getKind() == SqlKind.CAST) { + RexCall leftCast = (RexCall) left; + comparedOperands.add(leftCast.getOperands().get(0)); + } + RexNode right = call.getOperands().get(1); + comparedOperands.add(right); + // if it is a cast, we include the inner reference + if (right.getKind() == SqlKind.CAST) { + RexCall rightCast = (RexCall) right; + comparedOperands.add(rightCast.getOperands().get(0)); + } + break; + case IS_NOT_NULL: + notNullOperands.add( + ((RexCall) term).getOperands().get(0)); + terms.remove(i); + --i; + break; + case IS_NULL: + nullOperands.add( + ((RexCall) term).getOperands().get(0)); + } + } + if (terms.isEmpty() && notTerms.isEmpty() && 
notNullOperands.isEmpty()) { + return rexBuilder.makeLiteral(true); + } + // If one column should be null and is in a comparison predicate, + // it is not satisfiable. + // Example. IS NULL(x) AND x < 5 - not satisfiable + if (!Collections.disjoint(nullOperands, comparedOperands)) { + return rexBuilder.makeLiteral(false); + } + // Remove not necessary IS NOT NULL expressions. + // + // Example. IS NOT NULL(x) AND x < 5 : x < 5 + for (RexNode operand : notNullOperands) { + if (!comparedOperands.contains(operand)) { + terms.add( + rexBuilder.makeCall( + SqlStdOperatorTable.IS_NOT_NULL, operand)); + } + } + // If one of the not-disjunctions is a disjunction that is wholly + // contained in the disjunctions list, the expression is not + // satisfiable. + // + // Example #1. x AND y AND z AND NOT (x AND y) - not satisfiable + // Example #2. x AND y AND NOT (x AND y) - not satisfiable + // Example #3. x AND y AND NOT (x AND y AND z) - may be satisfiable + for (RexNode notDisjunction : notTerms) { + final List terms2 = RelOptUtil.conjunctions(notDisjunction); + if (terms.containsAll(terms2)) { + return rexBuilder.makeLiteral(false); + } + } + // Add the NOT disjunctions back in. + for (RexNode notDisjunction : notTerms) { + terms.add( + rexBuilder.makeCall( + SqlStdOperatorTable.NOT, notDisjunction)); + } + return RexUtil.composeConjunction(rexBuilder, terms, false); + } + + /** Simplifies OR(x, x) into x, and similar. 
*/ + public static RexNode simplifyOr(RexBuilder rexBuilder, RexCall call) { + assert call.getKind() == SqlKind.OR; + final List terms = RelOptUtil.disjunctions(call); + for (int i = 0; i < terms.size(); i++) { + final RexNode term = terms.get(i); + switch (term.getKind()) { + case LITERAL: + if (RexLiteral.booleanValue(term)) { + return term; // true + } else { + terms.remove(i); + --i; + } + } + } + return RexUtil.composeDisjunction(rexBuilder, terms, false); + } + + + +} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsRule.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsRule.java index 50e139b..83cfa86 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsRule.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsRule.java @@ -16,6 +16,15 @@ */ package org.apache.hadoop.hive.ql.optimizer.calcite.rules; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.regex.Pattern; + import org.apache.calcite.plan.RelOptPlanner; import org.apache.calcite.plan.RelOptPredicateList; import org.apache.calcite.plan.RelOptRule; @@ -25,6 +34,7 @@ import org.apache.calcite.rel.core.JoinInfo; import org.apache.calcite.rel.core.Project; import org.apache.calcite.rel.metadata.RelMetadataQuery; +import org.apache.calcite.rel.rules.ValuesReduceRule; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeFactory; import org.apache.calcite.rex.RexBuilder; @@ -50,22 +60,14 @@ import org.apache.calcite.util.Stacks; import org.apache.calcite.util.Util; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRexUtil; import 
org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter; -import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.regex.Pattern; - /** * Collection of planner rules that apply various simplifying transformations on * RexNode trees. Currently, there are two transformations: @@ -123,24 +125,23 @@ public FilterReduceExpressionsRule(Class filterClass, @Override public void onMatch(RelOptRuleCall call) { final Filter filter = call.rel(0); - final List expList = - Lists.newArrayList(filter.getCondition()); - RexNode newConditionExp; - boolean reduced; + final RexBuilder rexBuilder = filter.getCluster().getRexBuilder(); + + RexNode newConditionExp = HiveRexUtil.simplify(rexBuilder, filter.getCondition()); + final List expList = Lists.newArrayList(newConditionExp); + boolean reduced = false; final RelOptPredicateList predicates = RelMetadataQuery.getPulledUpPredicates(filter.getInput()); if (reduceExpressions(filter, expList, predicates)) { assert expList.size() == 1; newConditionExp = expList.get(0); reduced = true; - } else { - // No reduction, but let's still test the original - // predicate to see if it was already a constant, - // in which case we don't need any runtime decision - // about filtering. - newConditionExp = filter.getCondition(); - reduced = false; } + + // Even if no reduction, let's still test the original + // predicate to see if it was already a constant, + // in which case we don't need any runtime decision + // about filtering. 
if (newConditionExp.isAlwaysTrue()) { call.transformTo( filter.getInput()); @@ -151,64 +152,17 @@ else if (newConditionExp instanceof RexLiteral // call.transformTo(call.builder().values(filter.getRowType()).build()); return; } - else if (reduced) { + else if (reduced + || !newConditionExp.toString().equals(filter.getCondition().toString())) { call.transformTo(call.builder(). - push(filter.getInput()).filter(expList.get(0)).build()); + push(filter.getInput()).filter(newConditionExp).build()); } else { - if (newConditionExp instanceof RexCall) { - RexCall rexCall = (RexCall) newConditionExp; - boolean reverse = - rexCall.getOperator() - == SqlStdOperatorTable.NOT; - if (reverse) { - rexCall = (RexCall) rexCall.getOperands().get(0); - } - reduceNotNullableFilter(call, filter, rexCall, reverse); - } return; } // New plan is absolutely better than old plan. call.getPlanner().setImportance(filter, 0.0); } - - private void reduceNotNullableFilter( - RelOptRuleCall call, - Filter filter, - RexCall rexCall, - boolean reverse) { - // If the expression is a IS [NOT] NULL on a non-nullable - // column, then we can either remove the filter or replace - // it with an Empty. 
- boolean alwaysTrue; - switch (rexCall.getKind()) { - case IS_NULL: - case IS_UNKNOWN: - alwaysTrue = false; - break; - case IS_NOT_NULL: - alwaysTrue = true; - break; - default: - return; - } - if (reverse) { - alwaysTrue = !alwaysTrue; - } - RexNode operand = rexCall.getOperands().get(0); - if (operand instanceof RexInputRef) { - RexInputRef inputRef = (RexInputRef) operand; - if (!inputRef.getType().isNullable()) { - if (alwaysTrue) { - call.transformTo(filter.getInput()); - } else { - // TODO: support LogicalValues - // call.transformTo(call.builder().values(filter.getRowType()).build()); - return; - } - } - } - } } /** diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java index b42e78f..739faa9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java @@ -41,8 +41,6 @@ import org.apache.calcite.rex.RexWindowBound; import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.type.SqlTypeUtil; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; @@ -70,6 +68,8 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import com.google.common.collect.ImmutableSet; @@ -151,71 +151,108 @@ public ExprNodeDesc visitCall(RexCall call) { return gfDesc; } - /** - * TODO: 1. 
Handle NULL - */ @Override public ExprNodeDesc visitLiteral(RexLiteral literal) { RelDataType lType = literal.getType(); - switch (literal.getType().getSqlTypeName()) { - case BOOLEAN: - return new ExprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, Boolean.valueOf(RexLiteral - .booleanValue(literal))); - case TINYINT: - return new ExprNodeConstantDesc(TypeInfoFactory.byteTypeInfo, Byte.valueOf(((Number) literal - .getValue3()).byteValue())); - case SMALLINT: - return new ExprNodeConstantDesc(TypeInfoFactory.shortTypeInfo, - Short.valueOf(((Number) literal.getValue3()).shortValue())); - case INTEGER: - return new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, - Integer.valueOf(((Number) literal.getValue3()).intValue())); - case BIGINT: - return new ExprNodeConstantDesc(TypeInfoFactory.longTypeInfo, Long.valueOf(((Number) literal - .getValue3()).longValue())); - case FLOAT: - case REAL: - return new ExprNodeConstantDesc(TypeInfoFactory.floatTypeInfo, - Float.valueOf(((Number) literal.getValue3()).floatValue())); - case DOUBLE: - return new ExprNodeConstantDesc(TypeInfoFactory.doubleTypeInfo, - Double.valueOf(((Number) literal.getValue3()).doubleValue())); - case DATE: - return new ExprNodeConstantDesc(TypeInfoFactory.dateTypeInfo, - new Date(((Calendar)literal.getValue()).getTimeInMillis())); - case TIME: - case TIMESTAMP: { - Object value = literal.getValue3(); - if (value instanceof Long) { - value = new Timestamp((Long)value); + if (RexLiteral.value(literal) == null) { + switch (literal.getType().getSqlTypeName()) { + case BOOLEAN: + return new ExprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, null); + case TINYINT: + return new ExprNodeConstantDesc(TypeInfoFactory.byteTypeInfo, null); + case SMALLINT: + return new ExprNodeConstantDesc(TypeInfoFactory.shortTypeInfo, null); + case INTEGER: + return new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, null); + case BIGINT: + return new ExprNodeConstantDesc(TypeInfoFactory.longTypeInfo, null); + case 
FLOAT: + case REAL: + return new ExprNodeConstantDesc(TypeInfoFactory.floatTypeInfo, null); + case DOUBLE: + return new ExprNodeConstantDesc(TypeInfoFactory.doubleTypeInfo, null); + case DATE: + return new ExprNodeConstantDesc(TypeInfoFactory.dateTypeInfo, null); + case TIME: + case TIMESTAMP: + return new ExprNodeConstantDesc(TypeInfoFactory.timestampTypeInfo, null); + case BINARY: + return new ExprNodeConstantDesc(TypeInfoFactory.binaryTypeInfo, null); + case DECIMAL: + return new ExprNodeConstantDesc( + TypeInfoFactory.getDecimalTypeInfo(lType.getPrecision(), lType.getScale()), null); + case VARCHAR: + case CHAR: + return new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, null); + case INTERVAL_YEAR_MONTH: + return new ExprNodeConstantDesc(TypeInfoFactory.intervalYearMonthTypeInfo, null); + case INTERVAL_DAY_TIME: + return new ExprNodeConstantDesc(TypeInfoFactory.intervalDayTimeTypeInfo, null); + case OTHER: + default: + return new ExprNodeConstantDesc(TypeInfoFactory.voidTypeInfo, null); + } + } else { + switch (literal.getType().getSqlTypeName()) { + case BOOLEAN: + return new ExprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, Boolean.valueOf(RexLiteral + .booleanValue(literal))); + case TINYINT: + return new ExprNodeConstantDesc(TypeInfoFactory.byteTypeInfo, Byte.valueOf(((Number) literal + .getValue3()).byteValue())); + case SMALLINT: + return new ExprNodeConstantDesc(TypeInfoFactory.shortTypeInfo, + Short.valueOf(((Number) literal.getValue3()).shortValue())); + case INTEGER: + return new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, + Integer.valueOf(((Number) literal.getValue3()).intValue())); + case BIGINT: + return new ExprNodeConstantDesc(TypeInfoFactory.longTypeInfo, Long.valueOf(((Number) literal + .getValue3()).longValue())); + case FLOAT: + case REAL: + return new ExprNodeConstantDesc(TypeInfoFactory.floatTypeInfo, + Float.valueOf(((Number) literal.getValue3()).floatValue())); + case DOUBLE: + return new 
ExprNodeConstantDesc(TypeInfoFactory.doubleTypeInfo, + Double.valueOf(((Number) literal.getValue3()).doubleValue())); + case DATE: + return new ExprNodeConstantDesc(TypeInfoFactory.dateTypeInfo, + new Date(((Calendar)literal.getValue()).getTimeInMillis())); + case TIME: + case TIMESTAMP: { + Object value = literal.getValue3(); + if (value instanceof Long) { + value = new Timestamp((Long)value); + } + return new ExprNodeConstantDesc(TypeInfoFactory.timestampTypeInfo, value); + } + case BINARY: + return new ExprNodeConstantDesc(TypeInfoFactory.binaryTypeInfo, literal.getValue3()); + case DECIMAL: + return new ExprNodeConstantDesc(TypeInfoFactory.getDecimalTypeInfo(lType.getPrecision(), + lType.getScale()), HiveDecimal.create((BigDecimal)literal.getValue3())); + case VARCHAR: + case CHAR: { + return new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, literal.getValue3()); + } + case INTERVAL_YEAR_MONTH: { + BigDecimal monthsBd = (BigDecimal) literal.getValue(); + return new ExprNodeConstantDesc(TypeInfoFactory.intervalYearMonthTypeInfo, + new HiveIntervalYearMonth(monthsBd.intValue())); + } + case INTERVAL_DAY_TIME: { + BigDecimal millisBd = (BigDecimal) literal.getValue(); + // Calcite literal is in millis, we need to convert to seconds + BigDecimal secsBd = millisBd.divide(BigDecimal.valueOf(1000)); + return new ExprNodeConstantDesc(TypeInfoFactory.intervalDayTimeTypeInfo, + new HiveIntervalDayTime(secsBd)); + } + case OTHER: + default: + return new ExprNodeConstantDesc(TypeInfoFactory.voidTypeInfo, literal.getValue3()); } - return new ExprNodeConstantDesc(TypeInfoFactory.timestampTypeInfo, value); - } - case BINARY: - return new ExprNodeConstantDesc(TypeInfoFactory.binaryTypeInfo, literal.getValue3()); - case DECIMAL: - return new ExprNodeConstantDesc(TypeInfoFactory.getDecimalTypeInfo(lType.getPrecision(), - lType.getScale()), HiveDecimal.create((BigDecimal)literal.getValue3())); - case VARCHAR: - case CHAR: { - return new 
ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, literal.getValue3()); - } - case INTERVAL_YEAR_MONTH: { - BigDecimal monthsBd = (BigDecimal) literal.getValue(); - return new ExprNodeConstantDesc(TypeInfoFactory.intervalYearMonthTypeInfo, - new HiveIntervalYearMonth(monthsBd.intValue())); - } - case INTERVAL_DAY_TIME: { - BigDecimal millisBd = (BigDecimal) literal.getValue(); - // Calcite literal is in millis, we need to convert to seconds - BigDecimal secsBd = millisBd.divide(BigDecimal.valueOf(1000)); - return new ExprNodeConstantDesc(TypeInfoFactory.intervalDayTimeTypeInfo, - new HiveIntervalDayTime(secsBd)); - } - case OTHER: - default: - return new ExprNodeConstantDesc(TypeInfoFactory.voidTypeInfo, literal.getValue3()); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java index 122546f..1c2a5fd 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java @@ -25,7 +25,6 @@ import java.util.Date; import java.util.GregorianCalendar; import java.util.LinkedHashMap; -import java.util.LinkedList; import java.util.List; import java.util.Map; @@ -40,8 +39,10 @@ import org.apache.calcite.rex.RexUtil; import org.apache.calcite.sql.SqlCollation; import org.apache.calcite.sql.SqlIntervalQualifier; +import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.SqlOperator; import org.apache.calcite.sql.fun.SqlCastFunction; +import org.apache.calcite.sql.fun.SqlStdOperatorTable; import org.apache.calcite.sql.parser.SqlParserPos; import org.apache.calcite.sql.type.SqlTypeName; import org.apache.calcite.util.ConversionUtil; @@ -74,8 +75,8 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToDecimal; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToVarchar; import 
org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping; @@ -88,6 +89,7 @@ import com.google.common.collect.ImmutableMap; public class RexNodeConverter { + private static class InputCtx { private final RelDataType calciteInpDataType; private final ImmutableMap hiveNameToPosMap; @@ -157,7 +159,7 @@ private RexNode convert(final ExprNodeGenericFuncDesc func) throws SemanticExcep ExprNodeDesc tmpExprNode; RexNode tmpRN; - List childRexNodeLst = new LinkedList(); + List childRexNodeLst = new ArrayList(); Builder argTypeBldr = ImmutableList. builder(); // TODO: 1) Expand to other functions as needed 2) What about types other than primitive. @@ -213,6 +215,35 @@ private RexNode convert(final ExprNodeGenericFuncDesc func) throws SemanticExcep retType = TypeConverter.convert(func.getTypeInfo(), cluster.getTypeFactory()); SqlOperator calciteOp = SqlFunctionConverter.getCalciteOperator(func.getFuncText(), func.getGenericUDF(), argTypeBldr.build(), retType); + // If it is a case operator, we need to rewrite it as if it was a when + if (calciteOp.getKind() == SqlKind.CASE) { + if (FunctionRegistry.getNormalizedFunctionName(func.getFuncText()).equals("case")) { + List newChildRexNodeLst = new ArrayList(); + RexNode firstPred = childRexNodeLst.get(0); + int length = childRexNodeLst.size() % 2 == 1 ? 
+ childRexNodeLst.size() : childRexNodeLst.size() - 1; + for (int i = 1; i < length; i++) { + if (i % 2 == 1) { + // We rewrite it + newChildRexNodeLst.add( + cluster.getRexBuilder().makeCall( + SqlStdOperatorTable.EQUALS, firstPred, childRexNodeLst.get(i))); + } else { + newChildRexNodeLst.add(childRexNodeLst.get(i)); + } + } + // The else clause + if (length != childRexNodeLst.size()) { + newChildRexNodeLst.add(childRexNodeLst.get(childRexNodeLst.size()-1)); + } + childRexNodeLst = newChildRexNodeLst; + } + // Calcite always needs the else clause to be defined explicitly + if (childRexNodeLst.size() % 2 == 0) { + childRexNodeLst.add(cluster.getRexBuilder().makeNullLiteral( + childRexNodeLst.get(childRexNodeLst.size()-1).getType().getSqlTypeName())); + } + } expr = cluster.getRexBuilder().makeCall(calciteOp, childRexNodeLst); } else { retType = expr.getType(); diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java index 75c38fa..02b2272 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java @@ -203,6 +203,7 @@ public static ASTNode buildAST(SqlOperator op, List children) { case BETWEEN: case ROW: case IS_NOT_NULL: + case CASE: node = (ASTNode) ParseDriver.adaptor.create(HiveParser.TOK_FUNCTION, "TOK_FUNCTION"); node.addChild((ASTNode) ParseDriver.adaptor.create(hToken.type, hToken.text)); break; @@ -320,10 +321,13 @@ private static String getName(GenericUDF hiveUDF) { hToken(HiveParser.GREATERTHANOREQUALTO, ">=")); registerFunction("!", SqlStdOperatorTable.NOT, hToken(HiveParser.KW_NOT, "not")); registerFunction("<>", SqlStdOperatorTable.NOT_EQUALS, hToken(HiveParser.NOTEQUAL, "<>")); + registerDuplicateFunction("!=", SqlStdOperatorTable.NOT_EQUALS, hToken(HiveParser.NOTEQUAL, 
"<>")); registerFunction("in", HiveIn.INSTANCE, hToken(HiveParser.Identifier, "in")); registerFunction("between", HiveBetween.INSTANCE, hToken(HiveParser.Identifier, "between")); registerFunction("struct", SqlStdOperatorTable.ROW, hToken(HiveParser.Identifier, "struct")); registerFunction("isnotnull", SqlStdOperatorTable.IS_NOT_NULL, hToken(HiveParser.TOK_ISNOTNULL, "TOK_ISNOTNULL")); + registerFunction("when", SqlStdOperatorTable.CASE, hToken(HiveParser.Identifier, "when")); + registerDuplicateFunction("case", SqlStdOperatorTable.CASE, hToken(HiveParser.Identifier, "when")); } private void registerFunction(String name, SqlOperator calciteFn, HiveToken hiveToken) { diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index cc9dc23..b332392 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -1082,13 +1082,13 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv HiveFilterJoinRule.FILTER_ON_JOIN, new HiveFilterAggregateTransposeRule(Filter.class, HiveRelFactories.HIVE_FILTER_FACTORY, Aggregate.class), new FilterMergeRule(HiveRelFactories.HIVE_FILTER_FACTORY), + HiveReduceExpressionsRule.PROJECT_INSTANCE, + HiveReduceExpressionsRule.FILTER_INSTANCE, + HiveReduceExpressionsRule.JOIN_INSTANCE, HiveJoinAddNotNullRule.INSTANCE_JOIN, HiveJoinAddNotNullRule.INSTANCE_SEMIJOIN, HiveJoinPushTransitivePredicatesRule.INSTANCE_JOIN, - HiveJoinPushTransitivePredicatesRule.INSTANCE_SEMIJOIN, - HiveReduceExpressionsRule.PROJECT_INSTANCE, - HiveReduceExpressionsRule.FILTER_INSTANCE, - HiveReduceExpressionsRule.JOIN_INSTANCE); + HiveJoinPushTransitivePredicatesRule.INSTANCE_SEMIJOIN); perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Prejoin ordering transformation, PPD, not null predicates, transitive inference, constant folding"); diff --git 
ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out index 26656e2..a7fedec 100644 --- ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out +++ ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out @@ -406,7 +406,7 @@ STAGE PLANS: alias: s Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator - predicate: (s_store_sk is not null and (s_company_id > 0)) (type: boolean) + predicate: ((s_company_id > 0) and s_store_sk is not null) (type: boolean) Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: s_store_sk (type: int) @@ -421,7 +421,7 @@ STAGE PLANS: alias: ss Statistics: Num rows: 1000 Data size: 7668 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (ss_store_sk is not null and (ss_quantity > 10)) (type: boolean) + predicate: ((ss_quantity > 10) and ss_store_sk is not null) (type: boolean) Statistics: Num rows: 321 Data size: 2460 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ss_store_sk (type: int) @@ -471,7 +471,7 @@ STAGE PLANS: alias: s Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (s_store_sk is not null and (s_floor_space > 0)) (type: boolean) + predicate: ((s_floor_space > 0) and s_store_sk is not null) (type: boolean) Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: s_store_sk (type: int) @@ -551,7 +551,7 @@ STAGE PLANS: alias: ss Statistics: Num rows: 1000 Data size: 7668 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (ss_store_sk is not null and (ss_quantity > 10)) (type: boolean) + predicate: ((ss_quantity > 10) and ss_store_sk is not null) (type: boolean) Statistics: Num rows: 321 Data size: 2460 Basic stats: COMPLETE Column stats: COMPLETE 
Select Operator expressions: ss_store_sk (type: int) @@ -788,7 +788,7 @@ STAGE PLANS: alias: s Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (s_store_sk is not null and (s_floor_space > 1000)) (type: boolean) + predicate: ((s_floor_space > 1000) and s_store_sk is not null) (type: boolean) Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: s_store_sk (type: int) @@ -859,7 +859,7 @@ STAGE PLANS: alias: ss Statistics: Num rows: 1000 Data size: 7668 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (ss_store_sk is not null and (ss_quantity > 10)) (type: boolean) + predicate: ((ss_quantity > 10) and ss_store_sk is not null) (type: boolean) Statistics: Num rows: 321 Data size: 2460 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ss_store_sk (type: int) diff --git ql/src/test/results/clientpositive/auto_join_without_localtask.q.out ql/src/test/results/clientpositive/auto_join_without_localtask.q.out index 1521a71..d40b165 100644 --- ql/src/test/results/clientpositive/auto_join_without_localtask.q.out +++ ql/src/test/results/clientpositive/auto_join_without_localtask.q.out @@ -704,7 +704,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (value is not null and (UDFToDouble(key) > 100.0)) (type: boolean) + predicate: ((UDFToDouble(key) > 100.0) and value is not null) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -889,7 +889,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (value is not null and (UDFToDouble(key) > 100.0)) (type: boolean) + predicate: ((UDFToDouble(key) > 100.0) and value is not null) 
(type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -937,7 +937,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (value is not null and (UDFToDouble(key) > 100.0)) (type: boolean) + predicate: ((UDFToDouble(key) > 100.0) and value is not null) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) diff --git ql/src/test/results/clientpositive/cbo_rp_lineage2.q.out ql/src/test/results/clientpositive/cbo_rp_lineage2.q.out index d4bc93c..2d3f12b 100644 --- ql/src/test/results/clientpositive/cbo_rp_lineage2.q.out +++ ql/src/test/results/clientpositive/cbo_rp_lineage2.q.out @@ -523,7 +523,7 @@ PREHOOK: Input: default@src1 PREHOOK: Input: default@src2 PREHOOK: Output: database:default PREHOOK: Output: default@dest3 -{"version":"1.0","engine":"mr","database":"default","hash":"a2c4e9a3ec678039814f5d84b1e38ce4","queryText":"create table dest3 as\n select * from src1 JOIN src2 ON src1.key = src2.key2 WHERE length(key) > 1","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1,2,3],"expression":"(src1.key is not null and (length(src1.key) > 1))","edgeType":"PREDICATE"},{"sources":[4,6],"targets":[0,1,2,3],"expression":"(src1.key = src2.key2)","edgeType":"PREDICATE"},{"sources":[6],"targets":[0,1,2,3],"expression":"(src2.key2 is not null and (length(src2.key2) > 
1))","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest3.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest3.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest3.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest3.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} +{"version":"1.0","engine":"mr","database":"default","hash":"a2c4e9a3ec678039814f5d84b1e38ce4","queryText":"create table dest3 as\n select * from src1 JOIN src2 ON src1.key = src2.key2 WHERE length(key) > 1","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1,2,3],"expression":"((length(src1.key) > 1) and src1.key is not null)","edgeType":"PREDICATE"},{"sources":[4,6],"targets":[0,1,2,3],"expression":"(src1.key = src2.key2)","edgeType":"PREDICATE"},{"sources":[6],"targets":[0,1,2,3],"expression":"((length(src2.key2) > 1) and src2.key2 is not null)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest3.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest3.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest3.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest3.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} PREHOOK: query: insert overwrite table dest2 select * from src1 JOIN src2 ON src1.key = src2.key2 WHERE length(key) > 3 PREHOOK: type: QUERY @@ -593,7 
+593,7 @@ PREHOOK: Input: default@dept PREHOOK: Input: default@emp PREHOOK: Input: default@project PREHOOK: Output: default@tgt -{"version":"1.0","engine":"mr","database":"default","hash":"f59797e0422d2e51515063374dfac361","queryText":"INSERT INTO TABLE tgt\nSELECT emd.dept_name, emd.name, emd.emp_id, emd.mgr_id, p.project_id, p.project_name\nFROM (\n SELECT d.dept_name, em.name, em.emp_id, em.mgr_id, em.dept_id\n FROM (\n SELECT e.name, e.dept_id, e.emp_id emp_id, m.emp_id mgr_id\n FROM emp e JOIN emp m ON e.emp_id = m.emp_id\n ) em\n JOIN dept d ON d.dept_id = em.dept_id\n ) emd JOIN project p ON emd.dept_id = p.project_id","edges":[{"sources":[6],"targets":[0],"edgeType":"PROJECTION"},{"sources":[7],"targets":[1],"edgeType":"PROJECTION"},{"sources":[8],"targets":[2,3],"edgeType":"PROJECTION"},{"sources":[9],"targets":[4],"edgeType":"PROJECTION"},{"sources":[10],"targets":[5],"edgeType":"PROJECTION"},{"sources":[8,11],"targets":[0,1,2,3,4,5],"expression":"(e.emp_id is not null and e.dept_id is not null and e.dept_id is not null)","edgeType":"PREDICATE"},{"sources":[8],"targets":[0,1,2,3,4,5],"expression":"(emd:em:e.emp_id = emd:em:m.emp_id)","edgeType":"PREDICATE"},{"sources":[8],"targets":[0,1,2,3,4,5],"expression":"m.emp_id is not null","edgeType":"PREDICATE"},{"sources":[11,12,9],"targets":[0,1,2,3,4,5],"expression":"(emd:em:e.dept_id = emd:d.dept_id AND emd:em:e.dept_id = p.project_id)","edgeType":"PREDICATE"},{"sources":[12],"targets":[0,1,2,3,4,5],"expression":"d.dept_id is not null","edgeType":"PREDICATE"},{"sources":[9],"targets":[0,1,2,3,4,5],"expression":"p.project_id is not 
null","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.tgt.dept_name"},{"id":1,"vertexType":"COLUMN","vertexId":"default.tgt.name"},{"id":2,"vertexType":"COLUMN","vertexId":"default.tgt.emp_id"},{"id":3,"vertexType":"COLUMN","vertexId":"default.tgt.mgr_id"},{"id":4,"vertexType":"COLUMN","vertexId":"default.tgt.proj_id"},{"id":5,"vertexType":"COLUMN","vertexId":"default.tgt.proj_name"},{"id":6,"vertexType":"COLUMN","vertexId":"default.dept.dept_name"},{"id":7,"vertexType":"COLUMN","vertexId":"default.emp.name"},{"id":8,"vertexType":"COLUMN","vertexId":"default.emp.emp_id"},{"id":9,"vertexType":"COLUMN","vertexId":"default.project.project_id"},{"id":10,"vertexType":"COLUMN","vertexId":"default.project.project_name"},{"id":11,"vertexType":"COLUMN","vertexId":"default.emp.dept_id"},{"id":12,"vertexType":"COLUMN","vertexId":"default.dept.dept_id"}]} +{"version":"1.0","engine":"mr","database":"default","hash":"f59797e0422d2e51515063374dfac361","queryText":"INSERT INTO TABLE tgt\nSELECT emd.dept_name, emd.name, emd.emp_id, emd.mgr_id, p.project_id, p.project_name\nFROM (\n SELECT d.dept_name, em.name, em.emp_id, em.mgr_id, em.dept_id\n FROM (\n SELECT e.name, e.dept_id, e.emp_id emp_id, m.emp_id mgr_id\n FROM emp e JOIN emp m ON e.emp_id = m.emp_id\n ) em\n JOIN dept d ON d.dept_id = em.dept_id\n ) emd JOIN project p ON emd.dept_id = p.project_id","edges":[{"sources":[6],"targets":[0],"edgeType":"PROJECTION"},{"sources":[7],"targets":[1],"edgeType":"PROJECTION"},{"sources":[8],"targets":[2,3],"edgeType":"PROJECTION"},{"sources":[9],"targets":[4],"edgeType":"PROJECTION"},{"sources":[10],"targets":[5],"edgeType":"PROJECTION"},{"sources":[8,11],"targets":[0,1,2,3,4,5],"expression":"(e.emp_id is not null and e.dept_id is not null)","edgeType":"PREDICATE"},{"sources":[8],"targets":[0,1,2,3,4,5],"expression":"(emd:em:e.emp_id = emd:em:m.emp_id)","edgeType":"PREDICATE"},{"sources":[8],"targets":[0,1,2,3,4,5],"expression":"m.emp_id is not 
null","edgeType":"PREDICATE"},{"sources":[11,12,9],"targets":[0,1,2,3,4,5],"expression":"(emd:em:e.dept_id = emd:d.dept_id AND emd:em:e.dept_id = p.project_id)","edgeType":"PREDICATE"},{"sources":[12],"targets":[0,1,2,3,4,5],"expression":"d.dept_id is not null","edgeType":"PREDICATE"},{"sources":[9],"targets":[0,1,2,3,4,5],"expression":"p.project_id is not null","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.tgt.dept_name"},{"id":1,"vertexType":"COLUMN","vertexId":"default.tgt.name"},{"id":2,"vertexType":"COLUMN","vertexId":"default.tgt.emp_id"},{"id":3,"vertexType":"COLUMN","vertexId":"default.tgt.mgr_id"},{"id":4,"vertexType":"COLUMN","vertexId":"default.tgt.proj_id"},{"id":5,"vertexType":"COLUMN","vertexId":"default.tgt.proj_name"},{"id":6,"vertexType":"COLUMN","vertexId":"default.dept.dept_name"},{"id":7,"vertexType":"COLUMN","vertexId":"default.emp.name"},{"id":8,"vertexType":"COLUMN","vertexId":"default.emp.emp_id"},{"id":9,"vertexType":"COLUMN","vertexId":"default.project.project_id"},{"id":10,"vertexType":"COLUMN","vertexId":"default.project.project_name"},{"id":11,"vertexType":"COLUMN","vertexId":"default.emp.dept_id"},{"id":12,"vertexType":"COLUMN","vertexId":"default.dept.dept_id"}]} PREHOOK: query: drop table if exists dest_l2 PREHOOK: type: DROPTABLE PREHOOK: query: create table dest_l2 (id int, c1 tinyint, c2 int, c3 bigint) stored as textfile @@ -646,7 +646,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@dest_l2 PREHOOK: Input: default@dest_l3 #### A masked pattern was here #### -{"version":"1.0","engine":"mr","database":"default","hash":"01879c619517509d9f5b6ead998bb4bb","queryText":"select sum(a.c1), count(b.c1), b.c2, b.c3\nfrom dest_l2 a join dest_l3 b on (a.id = b.id)\nwhere a.c2 != 10 and b.c3 > 0\ngroup by a.c1, a.c2, a.id, b.c1, b.c2, b.c3\nhaving count(a.c2) > 0\norder by b.c3 limit 
5","edges":[{"sources":[4],"targets":[0],"expression":"sum(default.dest_l2.c1)","edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"expression":"count(default.dest_l3.c1)","edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[8,9],"targets":[0,1,2,3],"expression":"(a.id is not null and (a.c2 <> 10))","edgeType":"PREDICATE"},{"sources":[8,10],"targets":[0,1,2,3],"expression":"(a.id = b.id)","edgeType":"PREDICATE"},{"sources":[10,7],"targets":[0,1,2,3],"expression":"(b.id is not null and (b.c3 > 0))","edgeType":"PREDICATE"},{"sources":[9],"targets":[0,1,2,3],"expression":"(count(default.dest_l2.c2) > 0)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"_c0"},{"id":1,"vertexType":"COLUMN","vertexId":"_c1"},{"id":2,"vertexType":"COLUMN","vertexId":"b.c2"},{"id":3,"vertexType":"COLUMN","vertexId":"b.c3"},{"id":4,"vertexType":"COLUMN","vertexId":"default.dest_l2.c1"},{"id":5,"vertexType":"COLUMN","vertexId":"default.dest_l3.c1"},{"id":6,"vertexType":"COLUMN","vertexId":"default.dest_l3.c2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.dest_l3.c3"},{"id":8,"vertexType":"COLUMN","vertexId":"default.dest_l2.id"},{"id":9,"vertexType":"COLUMN","vertexId":"default.dest_l2.c2"},{"id":10,"vertexType":"COLUMN","vertexId":"default.dest_l3.id"}]} +{"version":"1.0","engine":"mr","database":"default","hash":"01879c619517509d9f5b6ead998bb4bb","queryText":"select sum(a.c1), count(b.c1), b.c2, b.c3\nfrom dest_l2 a join dest_l3 b on (a.id = b.id)\nwhere a.c2 != 10 and b.c3 > 0\ngroup by a.c1, a.c2, a.id, b.c1, b.c2, b.c3\nhaving count(a.c2) > 0\norder by b.c3 limit 
5","edges":[{"sources":[4],"targets":[0],"expression":"sum(default.dest_l2.c1)","edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"expression":"count(default.dest_l3.c1)","edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[8,9],"targets":[0,1,2,3],"expression":"((a.c2 <> 10) and a.id is not null)","edgeType":"PREDICATE"},{"sources":[9,10],"targets":[0,1,2,3],"expression":"(a.id = b.id)","edgeType":"PREDICATE"},{"sources":[7,10],"targets":[0,1,2,3],"expression":"((b.c3 > 0) and b.id is not null)","edgeType":"PREDICATE"},{"sources":[8],"targets":[0,1,2,3],"expression":"(count(default.dest_l2.c2) > 0)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"_c0"},{"id":1,"vertexType":"COLUMN","vertexId":"_c1"},{"id":2,"vertexType":"COLUMN","vertexId":"b.c2"},{"id":3,"vertexType":"COLUMN","vertexId":"b.c3"},{"id":4,"vertexType":"COLUMN","vertexId":"default.dest_l2.c1"},{"id":5,"vertexType":"COLUMN","vertexId":"default.dest_l3.c1"},{"id":6,"vertexType":"COLUMN","vertexId":"default.dest_l3.c2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.dest_l3.c3"},{"id":8,"vertexType":"COLUMN","vertexId":"default.dest_l2.c2"},{"id":9,"vertexType":"COLUMN","vertexId":"default.dest_l2.id"},{"id":10,"vertexType":"COLUMN","vertexId":"default.dest_l3.id"}]} 1 1 s2 15 PREHOOK: query: drop table if exists t PREHOOK: type: DROPTABLE @@ -659,7 +659,7 @@ PREHOOK: Input: default@dest_l2 PREHOOK: Input: default@dest_l3 PREHOOK: Output: database:default PREHOOK: Output: default@t -{"version":"1.0","engine":"mr","database":"default","hash":"0d2f15b494111ffe236d5be42a76fa28","queryText":"create table t as\nselect distinct a.c2, a.c3 from dest_l2 a\ninner join dest_l3 b on (a.id = b.id)\nwhere a.id > 0 and b.c3 = 
15","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1],"expression":"(a.id is not null and (a.id > 0))","edgeType":"PREDICATE"},{"sources":[4,5],"targets":[0,1],"expression":"(a.id = b.id)","edgeType":"PREDICATE"},{"sources":[5,6],"targets":[0,1],"expression":"(b.id is not null and (b.c3 = 15) and (b.id > 0))","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.t.c2"},{"id":1,"vertexType":"COLUMN","vertexId":"default.t.c3"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_l2.c2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_l2.c3"},{"id":4,"vertexType":"COLUMN","vertexId":"default.dest_l2.id"},{"id":5,"vertexType":"COLUMN","vertexId":"default.dest_l3.id"},{"id":6,"vertexType":"COLUMN","vertexId":"default.dest_l3.c3"}]} +{"version":"1.0","engine":"mr","database":"default","hash":"0d2f15b494111ffe236d5be42a76fa28","queryText":"create table t as\nselect distinct a.c2, a.c3 from dest_l2 a\ninner join dest_l3 b on (a.id = b.id)\nwhere a.id > 0 and b.c3 = 15","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1],"expression":"(a.id > 0)","edgeType":"PREDICATE"},{"sources":[4,5],"targets":[0,1],"expression":"(a.id = b.id)","edgeType":"PREDICATE"},{"sources":[6,5],"targets":[0,1],"expression":"((b.c3 = 15) and (b.id > 0))","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.t.c2"},{"id":1,"vertexType":"COLUMN","vertexId":"default.t.c3"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_l2.c2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_l2.c3"},{"id":4,"vertexType":"COLUMN","vertexId":"default.dest_l2.id"},{"id":5,"vertexType":"COLUMN","vertexId":"default.dest_l3.id"},{"id":6,"vertexType":"COLUMN","vertexId":"default.dest_l3.c3"}]} PREHOOK: query: SELECT substr(src1.key,1,1), 
count(DISTINCT substr(src1.value,5)), concat(substr(src1.key,1,1),sum(substr(src1.value,5))) from src1 diff --git ql/src/test/results/clientpositive/correlationoptimizer13.q.out ql/src/test/results/clientpositive/correlationoptimizer13.q.out index 048f63b..61b7bcb 100644 --- ql/src/test/results/clientpositive/correlationoptimizer13.q.out +++ ql/src/test/results/clientpositive/correlationoptimizer13.q.out @@ -162,7 +162,7 @@ STAGE PLANS: alias: x Statistics: Num rows: 1028 Data size: 22964 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((c2 > 100) and c3 is not null) and (c1 < 120)) (type: boolean) + predicate: (((c2 > 100) and (c1 < 120)) and c3 is not null) (type: boolean) Statistics: Num rows: 114 Data size: 2546 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c3 (type: string), c1 (type: int) diff --git ql/src/test/results/clientpositive/correlationoptimizer8.q.out ql/src/test/results/clientpositive/correlationoptimizer8.q.out index ba54b87..368a114 100644 --- ql/src/test/results/clientpositive/correlationoptimizer8.q.out +++ ql/src/test/results/clientpositive/correlationoptimizer8.q.out @@ -103,7 +103,7 @@ STAGE PLANS: alias: x Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and ((UDFToDouble(key) < 20.0) or (UDFToDouble(key) > 100.0))) (type: boolean) + predicate: (((UDFToDouble(key) < 20.0) or (UDFToDouble(key) > 100.0)) and key is not null) (type: boolean) Statistics: Num rows: 16 Data size: 122 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -290,7 +290,7 @@ STAGE PLANS: alias: x Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and ((UDFToDouble(key) < 20.0) or (UDFToDouble(key) > 100.0))) (type: boolean) + predicate: (((UDFToDouble(key) < 20.0) or (UDFToDouble(key) > 100.0)) and key is not 
null) (type: boolean) Statistics: Num rows: 16 Data size: 122 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -963,7 +963,7 @@ STAGE PLANS: alias: x Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and ((UDFToDouble(key) < 20.0) or (UDFToDouble(key) > 100.0))) (type: boolean) + predicate: (((UDFToDouble(key) < 20.0) or (UDFToDouble(key) > 100.0)) and key is not null) (type: boolean) Statistics: Num rows: 16 Data size: 122 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) diff --git ql/src/test/results/clientpositive/correlationoptimizer9.q.out ql/src/test/results/clientpositive/correlationoptimizer9.q.out index b687616..104a97a 100644 --- ql/src/test/results/clientpositive/correlationoptimizer9.q.out +++ ql/src/test/results/clientpositive/correlationoptimizer9.q.out @@ -464,7 +464,7 @@ STAGE PLANS: alias: x Statistics: Num rows: 1028 Data size: 22964 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((c2 > 100) and c3 is not null) and (c1 < 120)) (type: boolean) + predicate: (((c2 > 100) and (c1 < 120)) and c3 is not null) (type: boolean) Statistics: Num rows: 114 Data size: 2546 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c1 (type: int), c3 (type: string) @@ -579,7 +579,7 @@ STAGE PLANS: alias: x Statistics: Num rows: 1028 Data size: 22964 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((c2 > 100) and c3 is not null) and (c1 < 120)) (type: boolean) + predicate: (((c2 > 100) and (c1 < 120)) and c3 is not null) (type: boolean) Statistics: Num rows: 114 Data size: 2546 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c1 (type: int), c3 (type: string) diff --git ql/src/test/results/clientpositive/decimal_udf.q.out ql/src/test/results/clientpositive/decimal_udf.q.out index 
0b18d48..68ba4da 100644 --- ql/src/test/results/clientpositive/decimal_udf.q.out +++ ql/src/test/results/clientpositive/decimal_udf.q.out @@ -973,7 +973,7 @@ STAGE PLANS: alias: decimal_udf Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and (key <> 0)) (type: boolean) + predicate: (key <> 0) (type: boolean) Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (key / key) (type: decimal(38,24)) @@ -1039,7 +1039,7 @@ STAGE PLANS: alias: decimal_udf Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (value is not null and (value <> 0)) (type: boolean) + predicate: (value <> 0) (type: boolean) Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (key / CAST( value AS decimal(10,0))) (type: decimal(31,21)) @@ -1095,7 +1095,7 @@ STAGE PLANS: alias: decimal_udf Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (value is not null and (value <> 0)) (type: boolean) + predicate: (value <> 0) (type: boolean) Statistics: Num rows: 3 Data size: 359 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (UDFToDouble(key) / (UDFToDouble(value) / 2.0)) (type: double) diff --git ql/src/test/results/clientpositive/dynamic_rdd_cache.q.out ql/src/test/results/clientpositive/dynamic_rdd_cache.q.out index a8be151..4776fe4 100644 --- ql/src/test/results/clientpositive/dynamic_rdd_cache.q.out +++ ql/src/test/results/clientpositive/dynamic_rdd_cache.q.out @@ -1025,7 +1025,7 @@ STAGE PLANS: alias: date_dim Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (((d_year = 1999) and d_date_sk is not null) and (d_moy = 3)) (type: boolean) + predicate: (((d_year = 1999) and (d_moy = 3)) and d_date_sk is not null) (type: 
boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: d_date_sk (type: int) @@ -1262,7 +1262,7 @@ STAGE PLANS: alias: date_dim Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (((d_year = 1999) and d_date_sk is not null) and (d_moy = 4)) (type: boolean) + predicate: (((d_year = 1999) and (d_moy = 4)) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: d_date_sk (type: int) diff --git ql/src/test/results/clientpositive/filter_cond_pushdown.q.out ql/src/test/results/clientpositive/filter_cond_pushdown.q.out index 1c3a5ab..0bb8c07 100644 --- ql/src/test/results/clientpositive/filter_cond_pushdown.q.out +++ ql/src/test/results/clientpositive/filter_cond_pushdown.q.out @@ -37,7 +37,7 @@ STAGE PLANS: alias: f Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((value is not null and (value <> '')) and key is not null) (type: boolean) + predicate: ((value <> '') and key is not null) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -79,7 +79,7 @@ STAGE PLANS: alias: f Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (value is not null and (value <> '')) (type: boolean) + predicate: (value <> '') (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: value (type: string) @@ -163,7 +163,7 @@ STAGE PLANS: alias: f Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((value is not null and (value <> '')) and key is not null) (type: boolean) + predicate: ((value <> '') and key is not null) 
(type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -205,7 +205,7 @@ STAGE PLANS: alias: f Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (value is not null and (value <> '')) (type: boolean) + predicate: (value <> '') (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: value (type: string) @@ -419,7 +419,7 @@ STAGE PLANS: alias: f Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((key is not null and ((value = '2008-04-10') or (value = '2008-04-08'))) and value is not null) and (value <> '')) (type: boolean) + predicate: ((((value = '2008-04-10') or (value = '2008-04-08')) and (value <> '')) and key is not null) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) diff --git ql/src/test/results/clientpositive/filter_join_breaktask.q.out ql/src/test/results/clientpositive/filter_join_breaktask.q.out index c17b48a..eeb524e 100644 --- ql/src/test/results/clientpositive/filter_join_breaktask.q.out +++ ql/src/test/results/clientpositive/filter_join_breaktask.q.out @@ -168,7 +168,7 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: ((key is not null and value is not null) and (value <> '')) (type: boolean) + predicate: (((value <> '') and key is not null) and value is not null) (type: boolean) Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) @@ -279,7 +279,7 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: (value is not null and (value <> '')) (type: boolean) + 
predicate: ((value <> '') and value is not null) (type: boolean) Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: value (type: string) diff --git ql/src/test/results/clientpositive/fold_eq_with_case_when.q.out ql/src/test/results/clientpositive/fold_eq_with_case_when.q.out index b7e795b..70d1f81 100644 --- ql/src/test/results/clientpositive/fold_eq_with_case_when.q.out +++ ql/src/test/results/clientpositive/fold_eq_with_case_when.q.out @@ -101,10 +101,10 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: NVL((key = '238'),false) (type: boolean) + predicate: (key = '238') (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) + expressions: '238' (type: string) outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -137,15 +137,15 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: CASE (key) WHEN ('238') THEN (true) WHEN ('94') THEN (true) ELSE (false) END (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + predicate: ((key = '238') or (key = '94')) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE table: input format: 
org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -173,7 +173,7 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: NVL((key = '238'),false) (type: boolean) + predicate: CASE WHEN ((key <> '238')) THEN ((key = '238')) ELSE ((key = '238')) END (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) @@ -209,7 +209,7 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: CASE (key) WHEN ('238') THEN (CASE WHEN ((key <> '238')) THEN (true) WHEN ((key = '23')) THEN (true) END) END (type: boolean) + predicate: CASE WHEN ((key <> '238')) THEN ((key = '238')) WHEN ((key = '23')) THEN ((key = '238')) ELSE (null) END (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) diff --git ql/src/test/results/clientpositive/index_auto_unused.q.out ql/src/test/results/clientpositive/index_auto_unused.q.out index e1ddff8..a40ee8c 100644 --- ql/src/test/results/clientpositive/index_auto_unused.q.out +++ ql/src/test/results/clientpositive/index_auto_unused.q.out @@ -356,37 +356,25 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT * FROM srcpart WHERE ds='2008-04-09' AND hr=12 AND key < 10 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: srcpart - filterExpr: (UDFToDouble(key) < 10.0) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (UDFToDouble(key) < 10.0) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic 
stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string), '2008-04-09' (type: string), '12' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - ListSink + TableScan + alias: srcpart + filterExpr: (UDFToDouble(key) < 10.0) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(key) < 10.0) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string), '2008-04-09' (type: string), hr (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + ListSink PREHOOK: query: SELECT * FROM srcpart WHERE ds='2008-04-09' AND hr=12 AND key < 10 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/join34.q.out ql/src/test/results/clientpositive/join34.q.out index f1bf494..e2c2b1a 100644 --- ql/src/test/results/clientpositive/join34.q.out +++ ql/src/test/results/clientpositive/join34.q.out @@ -159,7 +159,7 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: (key is not null and ((UDFToDouble(key) < 20.0) or (UDFToDouble(key) > 100.0))) (type: boolean) + predicate: (((UDFToDouble(key) < 20.0) or (UDFToDouble(key) > 100.0)) and key is not null) (type: boolean) Statistics: Num rows: 16 Data size: 122 Basic stats: COMPLETE 
Column stats: NONE Select Operator expressions: key (type: string), value (type: string) diff --git ql/src/test/results/clientpositive/join35.q.out ql/src/test/results/clientpositive/join35.q.out index 3ff41ea..663642c 100644 --- ql/src/test/results/clientpositive/join35.q.out +++ ql/src/test/results/clientpositive/join35.q.out @@ -273,7 +273,7 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: (key is not null and ((UDFToDouble(key) < 20.0) or (UDFToDouble(key) > 100.0))) (type: boolean) + predicate: (((UDFToDouble(key) < 20.0) or (UDFToDouble(key) > 100.0)) and key is not null) (type: boolean) Statistics: Num rows: 16 Data size: 122 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) diff --git ql/src/test/results/clientpositive/join42.q.out ql/src/test/results/clientpositive/join42.q.out index 4f9e2a9..2339351 100644 --- ql/src/test/results/clientpositive/join42.q.out +++ ql/src/test/results/clientpositive/join42.q.out @@ -80,6 +80,8 @@ POSTHOOK: Output: default@acct POSTHOOK: Lineage: acct.acc_n EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] POSTHOOK: Lineage: acct.aid EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] POSTHOOK: Lineage: acct.brn EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +Warning: Shuffle Join JOIN[25][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[22][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: --[HIVE-10841] (WHERE col is not null) does not work sometimes for queries with many JOIN statements explain select acct.ACC_N, @@ -112,7 +114,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 Stage-3 depends on 
stages: Stage-2 - Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-4 STAGE PLANS: Stage: Stage-1 @@ -127,26 +130,46 @@ STAGE PLANS: Select Operator Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: 4436 (type: int) - sort order: + - Map-reduce partition columns: 4436 (type: int) + sort order: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE TableScan alias: la Statistics: Num rows: 1 Data size: 14 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((aid is not null and pi_id is not null) and (loan_id = 4436)) (type: boolean) + predicate: (((loan_id = 4436) and aid is not null) and pi_id is not null) (type: boolean) Statistics: Num rows: 1 Data size: 14 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: aid (type: int), pi_id (type: int) outputColumnNames: _col1, _col2 Statistics: Num rows: 1 Data size: 14 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: 4436 (type: int) - sort order: + - Map-reduce partition columns: 4436 (type: int) + sort order: Statistics: Num rows: 1 Data size: 14 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col2, _col3 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + value expressions: 
_col2 (type: int), _col3 (type: int) TableScan alias: fr Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE @@ -156,21 +179,17 @@ STAGE PLANS: Select Operator Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: 4436 (type: int) - sort order: + - Map-reduce partition columns: 4436 (type: int) + sort order: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) + 0 + 1 outputColumnNames: _col2, _col3 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -178,7 +197,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -186,7 +205,7 @@ STAGE PLANS: key expressions: _col2 (type: int) sort order: + Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: int) TableScan alias: a @@ -237,7 +256,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-3 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -289,6 +308,8 @@ STAGE PLANS: Processor Tree: ListSink +Warning: Shuffle Join JOIN[25][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[22][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product 
PREHOOK: query: select acct.ACC_N, acct.brn diff --git ql/src/test/results/clientpositive/join_grp_diff_keys.q.out ql/src/test/results/clientpositive/join_grp_diff_keys.q.out index b24bcba..9bcdc01 100644 --- ql/src/test/results/clientpositive/join_grp_diff_keys.q.out +++ ql/src/test/results/clientpositive/join_grp_diff_keys.q.out @@ -59,7 +59,7 @@ STAGE PLANS: alias: foo Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: ((id is not null and line_id is not null) and (orders <> 'blah')) (type: boolean) + predicate: (((orders <> 'blah') and id is not null) and line_id is not null) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: id (type: int), line_id (type: int) diff --git ql/src/test/results/clientpositive/lineage2.q.out ql/src/test/results/clientpositive/lineage2.q.out index ec64c10..66929dd 100644 --- ql/src/test/results/clientpositive/lineage2.q.out +++ ql/src/test/results/clientpositive/lineage2.q.out @@ -523,14 +523,14 @@ PREHOOK: Input: default@src1 PREHOOK: Input: default@src2 PREHOOK: Output: database:default PREHOOK: Output: default@dest3 -{"version":"1.0","engine":"mr","database":"default","hash":"a2c4e9a3ec678039814f5d84b1e38ce4","queryText":"create table dest3 as\n select * from src1 JOIN src2 ON src1.key = src2.key2 WHERE length(key) > 1","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1,2,3],"expression":"(src1.key is not null and (length(src1.key) > 1))","edgeType":"PREDICATE"},{"sources":[4,6],"targets":[0,1,2,3],"expression":"(src1.key = src2.key2)","edgeType":"PREDICATE"},{"sources":[6],"targets":[0,1,2,3],"expression":"(src2.key2 is not null and (length(src2.key2) > 
1))","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest3.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest3.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest3.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest3.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} +{"version":"1.0","engine":"mr","database":"default","hash":"a2c4e9a3ec678039814f5d84b1e38ce4","queryText":"create table dest3 as\n select * from src1 JOIN src2 ON src1.key = src2.key2 WHERE length(key) > 1","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1,2,3],"expression":"((length(src1.key) > 1) and src1.key is not null)","edgeType":"PREDICATE"},{"sources":[4,6],"targets":[0,1,2,3],"expression":"(src1.key = src2.key2)","edgeType":"PREDICATE"},{"sources":[6],"targets":[0,1,2,3],"expression":"((length(src2.key2) > 1) and src2.key2 is not null)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest3.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest3.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest3.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest3.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} PREHOOK: query: insert overwrite table dest2 select * from src1 JOIN src2 ON src1.key = src2.key2 WHERE length(key) > 3 PREHOOK: type: QUERY PREHOOK: 
Input: default@src1 PREHOOK: Input: default@src2 PREHOOK: Output: default@dest2 -{"version":"1.0","engine":"mr","database":"default","hash":"76d84512204ddc576ad4d93f252e4358","queryText":"insert overwrite table dest2\n select * from src1 JOIN src2 ON src1.key = src2.key2 WHERE length(key) > 3","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1,2,3],"expression":"(src1.key is not null and (length(src1.key) > 3))","edgeType":"PREDICATE"},{"sources":[4,6],"targets":[0,1,2,3],"expression":"(src1.key = src2.key2)","edgeType":"PREDICATE"},{"sources":[6],"targets":[0,1,2,3],"expression":"(src2.key2 is not null and (length(src2.key2) > 3))","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest2.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest2.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest2.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest2.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} +{"version":"1.0","engine":"mr","database":"default","hash":"76d84512204ddc576ad4d93f252e4358","queryText":"insert overwrite table dest2\n select * from src1 JOIN src2 ON src1.key = src2.key2 WHERE length(key) > 3","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1,2,3],"expression":"((length(src1.key) > 3) and src1.key is not 
null)","edgeType":"PREDICATE"},{"sources":[4,6],"targets":[0,1,2,3],"expression":"(src1.key = src2.key2)","edgeType":"PREDICATE"},{"sources":[6],"targets":[0,1,2,3],"expression":"((length(src2.key2) > 3) and src2.key2 is not null)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest2.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest2.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest2.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest2.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} PREHOOK: query: drop table if exists dest_l1 PREHOOK: type: DROPTABLE PREHOOK: query: CREATE TABLE dest_l1(key INT, value STRING) STORED AS TEXTFILE @@ -593,7 +593,7 @@ PREHOOK: Input: default@dept PREHOOK: Input: default@emp PREHOOK: Input: default@project PREHOOK: Output: default@tgt -{"version":"1.0","engine":"mr","database":"default","hash":"f59797e0422d2e51515063374dfac361","queryText":"INSERT INTO TABLE tgt\nSELECT emd.dept_name, emd.name, emd.emp_id, emd.mgr_id, p.project_id, p.project_name\nFROM (\n SELECT d.dept_name, em.name, em.emp_id, em.mgr_id, em.dept_id\n FROM (\n SELECT e.name, e.dept_id, e.emp_id emp_id, m.emp_id mgr_id\n FROM emp e JOIN emp m ON e.emp_id = m.emp_id\n ) em\n JOIN dept d ON d.dept_id = em.dept_id\n ) emd JOIN project p ON emd.dept_id = p.project_id","edges":[{"sources":[6],"targets":[0],"edgeType":"PROJECTION"},{"sources":[7],"targets":[1],"edgeType":"PROJECTION"},{"sources":[8],"targets":[2,3],"edgeType":"PROJECTION"},{"sources":[9],"targets":[4],"edgeType":"PROJECTION"},{"sources":[10],"targets":[5],"edgeType":"PROJECTION"},{"sources":[8,11],"targets":[0,1,2,3,4,5],"expression":"(e.emp_id is not null and e.dept_id is not null and e.dept_id is not 
null)","edgeType":"PREDICATE"},{"sources":[8],"targets":[0,1,2,3,4,5],"expression":"(e.emp_id = e.emp_id)","edgeType":"PREDICATE"},{"sources":[8],"targets":[0,1,2,3,4,5],"expression":"e.emp_id is not null","edgeType":"PREDICATE"},{"sources":[11,12],"targets":[0,1,2,3,4,5],"expression":"(e.dept_id = d.dept_id)","edgeType":"PREDICATE"},{"sources":[12],"targets":[0,1,2,3,4,5],"expression":"d.dept_id is not null","edgeType":"PREDICATE"},{"sources":[11,9],"targets":[0,1,2,3,4,5],"expression":"(e.dept_id = p.project_id)","edgeType":"PREDICATE"},{"sources":[9],"targets":[0,1,2,3,4,5],"expression":"p.project_id is not null","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.tgt.dept_name"},{"id":1,"vertexType":"COLUMN","vertexId":"default.tgt.name"},{"id":2,"vertexType":"COLUMN","vertexId":"default.tgt.emp_id"},{"id":3,"vertexType":"COLUMN","vertexId":"default.tgt.mgr_id"},{"id":4,"vertexType":"COLUMN","vertexId":"default.tgt.proj_id"},{"id":5,"vertexType":"COLUMN","vertexId":"default.tgt.proj_name"},{"id":6,"vertexType":"COLUMN","vertexId":"default.dept.dept_name"},{"id":7,"vertexType":"COLUMN","vertexId":"default.emp.name"},{"id":8,"vertexType":"COLUMN","vertexId":"default.emp.emp_id"},{"id":9,"vertexType":"COLUMN","vertexId":"default.project.project_id"},{"id":10,"vertexType":"COLUMN","vertexId":"default.project.project_name"},{"id":11,"vertexType":"COLUMN","vertexId":"default.emp.dept_id"},{"id":12,"vertexType":"COLUMN","vertexId":"default.dept.dept_id"}]} +{"version":"1.0","engine":"mr","database":"default","hash":"f59797e0422d2e51515063374dfac361","queryText":"INSERT INTO TABLE tgt\nSELECT emd.dept_name, emd.name, emd.emp_id, emd.mgr_id, p.project_id, p.project_name\nFROM (\n SELECT d.dept_name, em.name, em.emp_id, em.mgr_id, em.dept_id\n FROM (\n SELECT e.name, e.dept_id, e.emp_id emp_id, m.emp_id mgr_id\n FROM emp e JOIN emp m ON e.emp_id = m.emp_id\n ) em\n JOIN dept d ON d.dept_id = em.dept_id\n ) emd JOIN project p ON 
emd.dept_id = p.project_id","edges":[{"sources":[6],"targets":[0],"edgeType":"PROJECTION"},{"sources":[7],"targets":[1],"edgeType":"PROJECTION"},{"sources":[8],"targets":[2,3],"edgeType":"PROJECTION"},{"sources":[9],"targets":[4],"edgeType":"PROJECTION"},{"sources":[10],"targets":[5],"edgeType":"PROJECTION"},{"sources":[8,11],"targets":[0,1,2,3,4,5],"expression":"(e.emp_id is not null and e.dept_id is not null)","edgeType":"PREDICATE"},{"sources":[8],"targets":[0,1,2,3,4,5],"expression":"(e.emp_id = e.emp_id)","edgeType":"PREDICATE"},{"sources":[8],"targets":[0,1,2,3,4,5],"expression":"e.emp_id is not null","edgeType":"PREDICATE"},{"sources":[11,12],"targets":[0,1,2,3,4,5],"expression":"(e.dept_id = d.dept_id)","edgeType":"PREDICATE"},{"sources":[12],"targets":[0,1,2,3,4,5],"expression":"d.dept_id is not null","edgeType":"PREDICATE"},{"sources":[11,9],"targets":[0,1,2,3,4,5],"expression":"(e.dept_id = p.project_id)","edgeType":"PREDICATE"},{"sources":[9],"targets":[0,1,2,3,4,5],"expression":"p.project_id is not null","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.tgt.dept_name"},{"id":1,"vertexType":"COLUMN","vertexId":"default.tgt.name"},{"id":2,"vertexType":"COLUMN","vertexId":"default.tgt.emp_id"},{"id":3,"vertexType":"COLUMN","vertexId":"default.tgt.mgr_id"},{"id":4,"vertexType":"COLUMN","vertexId":"default.tgt.proj_id"},{"id":5,"vertexType":"COLUMN","vertexId":"default.tgt.proj_name"},{"id":6,"vertexType":"COLUMN","vertexId":"default.dept.dept_name"},{"id":7,"vertexType":"COLUMN","vertexId":"default.emp.name"},{"id":8,"vertexType":"COLUMN","vertexId":"default.emp.emp_id"},{"id":9,"vertexType":"COLUMN","vertexId":"default.project.project_id"},{"id":10,"vertexType":"COLUMN","vertexId":"default.project.project_name"},{"id":11,"vertexType":"COLUMN","vertexId":"default.emp.dept_id"},{"id":12,"vertexType":"COLUMN","vertexId":"default.dept.dept_id"}]} PREHOOK: query: drop table if exists dest_l2 PREHOOK: type: DROPTABLE 
PREHOOK: query: create table dest_l2 (id int, c1 tinyint, c2 int, c3 bigint) stored as textfile @@ -646,7 +646,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@dest_l2 PREHOOK: Input: default@dest_l3 #### A masked pattern was here #### -{"version":"1.0","engine":"mr","database":"default","hash":"01879c619517509d9f5b6ead998bb4bb","queryText":"select sum(a.c1), count(b.c1), b.c2, b.c3\nfrom dest_l2 a join dest_l3 b on (a.id = b.id)\nwhere a.c2 != 10 and b.c3 > 0\ngroup by a.c1, a.c2, a.id, b.c1, b.c2, b.c3\nhaving count(a.c2) > 0\norder by b.c3 limit 5","edges":[{"sources":[4],"targets":[0],"expression":"sum(default.dest_l2.c1)","edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"expression":"count(default.dest_l3.c1)","edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[8,9],"targets":[0,1,2,3],"expression":"(a.id is not null and (a.c2 <> 10))","edgeType":"PREDICATE"},{"sources":[8,10],"targets":[0,1,2,3],"expression":"(a.id = b.id)","edgeType":"PREDICATE"},{"sources":[10,7],"targets":[0,1,2,3],"expression":"(b.id is not null and (b.c3 > 0))","edgeType":"PREDICATE"},{"sources":[9],"targets":[0,1,2,3],"expression":"(count(default.dest_l2.c2) > 0)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"c0"},{"id":1,"vertexType":"COLUMN","vertexId":"c1"},{"id":2,"vertexType":"COLUMN","vertexId":"b.c2"},{"id":3,"vertexType":"COLUMN","vertexId":"b.c3"},{"id":4,"vertexType":"COLUMN","vertexId":"default.dest_l2.c1"},{"id":5,"vertexType":"COLUMN","vertexId":"default.dest_l3.c1"},{"id":6,"vertexType":"COLUMN","vertexId":"default.dest_l3.c2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.dest_l3.c3"},{"id":8,"vertexType":"COLUMN","vertexId":"default.dest_l2.id"},{"id":9,"vertexType":"COLUMN","vertexId":"default.dest_l2.c2"},{"id":10,"vertexType":"COLUMN","vertexId":"default.dest_l3.id"}]} 
+{"version":"1.0","engine":"mr","database":"default","hash":"01879c619517509d9f5b6ead998bb4bb","queryText":"select sum(a.c1), count(b.c1), b.c2, b.c3\nfrom dest_l2 a join dest_l3 b on (a.id = b.id)\nwhere a.c2 != 10 and b.c3 > 0\ngroup by a.c1, a.c2, a.id, b.c1, b.c2, b.c3\nhaving count(a.c2) > 0\norder by b.c3 limit 5","edges":[{"sources":[4],"targets":[0],"expression":"sum(default.dest_l2.c1)","edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"expression":"count(default.dest_l3.c1)","edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[8,9],"targets":[0,1,2,3],"expression":"((a.c2 <> 10) and a.id is not null)","edgeType":"PREDICATE"},{"sources":[9,10],"targets":[0,1,2,3],"expression":"(a.id = b.id)","edgeType":"PREDICATE"},{"sources":[7,10],"targets":[0,1,2,3],"expression":"((b.c3 > 0) and b.id is not null)","edgeType":"PREDICATE"},{"sources":[8],"targets":[0,1,2,3],"expression":"(count(default.dest_l2.c2) > 0)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"c0"},{"id":1,"vertexType":"COLUMN","vertexId":"c1"},{"id":2,"vertexType":"COLUMN","vertexId":"b.c2"},{"id":3,"vertexType":"COLUMN","vertexId":"b.c3"},{"id":4,"vertexType":"COLUMN","vertexId":"default.dest_l2.c1"},{"id":5,"vertexType":"COLUMN","vertexId":"default.dest_l3.c1"},{"id":6,"vertexType":"COLUMN","vertexId":"default.dest_l3.c2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.dest_l3.c3"},{"id":8,"vertexType":"COLUMN","vertexId":"default.dest_l2.c2"},{"id":9,"vertexType":"COLUMN","vertexId":"default.dest_l2.id"},{"id":10,"vertexType":"COLUMN","vertexId":"default.dest_l3.id"}]} 1 1 s2 15 PREHOOK: query: drop table if exists t PREHOOK: type: DROPTABLE @@ -659,7 +659,7 @@ PREHOOK: Input: default@dest_l2 PREHOOK: Input: default@dest_l3 PREHOOK: Output: database:default PREHOOK: Output: default@t 
-{"version":"1.0","engine":"mr","database":"default","hash":"0d2f15b494111ffe236d5be42a76fa28","queryText":"create table t as\nselect distinct a.c2, a.c3 from dest_l2 a\ninner join dest_l3 b on (a.id = b.id)\nwhere a.id > 0 and b.c3 = 15","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1],"expression":"(a.id is not null and (a.id > 0))","edgeType":"PREDICATE"},{"sources":[4,5],"targets":[0,1],"expression":"(a.id = b.id)","edgeType":"PREDICATE"},{"sources":[5,6],"targets":[0,1],"expression":"(b.id is not null and (b.c3 = 15) and (b.id > 0))","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.t.c2"},{"id":1,"vertexType":"COLUMN","vertexId":"default.t.c3"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_l2.c2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_l2.c3"},{"id":4,"vertexType":"COLUMN","vertexId":"default.dest_l2.id"},{"id":5,"vertexType":"COLUMN","vertexId":"default.dest_l3.id"},{"id":6,"vertexType":"COLUMN","vertexId":"default.dest_l3.c3"}]} +{"version":"1.0","engine":"mr","database":"default","hash":"0d2f15b494111ffe236d5be42a76fa28","queryText":"create table t as\nselect distinct a.c2, a.c3 from dest_l2 a\ninner join dest_l3 b on (a.id = b.id)\nwhere a.id > 0 and b.c3 = 15","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1],"expression":"(a.id > 0)","edgeType":"PREDICATE"},{"sources":[4,5],"targets":[0,1],"expression":"(a.id = b.id)","edgeType":"PREDICATE"},{"sources":[6,5],"targets":[0,1],"expression":"((b.c3 = 15) and (b.id > 
0))","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.t.c2"},{"id":1,"vertexType":"COLUMN","vertexId":"default.t.c3"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_l2.c2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_l2.c3"},{"id":4,"vertexType":"COLUMN","vertexId":"default.dest_l2.id"},{"id":5,"vertexType":"COLUMN","vertexId":"default.dest_l3.id"},{"id":6,"vertexType":"COLUMN","vertexId":"default.dest_l3.c3"}]} PREHOOK: query: SELECT substr(src1.key,1,1), count(DISTINCT substr(src1.value,5)), concat(substr(src1.key,1,1),sum(substr(src1.value,5))) from src1 diff --git ql/src/test/results/clientpositive/lineage3.q.out ql/src/test/results/clientpositive/lineage3.q.out index ca7d6e0..0e9c2a9 100644 --- ql/src/test/results/clientpositive/lineage3.q.out +++ ql/src/test/results/clientpositive/lineage3.q.out @@ -51,7 +51,7 @@ where cint is not null and cint < 0 order by cint, cs limit 5 PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc PREHOOK: Output: default@dest_l1@ds=today -{"version":"1.0","engine":"mr","database":"default","hash":"2b5891d094ff74e23ec6acf5b4990f45","queryText":"insert into table dest_l1 partition (ds='today')\nselect cint, cast(cstring1 as varchar(128)) as cs\nfrom alltypesorc\nwhere cint is not null and cint < 0 order by cint, cs limit 5","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"expression":"CAST( alltypesorc.cstring1 AS varchar(128))","edgeType":"PROJECTION"},{"sources":[2],"targets":[0,1],"expression":"(alltypesorc.cint is not null and (alltypesorc.cint < 0))","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l1.a"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l1.b"},{"id":2,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":3,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"}]} 
+{"version":"1.0","engine":"mr","database":"default","hash":"2b5891d094ff74e23ec6acf5b4990f45","queryText":"insert into table dest_l1 partition (ds='today')\nselect cint, cast(cstring1 as varchar(128)) as cs\nfrom alltypesorc\nwhere cint is not null and cint < 0 order by cint, cs limit 5","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"expression":"CAST( alltypesorc.cstring1 AS varchar(128))","edgeType":"PROJECTION"},{"sources":[2],"targets":[0,1],"expression":"(alltypesorc.cint < 0)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l1.a"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l1.b"},{"id":2,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":3,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"}]} PREHOOK: query: insert into table dest_l1 partition (ds='tomorrow') select min(cint), cast(min(cstring1) as varchar(128)) as cs from alltypesorc @@ -61,7 +61,7 @@ having min(cbigint) > 10 PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc PREHOOK: Output: default@dest_l1@ds=tomorrow -{"version":"1.0","engine":"mr","database":"default","hash":"4ad6338a8abfe3fe0342198fcbd1f11d","queryText":"insert into table dest_l1 partition (ds='tomorrow')\nselect min(cint), cast(min(cstring1) as varchar(128)) as cs\nfrom alltypesorc\nwhere cint is not null and cboolean1 = true\ngroup by csmallint\nhaving min(cbigint) > 10","edges":[{"sources":[2],"targets":[0],"expression":"min(default.alltypesorc.cint)","edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"expression":"CAST( min(default.alltypesorc.cstring1) AS varchar(128))","edgeType":"PROJECTION"},{"sources":[2,4],"targets":[0,1],"expression":"(alltypesorc.cint is not null and (alltypesorc.cboolean1 = true))","edgeType":"PREDICATE"},{"sources":[5],"targets":[0,1],"expression":"(min(default.alltypesorc.cbigint) > 
10)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l1.a"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l1.b"},{"id":2,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":3,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cboolean1"},{"id":5,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cbigint"}]} +{"version":"1.0","engine":"mr","database":"default","hash":"4ad6338a8abfe3fe0342198fcbd1f11d","queryText":"insert into table dest_l1 partition (ds='tomorrow')\nselect min(cint), cast(min(cstring1) as varchar(128)) as cs\nfrom alltypesorc\nwhere cint is not null and cboolean1 = true\ngroup by csmallint\nhaving min(cbigint) > 10","edges":[{"sources":[2],"targets":[0],"expression":"min(default.alltypesorc.cint)","edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"expression":"CAST( min(default.alltypesorc.cstring1) AS varchar(128))","edgeType":"PROJECTION"},{"sources":[4,2],"targets":[0,1],"expression":"((alltypesorc.cboolean1 = true) and alltypesorc.cint is not null)","edgeType":"PREDICATE"},{"sources":[5],"targets":[0,1],"expression":"(min(default.alltypesorc.cbigint) > 10)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l1.a"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l1.b"},{"id":2,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":3,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cboolean1"},{"id":5,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cbigint"}]} PREHOOK: query: select cint, rank() over(order by cint) from alltypesorc where cint > 10 and cint < 10000 limit 10 PREHOOK: type: QUERY @@ -116,7 +116,7 @@ order by a.cbigint, a.ctinyint, b.cint, b.ctinyint limit 5 PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc #### A masked pattern was here 
#### -{"version":"1.0","engine":"mr","database":"default","hash":"afd760470fc5aa6d3e8348dee03af97f","queryText":"select a.cbigint, a.ctinyint, b.cint, b.ctinyint\nfrom\n (select ctinyint, cbigint from alltypesorc\n union all\n select ctinyint, cbigint from alltypesorc) a\n inner join\n alltypesorc b\n on (a.ctinyint = b.ctinyint)\nwhere b.ctinyint < 100 and a.cbigint is not null and b.cint is not null\norder by a.cbigint, a.ctinyint, b.cint, b.ctinyint limit 5","edges":[{"sources":[4],"targets":[0],"expression":"cbigint","edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"expression":"ctinyint","edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[5],"targets":[3],"edgeType":"PROJECTION"},{"sources":[5,4],"targets":[0,1,2,3],"expression":"(alltypesorc.ctinyint is not null and alltypesorc.cbigint is not null and (alltypesorc.ctinyint < 100))","edgeType":"PREDICATE"},{"sources":[5],"targets":[0,1,2,3],"expression":"(ctinyint = alltypesorc.ctinyint)","edgeType":"PREDICATE"},{"sources":[5,6],"targets":[0,1,2,3],"expression":"(alltypesorc.ctinyint is not null and (alltypesorc.ctinyint < 100) and alltypesorc.cint is not null)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"a.cbigint"},{"id":1,"vertexType":"COLUMN","vertexId":"a.ctinyint"},{"id":2,"vertexType":"COLUMN","vertexId":"b.cint"},{"id":3,"vertexType":"COLUMN","vertexId":"b.ctinyint"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cbigint"},{"id":5,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"},{"id":6,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"}]} +{"version":"1.0","engine":"mr","database":"default","hash":"afd760470fc5aa6d3e8348dee03af97f","queryText":"select a.cbigint, a.ctinyint, b.cint, b.ctinyint\nfrom\n (select ctinyint, cbigint from alltypesorc\n union all\n select ctinyint, cbigint from alltypesorc) a\n inner join\n alltypesorc b\n on (a.ctinyint = b.ctinyint)\nwhere b.ctinyint < 100 
and a.cbigint is not null and b.cint is not null\norder by a.cbigint, a.ctinyint, b.cint, b.ctinyint limit 5","edges":[{"sources":[4],"targets":[0],"expression":"cbigint","edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"expression":"ctinyint","edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[5],"targets":[3],"edgeType":"PROJECTION"},{"sources":[5,4],"targets":[0,1,2,3],"expression":"((alltypesorc.ctinyint < 100) and alltypesorc.cbigint is not null)","edgeType":"PREDICATE"},{"sources":[5],"targets":[0,1,2,3],"expression":"(ctinyint = alltypesorc.ctinyint)","edgeType":"PREDICATE"},{"sources":[5,6],"targets":[0,1,2,3],"expression":"((alltypesorc.ctinyint < 100) and alltypesorc.cint is not null)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"a.cbigint"},{"id":1,"vertexType":"COLUMN","vertexId":"a.ctinyint"},{"id":2,"vertexType":"COLUMN","vertexId":"b.cint"},{"id":3,"vertexType":"COLUMN","vertexId":"b.ctinyint"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cbigint"},{"id":5,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"},{"id":6,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"}]} -2147311592 -51 -1071480828 -51 -2147311592 -51 -1071480828 -51 -2147311592 -51 -1067683781 -51 @@ -135,7 +135,7 @@ and x.ctinyint + length(c.cstring2) < 1000 PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc #### A masked pattern was here #### -{"version":"1.0","engine":"mr","database":"default","hash":"3a12ad24b2622a8958df12d0bdc60f8a","queryText":"select x.ctinyint, x.cint, c.cbigint-100, c.cstring1\nfrom alltypesorc c\njoin (\n select a.ctinyint ctinyint, b.cint cint\n from (select * from alltypesorc a where cboolean1=false) a\n join alltypesorc b on (a.cint = b.cbigint - 224870380)\n ) x on (x.cint = c.cint)\nwhere x.ctinyint > 10\nand x.cint < 4.5\nand x.ctinyint + length(c.cstring2) < 
1000","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"expression":"(c.cbigint - UDFToLong(100))","edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[5],"targets":[0,1,2,3],"expression":"(c.cint is not null and (UDFToDouble(c.cint) < 4.5))","edgeType":"PREDICATE"},{"sources":[5],"targets":[0,1,2,3],"expression":"(c.cint = c.cint)","edgeType":"PREDICATE"},{"sources":[6,5],"targets":[0,1,2,3],"expression":"(c.cbigint is not null and c.cint is not null and (UDFToDouble(c.cint) < 4.5))","edgeType":"PREDICATE"},{"sources":[6,5],"targets":[0,1,2,3],"expression":"((c.cbigint - UDFToLong(224870380)) = UDFToLong(c.cint))","edgeType":"PREDICATE"},{"sources":[8,5,4],"targets":[0,1,2,3],"expression":"((c.cboolean1 = false) and c.cint is not null and (c.ctinyint > 10))","edgeType":"PREDICATE"},{"sources":[4,9],"targets":[0,1,2,3],"expression":"((UDFToInteger(c.ctinyint) + length(c.cstring2)) < 1000)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"x.ctinyint"},{"id":1,"vertexType":"COLUMN","vertexId":"x.cint"},{"id":2,"vertexType":"COLUMN","vertexId":"c2"},{"id":3,"vertexType":"COLUMN","vertexId":"c.cstring1"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"},{"id":5,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":6,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cbigint"},{"id":7,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"},{"id":8,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cboolean1"},{"id":9,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring2"}]} +{"version":"1.0","engine":"mr","database":"default","hash":"3a12ad24b2622a8958df12d0bdc60f8a","queryText":"select x.ctinyint, x.cint, c.cbigint-100, c.cstring1\nfrom alltypesorc c\njoin (\n select a.ctinyint ctinyint, b.cint cint\n from (select * from alltypesorc a where 
cboolean1=false) a\n join alltypesorc b on (a.cint = b.cbigint - 224870380)\n ) x on (x.cint = c.cint)\nwhere x.ctinyint > 10\nand x.cint < 4.5\nand x.ctinyint + length(c.cstring2) < 1000","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"expression":"(c.cbigint - UDFToLong(100))","edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[5],"targets":[0,1,2,3],"expression":"(UDFToDouble(c.cint) < 4.5)","edgeType":"PREDICATE"},{"sources":[5],"targets":[0,1,2,3],"expression":"(c.cint = c.cint)","edgeType":"PREDICATE"},{"sources":[5,6],"targets":[0,1,2,3],"expression":"((UDFToDouble(c.cint) < 4.5) and c.cbigint is not null)","edgeType":"PREDICATE"},{"sources":[6,5],"targets":[0,1,2,3],"expression":"((c.cbigint - UDFToLong(224870380)) = UDFToLong(c.cint))","edgeType":"PREDICATE"},{"sources":[8,4,5],"targets":[0,1,2,3],"expression":"((c.cboolean1 = false) and (c.ctinyint > 10) and c.cint is not null)","edgeType":"PREDICATE"},{"sources":[4,9],"targets":[0,1,2,3],"expression":"((UDFToInteger(c.ctinyint) + length(c.cstring2)) < 1000)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"x.ctinyint"},{"id":1,"vertexType":"COLUMN","vertexId":"x.cint"},{"id":2,"vertexType":"COLUMN","vertexId":"c2"},{"id":3,"vertexType":"COLUMN","vertexId":"c.cstring1"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"},{"id":5,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":6,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cbigint"},{"id":7,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"},{"id":8,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cboolean1"},{"id":9,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring2"}]} 11 -654374827 857266369 OEfPnHnIYueoup PREHOOK: query: select c1, x2, x3 from ( @@ -178,7 +178,7 @@ PREHOOK: type: QUERY PREHOOK: Input: 
default@alltypesorc PREHOOK: Input: default@src1 #### A masked pattern was here #### -{"version":"1.0","engine":"mr","database":"default","hash":"8bf193b0658183be94e2428a79d91d10","queryText":"select * from src1 a\nwhere exists\n (select cint from alltypesorc b\n where a.key = b.ctinyint + 300)\nand key > 300","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[2],"targets":[0,1],"expression":"(UDFToDouble(a.key) is not null and (UDFToDouble(a.key) > UDFToDouble(300)))","edgeType":"PREDICATE"},{"sources":[2,4],"targets":[0,1],"expression":"(UDFToDouble(a.key) = UDFToDouble((UDFToInteger(b.ctinyint) + 300)))","edgeType":"PREDICATE"},{"sources":[4],"targets":[0,1],"expression":"UDFToDouble((UDFToInteger(b.ctinyint) + 300)) is not null","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"a.key"},{"id":1,"vertexType":"COLUMN","vertexId":"a.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"}]} +{"version":"1.0","engine":"mr","database":"default","hash":"8bf193b0658183be94e2428a79d91d10","queryText":"select * from src1 a\nwhere exists\n (select cint from alltypesorc b\n where a.key = b.ctinyint + 300)\nand key > 300","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[2],"targets":[0,1],"expression":"((UDFToDouble(a.key) > UDFToDouble(300)) and UDFToDouble(a.key) is not null)","edgeType":"PREDICATE"},{"sources":[2,4],"targets":[0,1],"expression":"(UDFToDouble(a.key) = UDFToDouble((UDFToInteger(b.ctinyint) + 300)))","edgeType":"PREDICATE"},{"sources":[4],"targets":[0,1],"expression":"UDFToDouble((UDFToInteger(b.ctinyint) + 300)) is not 
null","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"a.key"},{"id":1,"vertexType":"COLUMN","vertexId":"a.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"}]} 311 val_311 Warning: Shuffle Join JOIN[16][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: select key, value from src1 @@ -251,7 +251,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc PREHOOK: Input: default@dest_v1 #### A masked pattern was here #### -{"version":"1.0","engine":"mr","database":"default","hash":"b0192d4da86f4bef38fe7ab1fc607906","queryText":"select t.ctinyint from (select * from dest_v1 where ctinyint is not null) t\nwhere ctinyint > 10 order by ctinyint limit 2","edges":[{"sources":[1],"targets":[0],"edgeType":"PROJECTION"},{"sources":[1],"targets":[0],"expression":"(alltypesorc.ctinyint is not null and (alltypesorc.ctinyint > 10))","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"t.ctinyint"},{"id":1,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"}]} +{"version":"1.0","engine":"mr","database":"default","hash":"b0192d4da86f4bef38fe7ab1fc607906","queryText":"select t.ctinyint from (select * from dest_v1 where ctinyint is not null) t\nwhere ctinyint > 10 order by ctinyint limit 2","edges":[{"sources":[1],"targets":[0],"edgeType":"PROJECTION"},{"sources":[1],"targets":[0],"expression":"(alltypesorc.ctinyint > 10)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"t.ctinyint"},{"id":1,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"}]} 11 11 PREHOOK: query: drop view if exists dest_v2 @@ -317,7 +317,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc PREHOOK: Input: default@dest_v3 #### A masked pattern was here #### 
-{"version":"1.0","engine":"mr","database":"default","hash":"40bccc0722002f798d0548b59e369e83","queryText":"select * from dest_v3 limit 2","edges":[{"sources":[3,4,5,6,7],"targets":[0],"expression":"(tok_function sum (. (tok_table_or_col $hdt$_0) ctinyint) (tok_windowspec (tok_partitioningspec (tok_distributeby (. (tok_table_or_col $hdt$_0) csmallint)) (tok_orderby (tok_tabsortcolnameasc (. (tok_table_or_col $hdt$_0) csmallint)))) (tok_windowvalues (preceding 2147483647) current)))","edgeType":"PROJECTION"},{"sources":[6],"targets":[1],"expression":"count(default.alltypesorc.cstring1)","edgeType":"PROJECTION"},{"sources":[5],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7,8],"targets":[0,1,2],"expression":"(a.cint is not null and (a.cboolean2 = true))","edgeType":"PREDICATE"},{"sources":[7],"targets":[0,1,2],"expression":"(a.cint = a.cint)","edgeType":"PREDICATE"},{"sources":[7,9],"targets":[0,1,2],"expression":"(a.cint is not null and (a.cfloat > 0.0))","edgeType":"PREDICATE"},{"sources":[7],"targets":[0,1,2],"expression":"(count(default.alltypesorc.cint) > 10)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"dest_v3.a"},{"id":1,"vertexType":"COLUMN","vertexId":"dest_v3.x"},{"id":2,"vertexType":"COLUMN","vertexId":"dest_v3.cboolean1"},{"id":3,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.csmallint"},{"id":5,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cboolean1"},{"id":6,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"},{"id":7,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":8,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cboolean2"},{"id":9,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cfloat"}]} +{"version":"1.0","engine":"mr","database":"default","hash":"40bccc0722002f798d0548b59e369e83","queryText":"select * from dest_v3 limit 
2","edges":[{"sources":[3,4,5,6,7],"targets":[0],"expression":"(tok_function sum (. (tok_table_or_col $hdt$_0) ctinyint) (tok_windowspec (tok_partitioningspec (tok_distributeby (. (tok_table_or_col $hdt$_0) csmallint)) (tok_orderby (tok_tabsortcolnameasc (. (tok_table_or_col $hdt$_0) csmallint)))) (tok_windowvalues (preceding 2147483647) current)))","edgeType":"PROJECTION"},{"sources":[6],"targets":[1],"expression":"count(default.alltypesorc.cstring1)","edgeType":"PROJECTION"},{"sources":[5],"targets":[2],"edgeType":"PROJECTION"},{"sources":[8,7],"targets":[0,1,2],"expression":"((a.cboolean2 = true) and a.cint is not null)","edgeType":"PREDICATE"},{"sources":[7],"targets":[0,1,2],"expression":"(a.cint = a.cint)","edgeType":"PREDICATE"},{"sources":[9,7],"targets":[0,1,2],"expression":"((a.cfloat > 0.0) and a.cint is not null)","edgeType":"PREDICATE"},{"sources":[7],"targets":[0,1,2],"expression":"(count(default.alltypesorc.cint) > 10)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"dest_v3.a"},{"id":1,"vertexType":"COLUMN","vertexId":"dest_v3.x"},{"id":2,"vertexType":"COLUMN","vertexId":"dest_v3.cboolean1"},{"id":3,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.csmallint"},{"id":5,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cboolean1"},{"id":6,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"},{"id":7,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":8,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cboolean2"},{"id":9,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cfloat"}]} 38 216 false 38 229 true PREHOOK: query: drop table if exists src_dp diff --git ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out index 6193580..41d824c 100644 --- ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out +++ 
ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out @@ -512,7 +512,7 @@ STAGE PLANS: alias: srcpart Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and (value > 'val_450')) (type: boolean) + predicate: ((value > 'val_450') and key is not null) (type: boolean) Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) diff --git ql/src/test/results/clientpositive/merge_dynamic_partition.q.out ql/src/test/results/clientpositive/merge_dynamic_partition.q.out index da19b32..07102b1 100644 --- ql/src/test/results/clientpositive/merge_dynamic_partition.q.out +++ ql/src/test/results/clientpositive/merge_dynamic_partition.q.out @@ -1291,7 +1291,7 @@ STAGE PLANS: alias: srcpart_merge_dp Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string), '2008-04-08' (type: string), '11' (type: string) + expressions: key (type: string), value (type: string), '2008-04-08' (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git ql/src/test/results/clientpositive/ppd_join2.q.out ql/src/test/results/clientpositive/ppd_join2.q.out index 955f4ad..8d7027f 100644 --- ql/src/test/results/clientpositive/ppd_join2.q.out +++ ql/src/test/results/clientpositive/ppd_join2.q.out @@ -39,7 +39,7 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((((((key <> '302') and (key < '400')) and (key <> '305')) and value is not null) and (key <> '311')) and ((value <> 'val_50') or (key > '1'))) and (key <> '14')) (type: boolean) + predicate: (((((((key <> '302') and (key < '400')) and (key <> '305')) and (key <> 
'311')) and ((value <> 'val_50') or (key > '1'))) and (key <> '14')) and value is not null) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -97,7 +97,7 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((key <> '306') and value is not null) and (sqrt(key) <> 13.0)) (type: boolean) + predicate: (((key <> '306') and (sqrt(key) <> 13.0)) and value is not null) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: value (type: string) @@ -1723,7 +1723,7 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((((((key <> '302') and (key < '400')) and (key <> '305')) and value is not null) and (key <> '311')) and ((value <> 'val_50') or (key > '1'))) and (key <> '14')) (type: boolean) + predicate: (((((((key <> '302') and (key < '400')) and (key <> '305')) and (key <> '311')) and ((value <> 'val_50') or (key > '1'))) and (key <> '14')) and value is not null) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -1781,7 +1781,7 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((key <> '306') and value is not null) and (sqrt(key) <> 13.0)) (type: boolean) + predicate: (((key <> '306') and (sqrt(key) <> 13.0)) and value is not null) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: value (type: string) diff --git ql/src/test/results/clientpositive/ppd_join5.q.out ql/src/test/results/clientpositive/ppd_join5.q.out index 
18a75c6..ab247f2 100644 --- ql/src/test/results/clientpositive/ppd_join5.q.out +++ ql/src/test/results/clientpositive/ppd_join5.q.out @@ -71,7 +71,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (id is not null and (d <= 1)) (type: boolean) + predicate: ((d <= 1) and id is not null) (type: boolean) Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: id (type: string), d (type: int) @@ -191,7 +191,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (id is not null and (d <= 1)) (type: boolean) + predicate: ((d <= 1) and id is not null) (type: boolean) Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: id (type: string), d (type: int) diff --git ql/src/test/results/clientpositive/ppd_outer_join5.q.out ql/src/test/results/clientpositive/ppd_outer_join5.q.out index b2dd5ef..98712b5 100644 --- ql/src/test/results/clientpositive/ppd_outer_join5.q.out +++ ql/src/test/results/clientpositive/ppd_outer_join5.q.out @@ -30,13 +30,16 @@ POSTHOOK: query: create table t4 (id int, key string, value string) POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@t4 +Warning: Shuffle Join JOIN[15][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[12][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain select * from t1 full outer join t2 on t1.id=t2.id join t3 on t2.id=t3.id where t3.id=20 PREHOOK: type: QUERY POSTHOOK: query: explain select * from t1 full outer join t2 on t1.id=t2.id join t3 on t2.id=t3.id where t3.id=20 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-0 
depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -53,11 +56,9 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + sort order: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string) + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) TableScan alias: t2 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE @@ -65,15 +66,40 @@ STAGE PLANS: predicate: (id = 20) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: 20 (type: int), key (type: string), value (type: string) - outputColumnNames: _col0, _col1, _col2 + expressions: key (type: string), value (type: string) + outputColumnNames: _col1, _col2 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + sort order: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + filter predicates: + 0 + 1 {true} + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col4, _col5 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output 
Operator + sort order: + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: string) TableScan alias: t3 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE @@ -81,33 +107,33 @@ STAGE PLANS: predicate: (id = 20) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: 20 (type: int), key (type: string), value (type: string) - outputColumnNames: _col0, _col1, _col2 + expressions: key (type: string), value (type: string) + outputColumnNames: _col1, _col2 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + sort order: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string) Reduce Operator Tree: Join Operator condition map: - Right Outer Join0 to 1 - Inner Join 1 to 2 + Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col7, _col8 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), 20 (type: int), _col4 
(type: string), _col5 (type: string), 20 (type: int), _col7 (type: string), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -115,13 +141,16 @@ STAGE PLANS: Processor Tree: ListSink +Warning: Shuffle Join JOIN[16][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[13][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain select * from t1 join t2 on (t1.id=t2.id) left outer join t3 on (t2.id=t3.id) where t2.id=20 PREHOOK: type: QUERY POSTHOOK: query: explain select * from t1 join t2 on (t1.id=t2.id) left outer join t3 on (t2.id=t3.id) where t2.id=20 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -134,13 +163,11 @@ STAGE PLANS: predicate: (id = 20) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: 20 (type: int), key (type: string), value (type: string) - outputColumnNames: _col0, _col1, _col2 + expressions: key (type: string), value (type: string) + outputColumnNames: _col1, _col2 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + sort order: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL 
Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string) TableScan @@ -150,15 +177,37 @@ STAGE PLANS: predicate: (id = 20) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: 20 (type: int), key (type: string), value (type: string) - outputColumnNames: _col0, _col1, _col2 + expressions: key (type: string), value (type: string) + outputColumnNames: _col1, _col2 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + sort order: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col1, _col2, _col4, _col5 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: string) TableScan alias: t3 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE @@ -170,29 +219,29 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + sort order: Statistics: Num rows: 1 
Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string) + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 - Left Outer Join1 to 2 + Left Outer Join0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + 0 + 1 + outputColumnNames: _col1, _col2, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: 20 (type: int), _col1 (type: string), _col2 (type: string), 20 (type: int), _col4 (type: string), _col5 (type: string), _col6 (type: int), _col7 (type: string), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -200,13 +249,16 @@ STAGE PLANS: Processor Tree: ListSink +Warning: Shuffle Join JOIN[16][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[13][tables = [$hdt$_0, $hdt$_1]] 
in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain select * from t1 join t2 on (t1.id=t2.id) left outer join t3 on (t1.id=t3.id) where t2.id=20 PREHOOK: type: QUERY POSTHOOK: query: explain select * from t1 join t2 on (t1.id=t2.id) left outer join t3 on (t1.id=t3.id) where t2.id=20 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -219,13 +271,11 @@ STAGE PLANS: predicate: (id = 20) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: 20 (type: int), key (type: string), value (type: string) - outputColumnNames: _col0, _col1, _col2 + expressions: key (type: string), value (type: string) + outputColumnNames: _col1, _col2 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + sort order: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string) TableScan @@ -235,15 +285,37 @@ STAGE PLANS: predicate: (id = 20) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: 20 (type: int), key (type: string), value (type: string) - outputColumnNames: _col0, _col1, _col2 + expressions: key (type: string), value (type: string) + outputColumnNames: _col1, _col2 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + sort order: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string) + Reduce Operator Tree: + Join 
Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col1, _col2, _col4, _col5 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: string) TableScan alias: t3 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE @@ -255,29 +327,29 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + sort order: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string) + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 - Left Outer Join0 to 2 + Left Outer Join0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + 0 + 1 + outputColumnNames: _col1, _col2, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: 20 (type: int), _col1 (type: string), _col2 (type: string), 20 (type: int), _col4 (type: string), _col5 (type: string), _col6 (type: int), _col7 (type: string), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/ppd_udf_case.q.out ql/src/test/results/clientpositive/ppd_udf_case.q.out index d09ed31..422b6ea 100644 --- ql/src/test/results/clientpositive/ppd_udf_case.q.out +++ ql/src/test/results/clientpositive/ppd_udf_case.q.out @@ -37,7 +37,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and CASE (key) WHEN ('27') THEN (true) WHEN ('38') THEN (false) ELSE (null) END) (type: boolean) + predicate: (((ds = '2008-04-08') and CASE WHEN ((key = '27')) THEN (true) WHEN ((key = '38')) THEN (false) ELSE (null) END) and key is not null) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string), hr (type: string) @@ -53,7 +53,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and CASE (key) WHEN 
('27') THEN (true) WHEN ('38') THEN (false) ELSE (null) END) (type: boolean) + predicate: (((ds = '2008-04-08') and CASE WHEN ((key = '27')) THEN (true) WHEN ((key = '38')) THEN (false) ELSE (null) END) and key is not null) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string), hr (type: string) @@ -183,7 +183,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and CASE (key) WHEN ('27') THEN (true) WHEN ('38') THEN (false) ELSE (null) END) (type: boolean) + predicate: (CASE WHEN ((key = '27')) THEN (true) WHEN ((key = '38')) THEN (false) ELSE (null) END and key is not null) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string), hr (type: string) @@ -199,7 +199,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and CASE (key) WHEN ('27') THEN (true) WHEN ('38') THEN (false) ELSE (null) END) (type: boolean) + predicate: (CASE WHEN ((key = '27')) THEN (true) WHEN ((key = '38')) THEN (false) ELSE (null) END and key is not null) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string), hr (type: string) diff --git ql/src/test/results/clientpositive/ppd_union_view.q.out ql/src/test/results/clientpositive/ppd_union_view.q.out index ddd236a..345b3da 100644 --- ql/src/test/results/clientpositive/ppd_union_view.q.out +++ ql/src/test/results/clientpositive/ppd_union_view.q.out @@ -177,13 +177,13 @@ STAGE PLANS: predicate: keymap is not null (type: boolean) Statistics: Num rows: 1 Data size: 14 Basic stats: COMPLETE Column stats: 
NONE Select Operator - expressions: keymap (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: keymap (type: string), value (type: string), ds (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 14 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), '2011-10-13' (type: string) + key expressions: _col0 (type: string), _col2 (type: string) sort order: ++ - Map-reduce partition columns: _col0 (type: string), '2011-10-13' (type: string) + Map-reduce partition columns: _col0 (type: string), _col2 (type: string) Statistics: Num rows: 1 Data size: 14 Basic stats: COMPLETE Column stats: NONE tag: 0 value expressions: _col1 (type: string) @@ -197,13 +197,13 @@ STAGE PLANS: predicate: keymap is not null (type: boolean) Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), keymap (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string), keymap (type: string), ds (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string), '2011-10-13' (type: string) + key expressions: _col1 (type: string), _col2 (type: string) sort order: ++ - Map-reduce partition columns: _col1 (type: string), '2011-10-13' (type: string) + Map-reduce partition columns: _col1 (type: string), _col2 (type: string) Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE tag: 1 value expressions: _col0 (type: string) @@ -521,7 +521,7 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: (keymap is not null and (ds = '2011-10-15')) (type: boolean) + predicate: ((ds = '2011-10-15') and keymap is not null) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE 
Select Operator expressions: keymap (type: string), value (type: string) @@ -541,7 +541,7 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: (keymap is not null and (ds = '2011-10-15')) (type: boolean) + predicate: ((ds = '2011-10-15') and keymap is not null) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: key (type: string), keymap (type: string) diff --git ql/src/test/results/clientpositive/semijoin4.q.out ql/src/test/results/clientpositive/semijoin4.q.out index 77f2615..ec63813 100644 --- ql/src/test/results/clientpositive/semijoin4.q.out +++ ql/src/test/results/clientpositive/semijoin4.q.out @@ -69,7 +69,7 @@ STAGE PLANS: alias: t1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (((decimal1309_col_65 is not null and bigint_col_13 is not null) and UDFToInteger(tinyint_col_46) is not null) and (UDFToInteger(tinyint_col_46) = -92)) (type: boolean) + predicate: ((((UDFToInteger(tinyint_col_46) = -92) and decimal1309_col_65 is not null) and bigint_col_13 is not null) and UDFToInteger(tinyint_col_46) is not null) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: bigint_col_13 (type: bigint), smallint_col_24 (type: smallint), double_col_60 (type: double), decimal1309_col_65 (type: decimal(13,9)) @@ -85,7 +85,7 @@ STAGE PLANS: alias: t2 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (((tinyint_col_18 is not null and decimal2709_col_9 is not null) and UDFToInteger(tinyint_col_21) is not null) and (UDFToInteger(tinyint_col_21) = -92)) (type: boolean) + predicate: ((((UDFToInteger(tinyint_col_21) = -92) and tinyint_col_18 is not null) and decimal2709_col_9 is not null) and UDFToInteger(tinyint_col_21) is not null) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: 
PARTIAL Column stats: NONE Select Operator expressions: decimal2709_col_9 (type: decimal(27,9)), tinyint_col_18 (type: tinyint) diff --git ql/src/test/results/clientpositive/smb_mapjoin_25.q.out ql/src/test/results/clientpositive/smb_mapjoin_25.q.out index 57ee16a..7f44704 100644 --- ql/src/test/results/clientpositive/smb_mapjoin_25.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin_25.q.out @@ -46,6 +46,8 @@ POSTHOOK: query: load data local inpath '../../data/files/smbbucket_3.rc' overwr POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@smb_bucket_3 +Warning: Shuffle Join JOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[20][tables = [$hdt$_1, $hdt$_2]] in Stage 'Stage-4:MAPRED' is a cross product PREHOOK: query: explain select * from (select a.key from smb_bucket_1 a join smb_bucket_2 b on (a.key = b.key) where a.key = 5) t1 left outer join (select c.key from smb_bucket_2 c join smb_bucket_3 d on (c.key = d.key) where c.key=5) t2 on (t1.key=t2.key) where t2.key=5 PREHOOK: type: QUERY @@ -71,9 +73,7 @@ STAGE PLANS: Select Operator Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: 5 (type: int) - sort order: + - Map-reduce partition columns: 5 (type: int) + sort order: Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: NONE TableScan alias: b @@ -84,17 +84,15 @@ STAGE PLANS: Select Operator Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: 5 (type: int) - sort order: + - Map-reduce partition columns: 5 (type: int) + sort order: Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) + 0 + 1 Statistics: Num rows: 28 Data size: 114 Basic stats: COMPLETE Column 
stats: NONE File Output Operator compressed: false @@ -123,8 +121,8 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) + 0 5 (type: int) + 1 5 (type: int) Statistics: Num rows: 31 Data size: 129 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: 5 (type: int), 5 (type: int) @@ -150,9 +148,7 @@ STAGE PLANS: Select Operator Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: 5 (type: int) - sort order: + - Map-reduce partition columns: 5 (type: int) + sort order: Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: NONE TableScan alias: d @@ -163,17 +159,15 @@ STAGE PLANS: Select Operator Statistics: Num rows: 27 Data size: 108 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: 5 (type: int) - sort order: + - Map-reduce partition columns: 5 (type: int) + sort order: Statistics: Num rows: 27 Data size: 108 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) + 0 + 1 Statistics: Num rows: 29 Data size: 118 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -188,6 +182,8 @@ STAGE PLANS: Processor Tree: ListSink +Warning: Map Join MAPJOIN[37][bigTable=?] in task 'Stage-9:MAPRED' is a cross product +Warning: Map Join MAPJOIN[38][bigTable=?] 
in task 'Stage-10:MAPRED' is a cross product PREHOOK: query: -- explain -- select * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key left outer join src c on a.key=c.value @@ -236,8 +232,8 @@ STAGE PLANS: Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: - 0 5 (type: int) - 1 5 (type: int) + 0 + 1 Stage: Stage-9 Map Reduce @@ -254,8 +250,8 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 5 (type: int) - 1 5 (type: int) + 0 + 1 Statistics: Num rows: 28 Data size: 114 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -367,8 +363,8 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) + 0 5 (type: int) + 1 5 (type: int) Statistics: Num rows: 31 Data size: 129 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: 5 (type: int), 5 (type: int) @@ -400,8 +396,8 @@ STAGE PLANS: Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: - 0 5 (type: int) - 1 5 (type: int) + 0 + 1 Stage: Stage-10 Map Reduce @@ -418,8 +414,8 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 5 (type: int) - 1 5 (type: int) + 0 + 1 Statistics: Num rows: 29 Data size: 118 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -436,6 +432,8 @@ STAGE PLANS: Processor Tree: ListSink +Warning: Map Join MAPJOIN[37][bigTable=?] in task 'Stage-9:MAPRED' is a cross product +Warning: Map Join MAPJOIN[38][bigTable=?] 
in task 'Stage-10:MAPRED' is a cross product PREHOOK: query: select * from (select a.key from smb_bucket_1 a join smb_bucket_2 b on (a.key = b.key) where a.key = 5) t1 left outer join (select c.key from smb_bucket_2 c join smb_bucket_3 d on (c.key = d.key) where c.key=5) t2 on (t1.key=t2.key) where t2.key=5 PREHOOK: type: QUERY PREHOOK: Input: default@smb_bucket_1 diff --git ql/src/test/results/clientpositive/subquery_exists.q.out ql/src/test/results/clientpositive/subquery_exists.q.out index 698db03..f3a2705 100644 --- ql/src/test/results/clientpositive/subquery_exists.q.out +++ ql/src/test/results/clientpositive/subquery_exists.q.out @@ -36,7 +36,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and (value > 'val_9')) (type: boolean) + predicate: ((value > 'val_9') and key is not null) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) diff --git ql/src/test/results/clientpositive/subquery_in.q.out ql/src/test/results/clientpositive/subquery_in.q.out index 0bbefc2..0b294ae 100644 --- ql/src/test/results/clientpositive/subquery_in.q.out +++ ql/src/test/results/clientpositive/subquery_in.q.out @@ -136,7 +136,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (value is not null and (key > '9')) (type: boolean) + predicate: ((key > '9') and value is not null) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -669,7 +669,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (value is not null and (key > '9')) (type: boolean) + predicate: ((key > '9') and 
value is not null) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -790,7 +790,7 @@ STAGE PLANS: alias: lineitem Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((l_partkey is not null and l_orderkey is not null) and (l_linenumber = 1)) (type: boolean) + predicate: (((l_linenumber = 1) and l_partkey is not null) and l_orderkey is not null) (type: boolean) Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int) diff --git ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out index 4cc5424..5cd4b76 100644 --- ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out +++ ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out @@ -52,7 +52,7 @@ STAGE PLANS: alias: src11 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (value1 is not null and (key1 > '9')) (type: boolean) + predicate: ((key1 > '9') and value1 is not null) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: key1 (type: string), value1 (type: string) @@ -122,7 +122,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (value is not null and (key > '9')) (type: boolean) + predicate: ((key > '9') and value is not null) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -586,7 +586,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE Filter Operator - predicate: (value is not null and (key > '9')) (type: boolean) + predicate: ((key > '9') and value is not null) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) diff --git ql/src/test/results/clientpositive/table_access_keys_stats.q.out ql/src/test/results/clientpositive/table_access_keys_stats.q.out index f2c7b08..bbf24fd 100644 --- ql/src/test/results/clientpositive/table_access_keys_stats.q.out +++ ql/src/test/results/clientpositive/table_access_keys_stats.q.out @@ -509,7 +509,7 @@ Keys:val,key Table:default@t2 Keys:key -Operator:GBY_16 +Operator:GBY_17 Table:default@t3 Keys:val diff --git ql/src/test/results/clientpositive/union_remove_19.q.out ql/src/test/results/clientpositive/union_remove_19.q.out index bc00e16..ffcc0cb 100644 --- ql/src/test/results/clientpositive/union_remove_19.q.out +++ ql/src/test/results/clientpositive/union_remove_19.q.out @@ -259,25 +259,25 @@ STAGE PLANS: Map Operator Tree: TableScan alias: inputtbl1 - Statistics: Num rows: 10 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 30 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (UDFToDouble(key) = 7.0) (type: boolean) - Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 15 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: '7' (type: string) outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 15 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 15 Basic stats: COMPLETE Column 
stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 15 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator @@ -285,10 +285,10 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 7 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 7 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -310,25 +310,25 @@ STAGE PLANS: Map Operator Tree: TableScan alias: inputtbl1 - Statistics: Num rows: 10 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 30 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (UDFToDouble(key) = 7.0) (type: boolean) - Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 15 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: '7' (type: string) outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 15 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 15 Basic stats: COMPLETE Column stats: NONE Reduce Output 
Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 15 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator @@ -336,10 +336,10 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 7 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 7 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/vector_decimal_udf.q.out ql/src/test/results/clientpositive/vector_decimal_udf.q.out index 5de02ef..5c43c51 100644 --- ql/src/test/results/clientpositive/vector_decimal_udf.q.out +++ ql/src/test/results/clientpositive/vector_decimal_udf.q.out @@ -1191,7 +1191,7 @@ STAGE PLANS: alias: decimal_udf Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and (key <> 0)) (type: boolean) + predicate: (key <> 0) (type: boolean) Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (key / key) (type: decimal(38,24)) @@ -1270,7 +1270,7 @@ STAGE PLANS: alias: decimal_udf Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (value is not null and (value <> 0)) (type: boolean) + predicate: (value <> 0) (type: boolean) Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE 
Select Operator expressions: (key / CAST( value AS decimal(10,0))) (type: decimal(31,21)) @@ -1339,7 +1339,7 @@ STAGE PLANS: alias: decimal_udf Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (value is not null and (value <> 0)) (type: boolean) + predicate: (value <> 0) (type: boolean) Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (UDFToDouble(key) / (UDFToDouble(value) / 2.0)) (type: double) diff --git ql/src/test/results/clientpositive/vector_if_expr.q.out ql/src/test/results/clientpositive/vector_if_expr.q.out index ad1cfc7..16fbb03 100644 --- ql/src/test/results/clientpositive/vector_if_expr.q.out +++ ql/src/test/results/clientpositive/vector_if_expr.q.out @@ -16,7 +16,7 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (cboolean1 is not null and cboolean1) (type: boolean) + predicate: (cboolean1 and cboolean1 is not null) (type: boolean) Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cboolean1 (type: boolean), if(cboolean1, 'first', 'second') (type: string) diff --git ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out index e8cd48a..dd40f28 100644 --- ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out +++ ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out @@ -222,7 +222,7 @@ STAGE PLANS: alias: lineitem Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((l_partkey is not null and l_orderkey is not null) and (l_linenumber = 1)) (type: boolean) + predicate: (((l_linenumber = 1) and l_partkey is not null) and l_orderkey is not null) (type: boolean) Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE 
Column stats: NONE Select Operator expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int) @@ -466,7 +466,7 @@ STAGE PLANS: alias: lineitem Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((l_shipmode = 'AIR') and l_orderkey is not null) and (l_linenumber = 1)) (type: boolean) + predicate: (((l_shipmode = 'AIR') and (l_linenumber = 1)) and l_orderkey is not null) (type: boolean) Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: l_orderkey (type: int) @@ -489,7 +489,7 @@ STAGE PLANS: alias: lineitem Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((l_partkey is not null and l_orderkey is not null) and (l_linenumber = 1)) (type: boolean) + predicate: (((l_linenumber = 1) and l_partkey is not null) and l_orderkey is not null) (type: boolean) Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int), 1 (type: int) diff --git ql/src/test/results/clientpositive/vectorization_short_regress.q.out ql/src/test/results/clientpositive/vectorization_short_regress.q.out index 78b5d07..90ba080 100644 --- ql/src/test/results/clientpositive/vectorization_short_regress.q.out +++ ql/src/test/results/clientpositive/vectorization_short_regress.q.out @@ -2630,7 +2630,7 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (cboolean1 is not null and (((cdouble < UDFToDouble(csmallint)) and (cboolean2 = cboolean1) and (UDFToDouble(cbigint) <= -863.257)) or ((cint >= -257) and cstring1 is not null and (cboolean1 >= 1)) or cstring2 regexp 'b' or ((csmallint >= UDFToShort(ctinyint)) and ctimestamp2 is null))) (type: boolean) + predicate: ((((cdouble < 
UDFToDouble(csmallint)) and (cboolean2 = cboolean1) and (UDFToDouble(cbigint) <= -863.257)) or ((cint >= -257) and cstring1 is not null and (cboolean1 >= 1)) or cstring2 regexp 'b' or ((csmallint >= UDFToShort(ctinyint)) and ctimestamp2 is null)) and cboolean1 is not null) (type: boolean) Statistics: Num rows: 10239 Data size: 2201421 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cboolean1 (type: boolean), cfloat (type: float), cbigint (type: bigint), cint (type: int), cdouble (type: double), ctinyint (type: tinyint), csmallint (type: smallint)