diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java new file mode 100644 index 0000000..e4d5c3a --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -0,0 +1,2545 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.parse; + +import java.lang.reflect.Field; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.UndeclaredThrowableException; +import java.math.BigDecimal; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.atomic.AtomicInteger; + +import org.antlr.runtime.tree.TreeVisitor; +import org.antlr.runtime.tree.TreeVisitorAction; +import org.apache.calcite.plan.RelOptCluster; +import org.apache.calcite.plan.RelOptPlanner; +import org.apache.calcite.plan.RelOptQuery; +import org.apache.calcite.plan.RelOptRule; +import org.apache.calcite.plan.RelOptSchema; +import org.apache.calcite.plan.RelOptUtil; +import org.apache.calcite.plan.RelTraitSet; +import org.apache.calcite.plan.hep.HepMatchOrder; +import org.apache.calcite.plan.hep.HepPlanner; +import org.apache.calcite.plan.hep.HepProgram; +import org.apache.calcite.plan.hep.HepProgramBuilder; +import org.apache.calcite.rel.InvalidRelException; +import org.apache.calcite.rel.RelCollation; +import org.apache.calcite.rel.RelCollationImpl; +import org.apache.calcite.rel.RelFieldCollation; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.Aggregate; +import org.apache.calcite.rel.core.AggregateCall; +import org.apache.calcite.rel.core.Filter; +import org.apache.calcite.rel.core.Join; +import org.apache.calcite.rel.core.JoinRelType; +import org.apache.calcite.rel.core.RelFactories; +import org.apache.calcite.rel.core.SemiJoin; +import org.apache.calcite.rel.metadata.CachingRelMetadataProvider; +import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider; +import org.apache.calcite.rel.metadata.RelMetadataProvider; +import org.apache.calcite.rel.rules.FilterAggregateTransposeRule; +import org.apache.calcite.rel.rules.FilterMergeRule; +import org.apache.calcite.rel.rules.FilterProjectTransposeRule; +import org.apache.calcite.rel.rules.JoinPushTransitivePredicatesRule; +import org.apache.calcite.rel.rules.JoinToMultiJoinRule; +import org.apache.calcite.rel.rules.LoptOptimizeJoinRule; +import org.apache.calcite.rel.rules.SemiJoinFilterTransposeRule; +import org.apache.calcite.rel.rules.SemiJoinJoinTransposeRule; +import 
org.apache.calcite.rel.rules.SemiJoinProjectTransposeRule; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.rel.type.RelDataTypeField; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexFieldCollation; +import org.apache.calcite.rex.RexInputRef; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexUtil; +import org.apache.calcite.rex.RexWindowBound; +import org.apache.calcite.schema.SchemaPlus; +import org.apache.calcite.sql.SqlAggFunction; +import org.apache.calcite.sql.SqlCall; +import org.apache.calcite.sql.SqlExplainLevel; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.SqlLiteral; +import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.sql.SqlWindow; +import org.apache.calcite.sql.parser.SqlParserPos; +import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.calcite.sql2rel.RelFieldTrimmer; +import org.apache.calcite.tools.Frameworks; +import org.apache.calcite.util.CompositeList; +import org.apache.calcite.util.ImmutableBitSet; +import org.apache.calcite.util.ImmutableIntList; +import org.apache.calcite.util.Pair; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.conf.HiveConf.ConfVars; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.ql.ErrorMsg; +import org.apache.hadoop.hive.ql.QueryProperties; +import org.apache.hadoop.hive.ql.exec.ColumnInfo; +import org.apache.hadoop.hive.ql.exec.FunctionInfo; +import org.apache.hadoop.hive.ql.exec.FunctionRegistry; +import org.apache.hadoop.hive.ql.lib.Node; +import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.metadata.VirtualColumn; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveDefaultRelMetadataProvider; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveTypeSystemImpl; +import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException; +import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable; +import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil; +import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveVolcanoPlanner; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSort; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveUnion; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterJoinRule; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterProjectTransposeRule; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterSetOpTransposeRule; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePartitionPruneRule; +import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ASTConverter; +import org.apache.hadoop.hive.ql.optimizer.calcite.translator.JoinCondTypeCheckProcFactory; +import org.apache.hadoop.hive.ql.optimizer.calcite.translator.JoinTypeCheckCtx; +import 
org.apache.hadoop.hive.ql.optimizer.calcite.translator.RexNodeConverter; +import org.apache.hadoop.hive.ql.optimizer.calcite.translator.SqlFunctionConverter; +import org.apache.hadoop.hive.ql.optimizer.calcite.translator.TypeConverter; +import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.OrderExpression; +import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.OrderSpec; +import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitionExpression; +import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitionSpec; +import org.apache.hadoop.hive.ql.parse.QBSubQuery.SubQueryType; +import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.GenericUDAFInfo; +import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.Phase1Ctx; +import org.apache.hadoop.hive.ql.parse.WindowingSpec.BoundarySpec; +import org.apache.hadoop.hive.ql.parse.WindowingSpec.RangeBoundarySpec; +import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowExpressionSpec; +import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowFunctionSpec; +import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowSpec; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils; +import org.apache.hadoop.hive.ql.plan.GroupByDesc; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; + +import com.google.common.base.Function; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Lists; +import com.google.common.collect.ImmutableList.Builder; + +public class CalcitePlanner { + private final SemanticAnalyzer semAnalyzer; + private AtomicInteger noColsMissingStats = new AtomicInteger(0); + private List topLevelFieldSchema; + private SemanticException semanticException; + + public CalcitePlanner(SemanticAnalyzer semAnalyzer) throws SemanticException { + this.semAnalyzer = semAnalyzer; + } + + /** + * Can CBO handle the given AST? + * + * @param ast + * Top level AST + * @param qb + * top level QB corresponding to the AST + * @param cboCtx + * @param semAnalyzer + * @return boolean + * + * Assumption:
+ * If the top level QB is a query then everything below it must also be + * a query. + */ + static boolean canHandleAstForCbo(ASTNode ast, QB qb, PreCboCtx cboCtx, + SemanticAnalyzer semAnalyzer) { + int root = ast.getToken().getType(); + boolean needToLogMessage = semAnalyzer.LOG.isInfoEnabled(); + boolean isSupportedRoot = root == HiveParser.TOK_QUERY || root == HiveParser.TOK_EXPLAIN + || qb.isCTAS(); + boolean isSupportedType = qb.getIsQuery() || qb.isCTAS() + || cboCtx.type == PreCboCtx.Type.INSERT; + boolean noBadTokens = HiveCalciteUtil.validateASTForUnsupportedTokens(ast); + boolean result = isSupportedRoot && isSupportedType && semAnalyzer.getCreateViewDesc() == null + && noBadTokens; + + if (!result) { + if (needToLogMessage) { + String msg = ""; + if (!isSupportedRoot) + msg += "doesn't have QUERY or EXPLAIN as root and is not a CTAS; "; + if (!isSupportedType) + msg += "is not a query, CTAS, or insert; "; + if (semAnalyzer.getCreateViewDesc() != null) + msg += "has create view; "; + if (!noBadTokens) + msg += "has unsupported tokens; "; + + if (msg.isEmpty()) + msg += "has some unspecified limitations; "; + semAnalyzer.LOG.info("Not invoking CBO because the statement " + + msg.substring(0, msg.length() - 2)); + } + return false; + } + // Now check the QB in more detail. canHandleQbForCbo returns null if the + // query can be handled. + String msg = CalcitePlanner.canHandleQbForCbo(semAnalyzer.queryProperties, semAnalyzer.conf, + true, needToLogMessage); + if (msg == null) { + return true; + } + if (needToLogMessage) { + semAnalyzer.LOG.info("Not invoking CBO because the statement " + + msg.substring(0, msg.length() - 2)); + } + return false; + } + + /** + * Checks whether Calcite can handle the query. + * + * @param queryProperties + * @param conf + * @param topLevelQB + * Does the QB correspond to the top-most query block? + * @param verbose + * Whether the return value should be verbose in case of failure. + * @return null if the query can be handled; non-null reason string if it + * cannot be. + * + * Assumption:
+ * 1. If the top level QB is a query then everything below it must also be + * a query.
+ * 2. A nested subquery will return false for qbToChk.getIsQuery(). + */ + static String canHandleQbForCbo(QueryProperties queryProperties, HiveConf conf, + boolean topLevelQB, boolean verbose) { + boolean isInTest = conf.getBoolVar(ConfVars.HIVE_IN_TEST); + boolean isStrictTest = isInTest + && !conf.getVar(ConfVars.HIVEMAPREDMODE).equalsIgnoreCase("nonstrict"); + boolean hasEnoughJoins = !topLevelQB || (queryProperties.getJoinCount() > 1) || isInTest; + + if (!isStrictTest && hasEnoughJoins && !queryProperties.hasClusterBy() + && !queryProperties.hasDistributeBy() && !queryProperties.hasSortBy() + && !queryProperties.hasPTF() && !queryProperties.usesScript() + && !queryProperties.hasMultiDestQuery() && !queryProperties.hasLateralViews()) { + // Ok to run CBO. + return null; + } + + // Not ok to run CBO; build the error message. + String msg = ""; + if (verbose) { + if (isStrictTest) + msg += "is a test running in a mode other than nonstrict; "; + if (!hasEnoughJoins) + msg += "has too few joins; "; + if (queryProperties.hasClusterBy()) + msg += "has cluster by; "; + if (queryProperties.hasDistributeBy()) + msg += "has distribute by; "; + if (queryProperties.hasSortBy()) + msg += "has sort by; "; + if (queryProperties.hasPTF()) + msg += "has PTF; "; + if (queryProperties.usesScript()) + msg += "uses scripts; "; + if (queryProperties.hasMultiDestQuery()) + msg += "is a multi-destination query; "; + if (queryProperties.hasLateralViews()) + msg += "has lateral views; "; + + if (msg.isEmpty()) + msg += "has some unspecified limitations; "; + } + return msg; + } + + /** + * The context that doPhase1 uses to populate information pertaining to CBO + * (currently, this is used for CTAS and insert-as-select). + */ + static class PreCboCtx { + enum Type { + NONE, INSERT, CTAS, UNEXPECTED + } + + public ASTNode nodeOfInterest; + public Type type = Type.NONE; + + public void set(Type type, ASTNode ast) { + if (this.type != Type.NONE) { + SemanticAnalyzer.STATIC_LOG.warn("Setting " + type + " when already " + this.type + + "; node " + ast.dump() + " vs old node " + nodeOfInterest.dump()); + this.type = Type.UNEXPECTED; + return; + } + this.type = type; + this.nodeOfInterest = ast; + } + } + + ASTNode fixUpCtasAndInsertAfterCbo(ASTNode originalAst, ASTNode newAst, PreCboCtx cboCtx) + throws SemanticException { + switch (cboCtx.type) { + + case NONE: + // nothing to do + return newAst; + + case CTAS: { + // Patch the optimized query back into the original CTAS AST, replacing + // the original query. + replaceASTChild(cboCtx.nodeOfInterest, newAst); + return originalAst; + } + + case INSERT: { + // We need to patch the original destination back into the new query. + // This makes assumptions about the structure of the AST. + ASTNode newDest = semAnalyzer.astSearcher.simpleBreadthFirstSearch(newAst, + HiveParser.TOK_QUERY, HiveParser.TOK_INSERT, HiveParser.TOK_DESTINATION); + if (newDest == null) { + semAnalyzer.LOG.error("Cannot find destination after CBO; new ast is " + newAst.dump()); + throw new SemanticException("Cannot find destination after CBO"); + } + replaceASTChild(newDest, cboCtx.nodeOfInterest); + return newAst; + } + + default: + throw new AssertionError("Unexpected type " + cboCtx.type); + }  + }
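For illustration, here is a minimal, self-contained sketch (not part of the patch) of the child-replacement step that `fixUpCtasAndInsertAfterCbo` delegates to `replaceASTChild`, defined further down. It uses plain ANTLR `CommonTree` nodes; the token types and tree shape are invented for the example.

```java
import org.antlr.runtime.CommonToken;
import org.antlr.runtime.tree.CommonTree;

public class ReplaceChildSketch {
  // Mirrors replaceASTChild: splice newChild into the slot that
  // child currently occupies under its parent.
  static void replaceChild(CommonTree child, CommonTree newChild) {
    CommonTree parent = (CommonTree) child.getParent();
    int childIndex = child.getChildIndex();
    parent.deleteChild(childIndex);
    parent.insertChild(childIndex, newChild);
    parent.freshenParentAndChildIndexes(); // keep cached child indexes valid
  }

  public static void main(String[] args) {
    // Hypothetical mini-tree standing in for CTAS(name, query).
    CommonTree ctas = new CommonTree(new CommonToken(1, "CTAS"));
    CommonTree name = new CommonTree(new CommonToken(2, "t1"));
    CommonTree oldQuery = new CommonTree(new CommonToken(3, "OLD_QUERY"));
    ctas.addChild(name);
    ctas.addChild(oldQuery);

    CommonTree optimized = new CommonTree(new CommonToken(3, "OPTIMIZED_QUERY"));
    replaceChild(oldQuery, optimized);
    System.out.println(ctas.toStringTree()); // (CTAS t1 OPTIMIZED_QUERY)
  }
}
```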
+ + ASTNode reAnalyzeCtasAfterCbo(ASTNode newAst) throws SemanticException { + // analyzeCreateTable uses this.ast, but doPhase1 doesn't, so only reset it + // here. + semAnalyzer.setAST(newAst); + newAst = semAnalyzer.analyzeCreateTable(newAst, semAnalyzer.getQB(), null); + if (newAst == null) { + semAnalyzer.LOG.error("analyzeCreateTable failed to initialize CTAS after CBO;" + + " new ast is " + semAnalyzer.getAST().dump()); + throw new SemanticException("analyzeCreateTable failed to initialize CTAS after CBO"); + } + return newAst; + } + + /** + * Performs a breadth-first search of the AST for a nested set of tokens. Tokens + * don't have to be each other's direct children; they can be separated by + * layers of other tokens. For each token in the list, the first one found is + * matched and there's no backtracking; thus, if the AST has multiple instances of + * some token, of which only one matches, it is not guaranteed to be found. We + * use this for simple things. Not thread-safe - reuses searchQueue. + */ + static class ASTSearcher { + private final LinkedList<ASTNode> searchQueue = new LinkedList<ASTNode>(); + + public ASTNode simpleBreadthFirstSearch(ASTNode ast, int... tokens) { + searchQueue.clear(); + searchQueue.add(ast); + for (int i = 0; i < tokens.length; ++i) { + boolean found = false; + int token = tokens[i]; + while (!searchQueue.isEmpty() && !found) { + ASTNode next = searchQueue.poll(); + found = next.getType() == token; + if (found) { + if (i == tokens.length - 1) + return next; + searchQueue.clear(); + } + for (int j = 0; j < next.getChildCount(); ++j) { + searchQueue.add((ASTNode) next.getChild(j)); + } + } + if (!found) + return null; + } + return null; + } + } + + private static void replaceASTChild(ASTNode child, ASTNode newChild) { + ASTNode parent = (ASTNode) child.parent; + int childIndex = child.childIndex; + parent.deleteChild(childIndex); + parent.insertChild(childIndex, newChild); + } + + /** + * Get the optimized AST for the given QB tree in the semAnalyzer. + * + * @return optimized operator tree translated into a Hive AST + * @throws SemanticException + */ + ASTNode getOptimizedAST(Map<String, PrunedPartitionList> partitionCache) throws SemanticException { + ASTNode optiqOptimizedAST = null; + RelNode optimizedOptiqPlan = null; + CalcitePlannerAction calcitePlannerAction = new CalcitePlannerAction( + semAnalyzer.prunedPartitions); + + try { + optimizedOptiqPlan = Frameworks.withPlanner(calcitePlannerAction, Frameworks + .newConfigBuilder().typeSystem(new HiveTypeSystemImpl()).build()); + } catch (Exception e) { + rethrowCalciteException(e); + throw new AssertionError("rethrowCalciteException didn't throw for " + e.getMessage()); + } + optiqOptimizedAST = ASTConverter.convert(optimizedOptiqPlan, topLevelFieldSchema); + + return optiqOptimizedAST; + } + + /** + * Number of columns that are missing stats. Should only be called after + * getOptimizedAST(). + * + * @return int + */ + int getNumberOfColsMissingStats() { + return noColsMissingStats.get(); + }
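A hedged usage sketch of `ASTSearcher`: it assumes the snippet lives in the same `org.apache.hadoop.hive.ql.parse` package (so the `HiveParser` token constants, `ParseDriver`, and the package-private `ASTSearcher` are visible), and the SQL text is invented.

```java
// Sketch; assumes package org.apache.hadoop.hive.ql.parse for visibility.
public class AstSearcherSketch {
  public static void main(String[] args) throws Exception {
    // ParseDriver.parse returns the root ASTNode of the statement.
    ASTNode ast = new ParseDriver().parse("SELECT key FROM src");
    // Walk TOK_QUERY -> TOK_INSERT -> TOK_DESTINATION, matching only the
    // first occurrence at each level (no backtracking, as documented above).
    ASTNode dest = new CalcitePlanner.ASTSearcher().simpleBreadthFirstSearch(
        ast, HiveParser.TOK_QUERY, HiveParser.TOK_INSERT, HiveParser.TOK_DESTINATION);
    System.out.println(dest == null ? "not found" : dest.dump());
  }
}
```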
+ + /** + * Unwraps Calcite invocation exceptions coming from the metadata provider + * chain and obtains the real cause. + * + * @param e the exception to unwrap + */ + private void rethrowCalciteException(Exception e) throws SemanticException { + Throwable first = (semanticException != null) ? semanticException : e, current = first, cause = current + .getCause(); + while (cause != null) { + Throwable causeOfCause = cause.getCause(); + if (current == first && causeOfCause == null && isUselessCause(first)) { + // "cause" is a root cause, and "e"/"first" is a useless + // exception it's wrapped in. + first = cause; + break; + } else if (causeOfCause != null && isUselessCause(cause) + && ExceptionHelper.resetCause(current, causeOfCause)) { + // "cause" was a useless intermediate cause and was replaced + // with its own cause. + cause = causeOfCause; + continue; // do the loop once again with the new cause of "current" + } + current = cause; + cause = current.getCause(); + } + + if (first instanceof RuntimeException) { + throw (RuntimeException) first; + } else if (first instanceof SemanticException) { + throw (SemanticException) first; + } + throw new RuntimeException(first); + } + + private static class ExceptionHelper { + private static final Field CAUSE_FIELD = getField(Throwable.class, "cause"), + TARGET_FIELD = getField(InvocationTargetException.class, "target"), + MESSAGE_FIELD = getField(Throwable.class, "detailMessage"); + + private static Field getField(Class<?> clazz, String name) { + try { + Field f = clazz.getDeclaredField(name); + f.setAccessible(true); + return f; + } catch (Throwable t) { + return null; + } + } + + public static boolean resetCause(Throwable target, Throwable newCause) { + try { + if (MESSAGE_FIELD == null) + return false; + Field field = (target instanceof InvocationTargetException) ? TARGET_FIELD : CAUSE_FIELD; + if (field == null) + return false; + + Throwable oldCause = target.getCause(); + String oldMsg = target.getMessage(); + field.set(target, newCause); + if (oldMsg != null && oldMsg.equals(oldCause.toString())) { + MESSAGE_FIELD.set(target, newCause == null ? null : newCause.toString()); + } + } catch (Throwable se) { + return false; + } + return true; + } + } + + private boolean isUselessCause(Throwable t) { + return t instanceof RuntimeException || t instanceof InvocationTargetException + || t instanceof UndeclaredThrowableException; + }
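The cause-unwrapping idea, reduced to a standalone sketch with toy exceptions. The real method above additionally rewrites causes in place via reflection to preserve messages; this simplification only walks the chain.

```java
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.UndeclaredThrowableException;

public class UnwrapSketch {
  // Same predicate the planner uses: wrappers that add no information.
  static boolean isUselessCause(Throwable t) {
    return t instanceof RuntimeException || t instanceof InvocationTargetException
        || t instanceof UndeclaredThrowableException;
  }

  // Walk the cause chain and return the deepest meaningful throwable.
  static Throwable rootCause(Throwable t) {
    Throwable current = t;
    while (current.getCause() != null && isUselessCause(current)) {
      current = current.getCause();
    }
    return current;
  }

  public static void main(String[] args) {
    Exception real = new IllegalStateException("schema mismatch");
    Exception wrapped = new RuntimeException(new InvocationTargetException(real));
    System.out.println(rootCause(wrapped)); // java.lang.IllegalStateException: schema mismatch
  }
}
```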
+ + /** + * Code responsible for Calcite plan generation and optimization. + */ + private class CalcitePlannerAction implements Frameworks.PlannerAction<RelNode> { + private RelOptCluster cluster; + private RelOptSchema relOptSchema; + private Map<String, PrunedPartitionList> partitionCache; + + // TODO: Do we need to keep track of the RR and ColNameToPosMap for every + // op, or just the last one? + LinkedHashMap<RelNode, RowResolver> relToHiveRR = new LinkedHashMap<RelNode, RowResolver>(); + LinkedHashMap<RelNode, ImmutableMap<String, Integer>> relToHiveColNameCalcitePosMap = new LinkedHashMap<RelNode, ImmutableMap<String, Integer>>(); + + CalcitePlannerAction(Map<String, PrunedPartitionList> partitionCache) { + this.partitionCache = partitionCache; + } + + @Override + public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlus rootSchema) { + RelNode calciteGenPlan = null; + RelNode calcitePreCboPlan = null; + RelNode calciteOptimizedPlan = null; + + /* + * recreate the cluster, so that it picks up the additional traitDef + */ + RelOptPlanner planner = HiveVolcanoPlanner.createPlanner(); + final RelOptQuery query = new RelOptQuery(planner); + final RexBuilder rexBuilder = cluster.getRexBuilder(); + cluster = query.createCluster(rexBuilder.getTypeFactory(), rexBuilder); + + this.cluster = cluster; + this.relOptSchema = relOptSchema; + + // 1. Gen Calcite Plan + try { + calciteGenPlan = genLogicalPlan(semAnalyzer.getQB(), true); + topLevelFieldSchema = SemanticAnalyzer.convertRowSchemaToResultSetSchema(relToHiveRR + .get(calciteGenPlan), HiveConf.getBoolVar(semAnalyzer.conf, + HiveConf.ConfVars.HIVE_RESULTSET_USE_UNIQUE_COLUMN_NAMES)); + } catch (SemanticException e) { + semanticException = e; + throw new RuntimeException(e); + } + + // 2. Apply Pre Join Order optimizations + calcitePreCboPlan = applyPreJoinOrderingTransforms(calciteGenPlan, + HiveDefaultRelMetadataProvider.INSTANCE); + + // 3. Apply Join Order Optimizations using Hep Planner (MST Algorithm) + List<RelMetadataProvider> list = Lists.newArrayList(); + list.add(HiveDefaultRelMetadataProvider.INSTANCE); + RelTraitSet desiredTraits = cluster + .traitSetOf(HiveRelNode.CONVENTION, RelCollationImpl.EMPTY); + + HepProgram hepPgm = null; + HepProgramBuilder hepPgmBldr = new HepProgramBuilder().addMatchOrder(HepMatchOrder.BOTTOM_UP) + .addRuleInstance(new JoinToMultiJoinRule(HiveJoin.class)); + hepPgmBldr.addRuleInstance(new LoptOptimizeJoinRule(HiveJoin.HIVE_JOIN_FACTORY, + HiveProject.DEFAULT_PROJECT_FACTORY, HiveFilter.DEFAULT_FILTER_FACTORY)); + + hepPgm = hepPgmBldr.build(); + HepPlanner hepPlanner = new HepPlanner(hepPgm); + + hepPlanner.registerMetadataProviders(list); + RelMetadataProvider chainedProvider = ChainedRelMetadataProvider.of(list); + cluster.setMetadataProvider(new CachingRelMetadataProvider(chainedProvider, hepPlanner)); + + RelNode rootRel = calcitePreCboPlan; + hepPlanner.setRoot(rootRel); + if (!calcitePreCboPlan.getTraitSet().equals(desiredTraits)) { + rootRel = hepPlanner.changeTraits(calcitePreCboPlan, desiredTraits); + } + hepPlanner.setRoot(rootRel); + + calciteOptimizedPlan = hepPlanner.findBestExp(); + + if (semAnalyzer.LOG.isDebugEnabled() && !semAnalyzer.conf.getBoolVar(ConfVars.HIVE_IN_TEST)) { + semAnalyzer.LOG.debug("CBO Planning details:\n"); + semAnalyzer.LOG.debug("Original Plan:\n" + RelOptUtil.toString(calciteGenPlan)); + semAnalyzer.LOG.debug("Plan After PPD, PartPruning, ColumnPruning:\n" + + RelOptUtil.toString(calcitePreCboPlan)); + semAnalyzer.LOG.debug("Plan After Join Reordering:\n" + + RelOptUtil.toString(calciteOptimizedPlan, SqlExplainLevel.ALL_ATTRIBUTES)); + } + + return calciteOptimizedPlan; + }
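For reference, the Hep driver pattern used in step 3, isolated as a hedged sketch. It uses Calcite's stock `INSTANCE` rules rather than the Hive factories wired in above, and assumes the caller already holds a `RelNode` tree.

```java
import org.apache.calcite.plan.hep.HepMatchOrder;
import org.apache.calcite.plan.hep.HepPlanner;
import org.apache.calcite.plan.hep.HepProgram;
import org.apache.calcite.plan.hep.HepProgramBuilder;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.rules.JoinToMultiJoinRule;
import org.apache.calcite.rel.rules.LoptOptimizeJoinRule;

public class JoinReorderSketch {
  // Collapse consecutive joins into a MultiJoin bottom-up, then let
  // LoptOptimizeJoinRule pick a cheaper join order.
  static RelNode reorderJoins(RelNode root) {
    HepProgram program = new HepProgramBuilder()
        .addMatchOrder(HepMatchOrder.BOTTOM_UP)
        .addRuleInstance(JoinToMultiJoinRule.INSTANCE)
        .addRuleInstance(LoptOptimizeJoinRule.INSTANCE)
        .build();
    HepPlanner planner = new HepPlanner(program);
    planner.setRoot(root);
    return planner.findBestExp();
  }
}
```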
+ + /** + * Perform all optimizations before Join Ordering. + * + * @param basePlan + * original plan + * @param mdProvider + * meta data provider + * @return the transformed plan + */ + private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProvider mdProvider) { + + // TODO: Decorrelation of subqueries should be done before attempting + // Partition Pruning; otherwise Expression evaluation may try to execute a + // correlated subquery. + + // 1. Push Down Semi Joins + basePlan = hepPlan(basePlan, true, mdProvider, SemiJoinJoinTransposeRule.INSTANCE, + SemiJoinFilterTransposeRule.INSTANCE, SemiJoinProjectTransposeRule.INSTANCE); + + // 2. PPD + basePlan = hepPlan(basePlan, true, mdProvider, new HiveFilterProjectTransposeRule( + Filter.class, HiveFilter.DEFAULT_FILTER_FACTORY, HiveProject.class, + HiveProject.DEFAULT_PROJECT_FACTORY), new HiveFilterSetOpTransposeRule( + HiveFilter.DEFAULT_FILTER_FACTORY), + new FilterMergeRule(HiveFilter.DEFAULT_FILTER_FACTORY), HiveFilterJoinRule.JOIN, + HiveFilterJoinRule.FILTER_ON_JOIN, new FilterAggregateTransposeRule(Filter.class, + HiveFilter.DEFAULT_FILTER_FACTORY, Aggregate.class)); + + // 3. Transitive inference & Partition Pruning + basePlan = hepPlan(basePlan, false, mdProvider, new JoinPushTransitivePredicatesRule( + Join.class, HiveFilter.DEFAULT_FILTER_FACTORY), + // TODO: Enable it after CALCITE-407 is fixed + // RemoveTrivialProjectRule.INSTANCE, + new HivePartitionPruneRule(semAnalyzer.conf)); + + // 4. Projection Pruning + RelFieldTrimmer fieldTrimmer = new RelFieldTrimmer(null, HiveProject.DEFAULT_PROJECT_FACTORY, + HiveFilter.DEFAULT_FILTER_FACTORY, HiveJoin.HIVE_JOIN_FACTORY, + RelFactories.DEFAULT_SEMI_JOIN_FACTORY, HiveSort.HIVE_SORT_REL_FACTORY, + HiveAggregate.HIVE_AGGR_REL_FACTORY, HiveUnion.UNION_REL_FACTORY); + basePlan = fieldTrimmer.trim(basePlan); + + // 5. Rerun PPD through Project as column pruning would have introduced a + // DT above scans + basePlan = hepPlan(basePlan, true, mdProvider, + new FilterProjectTransposeRule(Filter.class, HiveFilter.DEFAULT_FILTER_FACTORY, + HiveProject.class, HiveProject.DEFAULT_PROJECT_FACTORY)); + + return basePlan; + } + + /** + * Run the HEP Planner with the given rule set. + * + * @param basePlan + * @param followPlanChanges + * @param mdProvider + * @param rules + * @return optimized RelNode + */ + private RelNode hepPlan(RelNode basePlan, boolean followPlanChanges, + RelMetadataProvider mdProvider, RelOptRule... rules) { + + RelNode optimizedRelNode = basePlan; + HepProgramBuilder programBuilder = new HepProgramBuilder(); + if (followPlanChanges) { + programBuilder.addMatchOrder(HepMatchOrder.TOP_DOWN); + programBuilder = programBuilder.addRuleCollection(ImmutableList.copyOf(rules)); + } else { + // TODO: Should this also be TOP_DOWN? + for (RelOptRule r : rules) + programBuilder.addRuleInstance(r); + } + + HepPlanner planner = new HepPlanner(programBuilder.build()); + List<RelMetadataProvider> list = Lists.newArrayList(); + list.add(mdProvider); + planner.registerMetadataProviders(list); + RelMetadataProvider chainedProvider = ChainedRelMetadataProvider.of(list); + basePlan.getCluster().setMetadataProvider( + new CachingRelMetadataProvider(chainedProvider, planner)); + + planner.setRoot(basePlan); + optimizedRelNode = planner.findBestExp(); + + return optimizedRelNode; + }
" + leftalias + + " does not have the field " + field)); + } + if (!lInfo.getInternalName().equals(rInfo.getInternalName())) { + throw new CalciteSemanticException(SemanticAnalyzer.generateErrorMessage( + tabref, + "Schema of both sides of union should match: field " + field + ":" + + " appears on the left side of the UNION at column position: " + + SemanticAnalyzer.getPositionFromInternalName(lInfo.getInternalName()) + + ", and on the right side of the UNION at column position: " + + SemanticAnalyzer.getPositionFromInternalName(rInfo.getInternalName()) + + ". Column positions should match for a UNION")); + } + // try widening coversion, otherwise fail union + TypeInfo commonTypeInfo = FunctionRegistry.getCommonClassForUnionAll(lInfo.getType(), + rInfo.getType()); + if (commonTypeInfo == null) { + throw new CalciteSemanticException(SemanticAnalyzer.generateErrorMessage(tabref, + "Schema of both sides of union should match: Column " + field + " is of type " + + lInfo.getType().getTypeName() + " on first table and type " + + rInfo.getType().getTypeName() + " on second table")); + } + } + + // 3. construct Union Output RR using original left & right Input + RowResolver unionoutRR = new RowResolver(); + for (Map.Entry lEntry : leftmap.entrySet()) { + String field = lEntry.getKey(); + ColumnInfo lInfo = lEntry.getValue(); + ColumnInfo rInfo = rightmap.get(field); + ColumnInfo unionColInfo = new ColumnInfo(lInfo); + unionColInfo.setTabAlias(unionalias); + unionColInfo.setType(FunctionRegistry.getCommonClassForUnionAll(lInfo.getType(), + rInfo.getType())); + unionoutRR.put(unionalias, field, unionColInfo); + } + + // 4. Determine which columns requires cast on left/right input (Calcite + // requires exact types on both sides of union) + boolean leftNeedsTypeCast = false; + boolean rightNeedsTypeCast = false; + List leftProjs = new ArrayList(); + List rightProjs = new ArrayList(); + List leftRowDT = leftRel.getRowType().getFieldList(); + List rightRowDT = rightRel.getRowType().getFieldList(); + + RelDataType leftFieldDT; + RelDataType rightFieldDT; + RelDataType unionFieldDT; + for (int i = 0; i < leftRowDT.size(); i++) { + leftFieldDT = leftRowDT.get(i).getType(); + rightFieldDT = rightRowDT.get(i).getType(); + if (!leftFieldDT.equals(rightFieldDT)) { + unionFieldDT = TypeConverter.convert(unionoutRR.getColumnInfos().get(i).getType(), + cluster.getTypeFactory()); + if (!unionFieldDT.equals(leftFieldDT)) { + leftNeedsTypeCast = true; + } + leftProjs.add(cluster.getRexBuilder().ensureType(unionFieldDT, + cluster.getRexBuilder().makeInputRef(leftFieldDT, i), true)); + + if (!unionFieldDT.equals(rightFieldDT)) { + rightNeedsTypeCast = true; + } + rightProjs.add(cluster.getRexBuilder().ensureType(unionFieldDT, + cluster.getRexBuilder().makeInputRef(rightFieldDT, i), true)); + } else { + leftProjs.add(cluster.getRexBuilder().ensureType(leftFieldDT, + cluster.getRexBuilder().makeInputRef(leftFieldDT, i), true)); + rightProjs.add(cluster.getRexBuilder().ensureType(rightFieldDT, + cluster.getRexBuilder().makeInputRef(rightFieldDT, i), true)); + } + } + + // 5. Introduce Project Rel above original left/right inputs if cast is + // needed for type parity + RelNode unionLeftInput = leftRel; + RelNode unionRightInput = rightRel; + if (leftNeedsTypeCast) { + unionLeftInput = HiveProject.create(leftRel, leftProjs, leftRel.getRowType() + .getFieldNames()); + } + if (rightNeedsTypeCast) { + unionRightInput = HiveProject.create(rightRel, rightProjs, rightRel.getRowType() + .getFieldNames()); + } + + // 6. 
+ + // 6. Construct the Union Rel + Builder<RelNode> bldr = new ImmutableList.Builder<RelNode>(); + bldr.add(unionLeftInput); + bldr.add(unionRightInput); + unionRel = new HiveUnion(cluster, TraitsUtil.getDefaultTraitSet(cluster), bldr.build()); + + relToHiveRR.put(unionRel, unionoutRR); + relToHiveColNameCalcitePosMap.put(unionRel, + this.buildHiveToCalciteColumnMap(unionoutRR, unionRel)); + + return unionRel; + }
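A standalone sketch, outside the patch, of the `ensureType` coercion from steps 4 and 5: when a branch's column type differs from the union's output type, the input ref is wrapped in a cast so both inputs expose identical row types. The types here are chosen arbitrarily.

```java
import org.apache.calcite.jdbc.JavaTypeFactoryImpl;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rel.type.RelDataTypeFactory;
import org.apache.calcite.rex.RexBuilder;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.sql.type.SqlTypeName;

public class UnionCoercionSketch {
  public static void main(String[] args) {
    RelDataTypeFactory typeFactory = new JavaTypeFactoryImpl();
    RexBuilder rexBuilder = new RexBuilder(typeFactory);

    // Branch column is INT, union output column is BIGINT: wrap the input
    // ref in a cast so both union inputs end up with the same row type.
    RelDataType intType = typeFactory.createSqlType(SqlTypeName.INTEGER);
    RelDataType bigintType = typeFactory.createSqlType(SqlTypeName.BIGINT);
    RexNode ref = rexBuilder.makeInputRef(intType, 0);
    RexNode coerced = rexBuilder.ensureType(bigintType, ref, true);
    System.out.println(coerced); // e.g. CAST($0):BIGINT NOT NULL
  }
}
```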
+ + private RelNode genJoinRelNode(RelNode leftRel, RelNode rightRel, JoinType hiveJoinType, + ASTNode joinCond) throws SemanticException { + RelNode joinRel = null; + + // 1. Construct the RowResolver for the new Join Node by combining the row + // resolvers from left and right + RowResolver leftRR = this.relToHiveRR.get(leftRel); + RowResolver rightRR = this.relToHiveRR.get(rightRel); + RowResolver joinRR = null; + + if (hiveJoinType != JoinType.LEFTSEMI) { + joinRR = RowResolver.getCombinedRR(leftRR, rightRR); + } else { + joinRR = new RowResolver(); + if (!RowResolver.add(joinRR, leftRR)) { + semAnalyzer.LOG + .warn("Duplicates detected when adding columns to RR: see previous message"); + } + } + + // 2. Construct the ExprNodeDesc representing the Join Condition + RexNode calciteJoinCond = null; + if (joinCond != null) { + JoinTypeCheckCtx jCtx = new JoinTypeCheckCtx(leftRR, rightRR, hiveJoinType); + Map<ASTNode, ExprNodeDesc> exprNodes = JoinCondTypeCheckProcFactory.genExprNode(joinCond, + jCtx); + if (jCtx.getError() != null) + throw new SemanticException(SemanticAnalyzer.generateErrorMessage(jCtx.getErrorSrcNode(), + jCtx.getError())); + + ExprNodeDesc joinCondnExprNode = exprNodes.get(joinCond); + + List<RelNode> inputRels = new ArrayList<RelNode>(); + inputRels.add(leftRel); + inputRels.add(rightRel); + calciteJoinCond = RexNodeConverter.convert(cluster, joinCondnExprNode, inputRels, + relToHiveRR, relToHiveColNameCalcitePosMap, false); + } else { + calciteJoinCond = cluster.getRexBuilder().makeLiteral(true); + } + + // 3. Validate that the join condition is legal (i.e. no function referring + // to both sides of the join, only equi-joins) + // TODO: Join filter handling (only supported for OJ by runtime, or is it + // supported for IJ as well?) + + // 4. Construct the Join Rel Node + boolean leftSemiJoin = false; + JoinRelType calciteJoinType; + switch (hiveJoinType) { + case LEFTOUTER: + calciteJoinType = JoinRelType.LEFT; + break; + case RIGHTOUTER: + calciteJoinType = JoinRelType.RIGHT; + break; + case FULLOUTER: + calciteJoinType = JoinRelType.FULL; + break; + case LEFTSEMI: + calciteJoinType = JoinRelType.INNER; + leftSemiJoin = true; + break; + case INNER: + default: + calciteJoinType = JoinRelType.INNER; + break; + } + + if (leftSemiJoin) { + List<RelDataTypeField> sysFieldList = new ArrayList<RelDataTypeField>(); + List<RexNode> leftJoinKeys = new ArrayList<RexNode>(); + List<RexNode> rightJoinKeys = new ArrayList<RexNode>(); + + RexNode nonEquiConds = RelOptUtil.splitJoinCondition(sysFieldList, leftRel, rightRel, + calciteJoinCond, leftJoinKeys, rightJoinKeys, null, null); + + if (!nonEquiConds.isAlwaysTrue()) { + throw new SemanticException("Non-equality condition not supported in Semi-Join: " + + nonEquiConds); + } + + RelNode[] inputRels = new RelNode[] { leftRel, rightRel }; + final List<Integer> leftKeys = new ArrayList<Integer>(); + final List<Integer> rightKeys = new ArrayList<Integer>(); + calciteJoinCond = HiveCalciteUtil.projectNonColumnEquiConditions( + HiveProject.DEFAULT_PROJECT_FACTORY, inputRels, leftJoinKeys, rightJoinKeys, 0, + leftKeys, rightKeys); + + joinRel = new SemiJoin(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), inputRels[0], + inputRels[1], calciteJoinCond, ImmutableIntList.copyOf(leftKeys), + ImmutableIntList.copyOf(rightKeys)); + } else { + joinRel = HiveJoin.getJoin(cluster, leftRel, rightRel, calciteJoinCond, calciteJoinType, + leftSemiJoin); + } + // 5. Add the new JoinRel & its RR to the maps + relToHiveColNameCalcitePosMap.put(joinRel, this.buildHiveToCalciteColumnMap(joinRR, joinRel)); + relToHiveRR.put(joinRel, joinRR); + + return joinRel; + }
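For reference, the Hive-to-Calcite join-type mapping above as a tiny self-contained sketch; `HiveJoinType` is a stand-in for `org.apache.hadoop.hive.ql.parse.JoinType`. LEFTSEMI deliberately maps to INNER plus a flag, since the semi join is built as a separate `SemiJoin` rel rather than being encoded in the join type.

```java
import org.apache.calcite.rel.core.JoinRelType;

public class JoinTypeSketch {
  enum HiveJoinType { INNER, LEFTOUTER, RIGHTOUTER, FULLOUTER, LEFTSEMI }

  static JoinRelType toCalcite(HiveJoinType t) {
    switch (t) {
    case LEFTOUTER:  return JoinRelType.LEFT;
    case RIGHTOUTER: return JoinRelType.RIGHT;
    case FULLOUTER:  return JoinRelType.FULL;
    // LEFTSEMI is INNER here; the planner builds a SemiJoin rel and keys instead.
    case LEFTSEMI:   return JoinRelType.INNER;
    default:         return JoinRelType.INNER;
    }
  }

  public static void main(String[] args) {
    System.out.println(toCalcite(HiveJoinType.LEFTSEMI)); // INNER
  }
}
```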
+ + /** + * Generate the Join logical plan RelNode by walking through the join AST. + * + * @param joinParseTree + * join AST node + * @param aliasToRel + * Alias (table/relation alias) to RelNode; only read, not + * written into, by this method + * @return + * @throws SemanticException + */ + private RelNode genJoinLogicalPlan(ASTNode joinParseTree, Map<String, RelNode> aliasToRel) + throws SemanticException { + RelNode leftRel = null; + RelNode rightRel = null; + JoinType hiveJoinType = null; + + if (joinParseTree.getToken().getType() == HiveParser.TOK_UNIQUEJOIN) { + String msg = String.format("UNIQUE JOIN is currently not supported in CBO," + " turn off cbo to use UNIQUE JOIN."); + semAnalyzer.LOG.debug(msg); + throw new CalciteSemanticException(msg); + } + + // 1. Determine the Join Type + // TODO: What about TOK_CROSSJOIN, TOK_MAPJOIN? + switch (joinParseTree.getToken().getType()) { + case HiveParser.TOK_LEFTOUTERJOIN: + hiveJoinType = JoinType.LEFTOUTER; + break; + case HiveParser.TOK_RIGHTOUTERJOIN: + hiveJoinType = JoinType.RIGHTOUTER; + break; + case HiveParser.TOK_FULLOUTERJOIN: + hiveJoinType = JoinType.FULLOUTER; + break; + case HiveParser.TOK_LEFTSEMIJOIN: + hiveJoinType = JoinType.LEFTSEMI; + break; + default: + hiveJoinType = JoinType.INNER; + break; + } + + // 2. Get the Left Table Alias + ASTNode left = (ASTNode) joinParseTree.getChild(0); + if ((left.getToken().getType() == HiveParser.TOK_TABREF) + || (left.getToken().getType() == HiveParser.TOK_SUBQUERY) + || (left.getToken().getType() == HiveParser.TOK_PTBLFUNCTION)) { + String tableName = SemanticAnalyzer.getUnescapedUnqualifiedTableName( + (ASTNode) left.getChild(0)).toLowerCase(); + String leftTableAlias = left.getChildCount() == 1 ? tableName : SemanticAnalyzer + .unescapeIdentifier(left.getChild(left.getChildCount() - 1).getText().toLowerCase()); + // ptf node form is: ^(TOK_PTBLFUNCTION $name $alias? + // partitionTableFunctionSource partitioningSpec? expression*) + // guaranteed to have an alias here: check done in processJoin + leftTableAlias = (left.getToken().getType() == HiveParser.TOK_PTBLFUNCTION) ? SemanticAnalyzer + .unescapeIdentifier(left.getChild(1).getText().toLowerCase()) : leftTableAlias; + leftRel = aliasToRel.get(leftTableAlias); + } else if (SemanticAnalyzer.isJoinToken(left)) { + leftRel = genJoinLogicalPlan(left, aliasToRel); + } else { + assert (false); + } + + // 3. Get the Right Table Alias + ASTNode right = (ASTNode) joinParseTree.getChild(1); + if ((right.getToken().getType() == HiveParser.TOK_TABREF) + || (right.getToken().getType() == HiveParser.TOK_SUBQUERY) + || (right.getToken().getType() == HiveParser.TOK_PTBLFUNCTION)) { + String tableName = SemanticAnalyzer.getUnescapedUnqualifiedTableName( + (ASTNode) right.getChild(0)).toLowerCase(); + String rightTableAlias = right.getChildCount() == 1 ? tableName : SemanticAnalyzer + .unescapeIdentifier(right.getChild(right.getChildCount() - 1).getText().toLowerCase()); + // ptf node form is: ^(TOK_PTBLFUNCTION $name $alias? + // partitionTableFunctionSource partitioningSpec? expression*) + // guaranteed to have an alias here: check done in processJoin + rightTableAlias = (right.getToken().getType() == HiveParser.TOK_PTBLFUNCTION) ? SemanticAnalyzer + .unescapeIdentifier(right.getChild(1).getText().toLowerCase()) : rightTableAlias; + rightRel = aliasToRel.get(rightTableAlias); + } else { + assert (false); + } + + // 4. Get the Join Condition + ASTNode joinCond = (ASTNode) joinParseTree.getChild(2); + + // 5. Create the Join rel + return genJoinRelNode(leftRel, rightRel, hiveJoinType, joinCond); + }
+ + private RelNode genTableLogicalPlan(String tableAlias, QB qb) throws SemanticException { + RowResolver rr = new RowResolver(); + HiveTableScan tableRel = null; + + try { + + // 1. If the table has a Sample specified, bail from the Calcite path. + if (qb.getParseInfo().getTabSample(tableAlias) != null + || semAnalyzer.getNameToSplitSampleMap().containsKey(tableAlias)) { + String msg = String.format("Table Sample specified for %s." + " Currently we don't support Table Sample clauses in CBO," + " turn off cbo for queries on tableSamples.", tableAlias); + semAnalyzer.LOG.debug(msg); + throw new CalciteSemanticException(msg); + } + + // 2. Get the Table Metadata + Table tab = qb.getMetaData().getSrcForAlias(tableAlias); + + // 3. Get the Table's Logical Schema (Row Type) + // NOTE: Table logical schema = Non Partition Cols + Partition Cols + + // Virtual Cols + + // 3.1 Add Column info for non-partition cols (Object Inspector fields) + @SuppressWarnings("deprecation") + StructObjectInspector rowObjectInspector = (StructObjectInspector) tab.getDeserializer() + .getObjectInspector(); + List<? extends StructField> fields = rowObjectInspector.getAllStructFieldRefs(); + ColumnInfo colInfo; + String colName; + ArrayList<ColumnInfo> cInfoLst = new ArrayList<ColumnInfo>(); + for (int i = 0; i < fields.size(); i++) { + colName = fields.get(i).getFieldName(); + colInfo = new ColumnInfo( + fields.get(i).getFieldName(), + TypeInfoUtils.getTypeInfoFromObjectInspector(fields.get(i).getFieldObjectInspector()), + tableAlias, false); + colInfo.setSkewedCol(SemanticAnalyzer.isSkewedCol(tableAlias, qb, colName)); + rr.put(tableAlias, colName, colInfo); + cInfoLst.add(colInfo); + } + // TODO: Fix this + ArrayList<ColumnInfo> nonPartitionColumns = new ArrayList<ColumnInfo>(cInfoLst); + ArrayList<ColumnInfo> partitionColumns = new ArrayList<ColumnInfo>(); + + // 3.2 Add column info corresponding to partition columns + for (FieldSchema part_col : tab.getPartCols()) { + colName = part_col.getName(); + colInfo = new ColumnInfo(colName, + TypeInfoFactory.getPrimitiveTypeInfo(part_col.getType()), tableAlias, true); + rr.put(tableAlias, colName, colInfo); + cInfoLst.add(colInfo); + partitionColumns.add(colInfo); + } + + // 3.3 Add column info corresponding to virtual columns + Iterator<VirtualColumn> vcs = VirtualColumn.getRegistry(semAnalyzer.conf).iterator(); + while (vcs.hasNext()) { + VirtualColumn vc = vcs.next(); + colInfo = new ColumnInfo(vc.getName(), vc.getTypeInfo(), tableAlias, true, + vc.getIsHidden()); + rr.put(tableAlias, vc.getName(), colInfo); + cInfoLst.add(colInfo); + } + + // 3.4 Build the row type from the fields + RelDataType rowType = TypeConverter.getType(cluster, rr, null); + + // 4. Build the RelOptAbstractTable + String fullyQualifiedTabName = tab.getDbName(); + if (fullyQualifiedTabName != null && !fullyQualifiedTabName.isEmpty()) + fullyQualifiedTabName = fullyQualifiedTabName + "." + tab.getTableName(); + else + fullyQualifiedTabName = tab.getTableName(); + RelOptHiveTable optTable = new RelOptHiveTable(relOptSchema, fullyQualifiedTabName, + tableAlias, rowType, tab, nonPartitionColumns, partitionColumns, semAnalyzer.conf, + partitionCache, noColsMissingStats); + + // 5. Build the Hive Table Scan Rel + tableRel = new HiveTableScan(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), optTable, + rowType); + + // 6. Add the Schema (RR) to the RelNode-Schema map + ImmutableMap<String, Integer> hiveToCalciteColMap = buildHiveToCalciteColumnMap(rr, + tableRel); + relToHiveRR.put(tableRel, rr); + relToHiveColNameCalcitePosMap.put(tableRel, hiveToCalciteColMap); + } catch (Exception e) { + if (e instanceof SemanticException) { + throw (SemanticException) e; + } else { + throw (new RuntimeException(e)); + } + } + + return tableRel; + }
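A toy sketch of the `RowResolver`/`ColumnInfo` bookkeeping in step 3; the table alias, column names and types are invented, and partition columns are flagged via the `isVirtualCol` constructor argument, as in the loop above.

```java
import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.parse.RowResolver;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class RowResolverSketch {
  public static void main(String[] args) throws Exception {
    RowResolver rr = new RowResolver();
    // A non-partition column, then a partition column flagged as such.
    rr.put("t", "key",
        new ColumnInfo("key", TypeInfoFactory.stringTypeInfo, "t", false));
    rr.put("t", "ds",
        new ColumnInfo("ds", TypeInfoFactory.stringTypeInfo, "t", true));
    System.out.println(rr.get("t", "ds").getIsVirtualCol()); // true for the partition col
  }
}
```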
+ + private RelNode genFilterRelNode(ASTNode filterExpr, RelNode srcRel) throws SemanticException { + ExprNodeDesc filterCondn = semAnalyzer.genExprNodeDesc(filterExpr, relToHiveRR.get(srcRel)); + if (filterCondn instanceof ExprNodeConstantDesc + && !filterCondn.getTypeString().equals(serdeConstants.BOOLEAN_TYPE_NAME)) { + // Queries like "select * from t1 where 'foo'": Calcite's rule + // PushFilterThroughProject chokes on them. Arguably we could insert a + // cast to boolean in such cases, but since Postgres, Oracle and MS SQL + // Server fail at compile time for such queries, it's an arcane corner + // case, not worth adding that complexity. + throw new CalciteSemanticException("Filter expression with non-boolean return type."); + } + ImmutableMap<String, Integer> hiveColNameCalcitePosMap = this.relToHiveColNameCalcitePosMap + .get(srcRel); + RexNode convertedFilterExpr = new RexNodeConverter(cluster, srcRel.getRowType(), + hiveColNameCalcitePosMap, 0, true).convert(filterCondn); + RexNode factoredFilterExpr = RexUtil + .pullFactors(cluster.getRexBuilder(), convertedFilterExpr); + RelNode filterRel = new HiveFilter(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), + srcRel, factoredFilterExpr); + relToHiveRR.put(filterRel, relToHiveRR.get(srcRel)); + relToHiveColNameCalcitePosMap.put(filterRel, hiveColNameCalcitePosMap); + + return filterRel; + }
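A standalone illustration of the constant, non-boolean filter guard above (the literal is invented):

```java
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.serde.serdeConstants;

public class NonBooleanFilterSketch {
  public static void main(String[] args) {
    // WHERE 'foo' parses to a string constant, not a boolean predicate.
    ExprNodeDesc cond = new ExprNodeConstantDesc("foo");
    boolean rejected = cond instanceof ExprNodeConstantDesc
        && !cond.getTypeString().equals(serdeConstants.BOOLEAN_TYPE_NAME);
    System.out.println(rejected); // true -> CBO bails out
  }
}
```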
+ + private RelNode genFilterRelNode(QB qb, ASTNode searchCond, RelNode srcRel, + Map<String, RelNode> aliasToRel, boolean forHavingClause) throws SemanticException { + /* + * Handle Subquery predicates. + * + * Notes (8/22/14 hb): Why is this a copy of the code from {@link + * #genFilterPlan}? For now we will support the same behavior as the + * non-CBO route, but we plan to allow nested SubQueries (Restriction.9.m) + * and multiple SubQuery expressions (Restriction.8.m). This requires us to + * utilize Calcite's decorrelation mechanics, and for Calcite to flesh out + * null semantics (CALCITE-373). Besides, only the driving code has been + * copied; most of the code in SubQueryUtils and QBSubQuery is reused. + */ + int numSrcColumns = srcRel.getRowType().getFieldCount(); + List<ASTNode> subQueriesInOriginalTree = SubQueryUtils.findSubQueries(searchCond); + if (subQueriesInOriginalTree.size() > 0) { + + /* + * Restriction.9.m :: disallow nested SubQuery expressions. + */ + if (qb.getSubQueryPredicateDef() != null) { + throw new SemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg( + subQueriesInOriginalTree.get(0), "Nested SubQuery expressions are not supported.")); + } + + /* + * Restriction.8.m :: We allow only 1 SubQuery expression per Query. + */ + if (subQueriesInOriginalTree.size() > 1) { + + throw new SemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg( + subQueriesInOriginalTree.get(1), "Only 1 SubQuery expression is supported.")); + } + + /* + * Clone the Search AST; apply all rewrites on the clone. + */ + ASTNode clonedSearchCond = (ASTNode) SubQueryUtils.adaptor.dupTree(searchCond); + List<ASTNode> subQueries = SubQueryUtils.findSubQueries(clonedSearchCond); + + RowResolver inputRR = relToHiveRR.get(srcRel); + RowResolver outerQBRR = inputRR; + ImmutableMap<String, Integer> outerQBPosMap = relToHiveColNameCalcitePosMap.get(srcRel); + + for (int i = 0; i < subQueries.size(); i++) { + ASTNode subQueryAST = subQueries.get(i); + ASTNode originalSubQueryAST = subQueriesInOriginalTree.get(i); + + int sqIdx = qb.incrNumSubQueryPredicates(); + clonedSearchCond = SubQueryUtils.rewriteParentQueryWhere(clonedSearchCond, subQueryAST); + + QBSubQuery subQuery = SubQueryUtils.buildSubQuery(qb.getId(), sqIdx, subQueryAST, + originalSubQueryAST, semAnalyzer.ctx); + + if (!forHavingClause) { + qb.setWhereClauseSubQueryPredicate(subQuery); + } else { + qb.setHavingClauseSubQueryPredicate(subQuery); + } + String havingInputAlias = null; + + if (forHavingClause) { + havingInputAlias = "gby_sq" + sqIdx; + aliasToRel.put(havingInputAlias, srcRel); + } + + subQuery.validateAndRewriteAST(inputRR, forHavingClause, havingInputAlias, + aliasToRel.keySet()); + + QB qbSQ = new QB(subQuery.getOuterQueryId(), subQuery.getAlias(), true); + qbSQ.setSubQueryDef(subQuery.getSubQuery()); + Phase1Ctx ctx_1 = semAnalyzer.initPhase1Ctx(); + semAnalyzer.doPhase1(subQuery.getSubQueryAST(), qbSQ, ctx_1, null); + semAnalyzer.getMetaData(qbSQ); + RelNode subQueryRelNode = genLogicalPlan(qbSQ, false); + aliasToRel.put(subQuery.getAlias(), subQueryRelNode); + RowResolver sqRR = relToHiveRR.get(subQueryRelNode); + + /* + * Check.5.h :: For In and Not In, the SubQuery must implicitly or + * explicitly only contain one select item. + */ + if (subQuery.getOperator().getType() != SubQueryType.EXISTS + && subQuery.getOperator().getType() != SubQueryType.NOT_EXISTS + && sqRR.getColumnInfos().size() - subQuery.getNumOfCorrelationExprsAddedToSQSelect() > 1) { + throw new SemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg(subQueryAST, + "SubQuery can contain only 1 item in Select List.")); + } + + /* + * If this is a Not In SubQuery Predicate then Join in the Null Check + * SubQuery. See QBSubQuery.NotInCheck for details on why and how this + * is constructed. + */ + if (subQuery.getNotInCheck() != null) { + QBSubQuery.NotInCheck notInCheck = subQuery.getNotInCheck(); + notInCheck.setSQRR(sqRR); + QB qbSQ_nic = new QB(subQuery.getOuterQueryId(), notInCheck.getAlias(), true); + qbSQ_nic.setSubQueryDef(notInCheck.getSubQuery()); + ctx_1 = semAnalyzer.initPhase1Ctx(); + semAnalyzer.doPhase1(notInCheck.getSubQueryAST(), qbSQ_nic, ctx_1, null); + semAnalyzer.getMetaData(qbSQ_nic); + RelNode subQueryNICRelNode = genLogicalPlan(qbSQ_nic, false); + aliasToRel.put(notInCheck.getAlias(), subQueryNICRelNode); + srcRel = genJoinRelNode(srcRel, subQueryNICRelNode, + // set explicitly to inner until we figure out SemiJoin use + // notInCheck.getJoinType(), + JoinType.INNER, notInCheck.getJoinConditionAST()); + inputRR = relToHiveRR.get(srcRel); + if (forHavingClause) { + aliasToRel.put(havingInputAlias, srcRel); + } + } + + /* + * Gen the Join between the outer Operator and the SQ op + */ + subQuery.buildJoinCondition(inputRR, sqRR, forHavingClause, havingInputAlias); + srcRel = genJoinRelNode(srcRel, subQueryRelNode, subQuery.getJoinType(), + subQuery.getJoinConditionAST()); + searchCond = subQuery.updateOuterQueryFilter(clonedSearchCond); + + srcRel = genFilterRelNode(searchCond, srcRel);
+ + /* + * For Not Exists and Not In, add a projection on top of the Left + * Outer Join. + */ + if (subQuery.getOperator().getType() == SubQueryType.NOT_EXISTS + || subQuery.getOperator().getType() == SubQueryType.NOT_IN) { + srcRel = projectLeftOuterSide(srcRel, numSrcColumns); + } + } + relToHiveRR.put(srcRel, outerQBRR); + relToHiveColNameCalcitePosMap.put(srcRel, outerQBPosMap); + return srcRel; + } + + return genFilterRelNode(searchCond, srcRel); + } + + private RelNode projectLeftOuterSide(RelNode srcRel, int numColumns) throws SemanticException { + RowResolver iRR = relToHiveRR.get(srcRel); + RowResolver oRR = new RowResolver(); + RowResolver.add(oRR, iRR, numColumns); + + List<RexNode> calciteColLst = new ArrayList<RexNode>(); + List<String> oFieldNames = new ArrayList<String>(); + RelDataType iType = srcRel.getRowType(); + + for (int i = 0; i < iType.getFieldCount(); i++) { + RelDataTypeField fType = iType.getFieldList().get(i); + String fName = iType.getFieldNames().get(i); + calciteColLst.add(cluster.getRexBuilder().makeInputRef(fType.getType(), i)); + oFieldNames.add(fName); + } + + HiveRelNode selRel = HiveProject.create(srcRel, calciteColLst, oFieldNames); + + this.relToHiveColNameCalcitePosMap.put(selRel, buildHiveToCalciteColumnMap(oRR, selRel)); + this.relToHiveRR.put(selRel, oRR); + return selRel; + } + + private RelNode genFilterLogicalPlan(QB qb, RelNode srcRel, Map<String, RelNode> aliasToRel, + boolean forHavingClause) throws SemanticException { + RelNode filterRel = null; + + Iterator<ASTNode> whereClauseIterator = getQBParseInfo(qb).getDestToWhereExpr().values() + .iterator(); + if (whereClauseIterator.hasNext()) { + filterRel = genFilterRelNode(qb, (ASTNode) whereClauseIterator.next().getChild(0), srcRel, + aliasToRel, forHavingClause); + } + + return filterRel; + } + + /** + * Class to store GenericUDAF related information. + */ + private class AggInfo { + private final List<ExprNodeDesc> m_aggParams; + private final TypeInfo m_returnType; + private final String m_udfName; + private final boolean m_distinct; + + private AggInfo(List<ExprNodeDesc> aggParams, TypeInfo returnType, String udfName, + boolean isDistinct) { + m_aggParams = aggParams; + m_returnType = returnType; + m_udfName = udfName; + m_distinct = isDistinct; + } + }
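A hedged sketch of the identity projection that `projectLeftOuterSide` builds, against a synthetic two-column row type; only the first `numColumns` fields (the outer side of the left outer join) are referenced.

```java
import java.util.ArrayList;
import java.util.List;

import org.apache.calcite.jdbc.JavaTypeFactoryImpl;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rel.type.RelDataTypeFactory;
import org.apache.calcite.rex.RexBuilder;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.sql.type.SqlTypeName;

public class ProjectLeftSideSketch {
  public static void main(String[] args) {
    RelDataTypeFactory tf = new JavaTypeFactoryImpl();
    RexBuilder rex = new RexBuilder(tf);
    // Synthetic row type standing in for the join output.
    RelDataType row = tf.builder()
        .add("key", tf.createSqlType(SqlTypeName.INTEGER))
        .add("flag", tf.createSqlType(SqlTypeName.BOOLEAN))
        .build();
    int numColumns = 1; // keep only the outer (left) side
    List<RexNode> projections = new ArrayList<RexNode>();
    for (int i = 0; i < numColumns; i++) {
      projections.add(rex.makeInputRef(row.getFieldList().get(i).getType(), i));
    }
    System.out.println(projections); // [$0]
  }
}
```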
+ + private AggregateCall convertGBAgg(AggInfo agg, RelNode input, List<RexNode> gbChildProjLst, + RexNodeConverter converter, HashMap<String, Integer> rexNodeToPosMap, + Integer childProjLstIndx) throws SemanticException { + + // 1. Get the agg fn ret type in Calcite + RelDataType aggFnRetType = TypeConverter.convert(agg.m_returnType, + this.cluster.getTypeFactory()); + + // 2. Convert the Agg Fn args, and the types of the args, to Calcite + // TODO: Does HQL allow expressions as aggregate args or can they only be + // projections from the child? + Integer inputIndx; + List<Integer> argList = new ArrayList<Integer>(); + RexNode rexNd = null; + RelDataTypeFactory dtFactory = this.cluster.getTypeFactory(); + ImmutableList.Builder<RelDataType> aggArgRelDTBldr = new ImmutableList.Builder<RelDataType>(); + for (ExprNodeDesc expr : agg.m_aggParams) { + rexNd = converter.convert(expr); + inputIndx = rexNodeToPosMap.get(rexNd.toString()); + if (inputIndx == null) { + gbChildProjLst.add(rexNd); + rexNodeToPosMap.put(rexNd.toString(), childProjLstIndx); + inputIndx = childProjLstIndx; + childProjLstIndx++; + } + argList.add(inputIndx); + + // TODO: does the arg need a type cast? + aggArgRelDTBldr.add(TypeConverter.convert(expr.getTypeInfo(), dtFactory)); + } + + // 3. Get the Aggregation FN from Calcite given the name, ret type and + // input arg types + final SqlAggFunction aggregation = SqlFunctionConverter.getCalciteAggFn(agg.m_udfName, + aggArgRelDTBldr.build(), aggFnRetType); + + return new AggregateCall(aggregation, agg.m_distinct, argList, aggFnRetType, null); + } + + private RelNode genGBRelNode(List<ExprNodeDesc> gbExprs, List<AggInfo> aggInfoLst, + RelNode srcRel) throws SemanticException { + ImmutableMap<String, Integer> posMap = this.relToHiveColNameCalcitePosMap.get(srcRel); + RexNodeConverter converter = new RexNodeConverter(this.cluster, srcRel.getRowType(), posMap, + 0, false); + + final List<RexNode> gbChildProjLst = Lists.newArrayList(); + final HashMap<String, Integer> rexNodeToPosMap = new HashMap<String, Integer>(); + final List<Integer> groupSetPositions = Lists.newArrayList(); + Integer gbIndx = 0; + RexNode rnd; + for (ExprNodeDesc key : gbExprs) { + rnd = converter.convert(key); + gbChildProjLst.add(rnd); + groupSetPositions.add(gbIndx); + rexNodeToPosMap.put(rnd.toString(), gbIndx); + gbIndx++; + } + final ImmutableBitSet groupSet = ImmutableBitSet.of(groupSetPositions); + + List<AggregateCall> aggregateCalls = Lists.newArrayList(); + for (AggInfo agg : aggInfoLst) { + aggregateCalls.add(convertGBAgg(agg, srcRel, gbChildProjLst, converter, rexNodeToPosMap, + gbChildProjLst.size())); + } + + if (gbChildProjLst.isEmpty()) { + // This will happen for count(*); in such cases we arbitrarily pick the + // first element from srcRel + gbChildProjLst.add(this.cluster.getRexBuilder().makeInputRef(srcRel, 0)); + } + RelNode gbInputRel = HiveProject.create(srcRel, gbChildProjLst, null); + + HiveRelNode aggregateRel = null; + try { + aggregateRel = new HiveAggregate(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), + gbInputRel, false, groupSet, null, aggregateCalls); + } catch (InvalidRelException e) { + throw new SemanticException(e); + } + + return aggregateRel; + }
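The expression-deduplication trick shared by `convertGBAgg` and `genGBRelNode`, reduced to plain Java; the keys are toy expression strings standing in for `RexNode.toString()` values.

```java
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class ProjectDedupSketch {
  public static void main(String[] args) {
    // Map an expression's string form to its position in the child project,
    // so identical GB keys / aggregate arguments share one projected column.
    Map<String, Integer> rexNodeToPos = new HashMap<String, Integer>();
    List<String> childProject = new ArrayList<String>();
    for (String expr : new String[] { "+($0, $1)", "$2", "+($0, $1)" }) {
      Integer pos = rexNodeToPos.get(expr);
      if (pos == null) {
        childProject.add(expr);
        rexNodeToPos.put(expr, childProject.size() - 1);
      }
    }
    System.out.println(childProject); // [+($0, $1), $2] -- duplicate collapsed
  }
}
```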
+ + private void addAlternateGByKeyMappings(ASTNode gByExpr, ColumnInfo colInfo, + RowResolver gByInputRR, RowResolver gByRR) { + if (gByExpr.getType() == HiveParser.DOT + && gByExpr.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL) { + String tab_alias = BaseSemanticAnalyzer.unescapeIdentifier(gByExpr.getChild(0).getChild(0) + .getText()); + String col_alias = BaseSemanticAnalyzer.unescapeIdentifier(gByExpr.getChild(1).getText()); + gByRR.put(tab_alias, col_alias, colInfo); + } else if (gByExpr.getType() == HiveParser.TOK_TABLE_OR_COL) { + String col_alias = BaseSemanticAnalyzer.unescapeIdentifier(gByExpr.getChild(0).getText()); + String tab_alias = null; + /* + * If the input to the GBy has a tab alias for the column, then add an + * entry based on that tab_alias. E.g. the query "select b.x, count(*) + * from t1 b group by x" needs (tab_alias=b, col_alias=x) in the GBy RR. + * tab_alias=b comes from looking at the RowResolver that is the + * ancestor before any GBy/ReduceSinks added for the GBY operation. + */ + try { + ColumnInfo pColInfo = gByInputRR.get(tab_alias, col_alias); + tab_alias = pColInfo == null ? null : pColInfo.getTabAlias(); + } catch (SemanticException se) { + // Ignore: the input RR has no mapping for this column. + } + gByRR.put(tab_alias, col_alias, colInfo); + } + } + + private void addToGBExpr(RowResolver groupByOutputRowResolver, + RowResolver groupByInputRowResolver, ASTNode grpbyExpr, ExprNodeDesc grpbyExprNDesc, + List<ExprNodeDesc> gbExprNDescLst, List<String> outputColumnNames) { + // TODO: Should we use grpbyExprNDesc.getTypeInfo()? What if the expr is a + // UDF? + int i = gbExprNDescLst.size(); + String field = SemanticAnalyzer.getColumnInternalName(i); + outputColumnNames.add(field); + gbExprNDescLst.add(grpbyExprNDesc); + + ColumnInfo oColInfo = new ColumnInfo(field, grpbyExprNDesc.getTypeInfo(), null, false); + groupByOutputRowResolver.putExpression(grpbyExpr, oColInfo); + + addAlternateGByKeyMappings(grpbyExpr, oColInfo, groupByInputRowResolver, + groupByOutputRowResolver); + } + + private AggInfo getHiveAggInfo(ASTNode aggAst, int aggFnLstArgIndx, RowResolver inputRR) + throws SemanticException { + AggInfo aInfo = null; + + // 1. Convert the UDAF params to ExprNodeDesc + ArrayList<ExprNodeDesc> aggParameters = new ArrayList<ExprNodeDesc>(); + for (int i = 1; i <= aggFnLstArgIndx; i++) { + ASTNode paraExpr = (ASTNode) aggAst.getChild(i); + ExprNodeDesc paraExprNode = semAnalyzer.genExprNodeDesc(paraExpr, inputRR); + aggParameters.add(paraExprNode); + } + + // 2. Is this a distinct UDAF? + boolean isDistinct = aggAst.getType() == HiveParser.TOK_FUNCTIONDI; + + // 3. Determine the type of the UDAF + TypeInfo udafRetType = null; + + // 3.1 Obtain the UDAF name + String aggName = SemanticAnalyzer.unescapeIdentifier(aggAst.getChild(0).getText()); + + // 3.2 Ranking functions return 'int' ('double' for percent_rank) + if (FunctionRegistry.isRankingFunction(aggName)) { + if (aggName.equalsIgnoreCase("percent_rank")) + udafRetType = TypeInfoFactory.doubleTypeInfo; + else + udafRetType = TypeInfoFactory.intTypeInfo; + } else { + // 3.3 Try obtaining UDAF evaluators to determine the ret type + try { + boolean isAllColumns = aggAst.getType() == HiveParser.TOK_FUNCTIONSTAR; + + // 3.3.1 Get the UDAF Evaluator + Mode amode = SemanticAnalyzer.groupByDescModeToUDAFMode(GroupByDesc.Mode.COMPLETE, + isDistinct); + + GenericUDAFEvaluator genericUDAFEvaluator = null; + if (aggName.toLowerCase().equals(FunctionRegistry.LEAD_FUNC_NAME) + || aggName.toLowerCase().equals(FunctionRegistry.LAG_FUNC_NAME)) { + ArrayList<ObjectInspector> originalParameterTypeInfos = SemanticAnalyzer + .getWritableObjectInspector(aggParameters); + genericUDAFEvaluator = FunctionRegistry.getGenericWindowingEvaluator(aggName, + originalParameterTypeInfos, isDistinct, isAllColumns); + GenericUDAFInfo udaf = SemanticAnalyzer.getGenericUDAFInfo(genericUDAFEvaluator, amode, + aggParameters); + udafRetType = ((ListTypeInfo) udaf.returnType).getListElementTypeInfo(); + } else { + genericUDAFEvaluator = SemanticAnalyzer.getGenericUDAFEvaluator(aggName, aggParameters, + aggAst, isDistinct, isAllColumns); + assert (genericUDAFEvaluator != null); + + // 3.3.2 Get the UDAF Info using the UDAF Evaluator + GenericUDAFInfo udaf = SemanticAnalyzer.getGenericUDAFInfo(genericUDAFEvaluator, amode, + aggParameters); + udafRetType = udaf.returnType; + } + } catch (Exception e) { + semAnalyzer.LOG.debug("CBO: Couldn't obtain UDAF evaluators for " + aggName + + ", trying to translate to GenericUDF"); + } + + // 3.4 Try GenericUDF translation + if (udafRetType == null) { + TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR); + // We allow stateful functions in the SELECT list (but nowhere else) + tcCtx.setAllowStatefulFunctions(true); + tcCtx.setAllowDistinctFunctions(false); + ExprNodeDesc exp = semAnalyzer.genExprNodeDesc((ASTNode) aggAst.getChild(0), inputRR, + tcCtx); + udafRetType = exp.getTypeInfo(); + } + } + + // 4. Construct the AggInfo + aInfo = new AggInfo(aggParameters, udafRetType, aggName, isDistinct); + + return aInfo; + }
+ * @throws SemanticException + */ + private RelNode genGBLogicalPlan(QB qb, RelNode srcRel) throws SemanticException { + RelNode gbRel = null; + QBParseInfo qbp = getQBParseInfo(qb); + + // 0. for GSets, Cube, Rollup, bail from Calcite path. + if (!qbp.getDestRollups().isEmpty() || !qbp.getDestGroupingSets().isEmpty() + || !qbp.getDestCubes().isEmpty()) { + String gbyClause = null; + HashMap gbysMap = qbp.getDestToGroupBy(); + if (gbysMap.size() == 1) { + ASTNode gbyAST = gbysMap.entrySet().iterator().next().getValue(); + gbyClause = semAnalyzer.ctx.getTokenRewriteStream().toString(gbyAST.getTokenStartIndex(), + gbyAST.getTokenStopIndex()); + gbyClause = "in '" + gbyClause + "'."; + } else { + gbyClause = "."; + } + String msg = String.format("Encountered Grouping Set/Cube/Rollup%s" + + " Currently we don't support Grouping Set/Cube/Rollup" + " clauses in CBO," + + " turn off cbo for these queries.", gbyClause); + semAnalyzer.LOG.debug(msg); + throw new CalciteSemanticException(msg); + } + + // 1. Gather GB Expressions (AST) (GB + Aggregations) + // NOTE: Multi Insert is not supported + String detsClauseName = qbp.getClauseNames().iterator().next(); + List grpByAstExprs = SemanticAnalyzer.getGroupByForClause(qbp, detsClauseName); + HashMap aggregationTrees = qbp.getAggregationExprsForClause(detsClauseName); + boolean hasGrpByAstExprs = (grpByAstExprs != null && !grpByAstExprs.isEmpty()) ? true : false; + boolean hasAggregationTrees = (aggregationTrees != null && !aggregationTrees.isEmpty()) ? true + : false; + + if (hasGrpByAstExprs || hasAggregationTrees) { + ArrayList gbExprNDescLst = new ArrayList(); + ArrayList outputColumnNames = new ArrayList(); + + // 2. Input, Output Row Resolvers + RowResolver groupByInputRowResolver = this.relToHiveRR.get(srcRel); + RowResolver groupByOutputRowResolver = new RowResolver(); + groupByOutputRowResolver.setIsExprResolver(true); + + if (hasGrpByAstExprs) { + // 3. Construct GB Keys (ExprNode) + for (int i = 0; i < grpByAstExprs.size(); ++i) { + ASTNode grpbyExpr = grpByAstExprs.get(i); + Map astToExprNDescMap = TypeCheckProcFactory.genExprNode( + grpbyExpr, new TypeCheckCtx(groupByInputRowResolver)); + ExprNodeDesc grpbyExprNDesc = astToExprNDescMap.get(grpbyExpr); + if (grpbyExprNDesc == null) + throw new CalciteSemanticException("Invalid Column Reference: " + grpbyExpr.dump()); + + addToGBExpr(groupByOutputRowResolver, groupByInputRowResolver, grpbyExpr, + grpbyExprNDesc, gbExprNDescLst, outputColumnNames); + } + } + + // 4. 
Construct aggregation function Info + ArrayList aggregations = new ArrayList(); + if (hasAggregationTrees) { + assert (aggregationTrees != null); + for (ASTNode value : aggregationTrees.values()) { + // 4.1 Determine type of UDAF + // This is the GenericUDAF name + String aggName = SemanticAnalyzer.unescapeIdentifier(value.getChild(0).getText()); + boolean isDistinct = value.getType() == HiveParser.TOK_FUNCTIONDI; + boolean isAllColumns = value.getType() == HiveParser.TOK_FUNCTIONSTAR; + + // 4.2 Convert UDAF Params to ExprNodeDesc + ArrayList aggParameters = new ArrayList(); + for (int i = 1; i < value.getChildCount(); i++) { + ASTNode paraExpr = (ASTNode) value.getChild(i); + ExprNodeDesc paraExprNode = semAnalyzer.genExprNodeDesc(paraExpr, + groupByInputRowResolver); + aggParameters.add(paraExprNode); + } + + Mode amode = SemanticAnalyzer.groupByDescModeToUDAFMode(GroupByDesc.Mode.COMPLETE, + isDistinct); + GenericUDAFEvaluator genericUDAFEvaluator = SemanticAnalyzer.getGenericUDAFEvaluator( + aggName, aggParameters, value, isDistinct, isAllColumns); + assert (genericUDAFEvaluator != null); + GenericUDAFInfo udaf = SemanticAnalyzer.getGenericUDAFInfo(genericUDAFEvaluator, amode, + aggParameters); + AggInfo aInfo = new AggInfo(aggParameters, udaf.returnType, aggName, isDistinct); + aggregations.add(aInfo); + String field = SemanticAnalyzer.getColumnInternalName(gbExprNDescLst.size() + + aggregations.size() - 1); + outputColumnNames.add(field); + groupByOutputRowResolver.putExpression(value, new ColumnInfo(field, aInfo.m_returnType, + "", false)); + } + } + + gbRel = genGBRelNode(gbExprNDescLst, aggregations, srcRel); + relToHiveColNameCalcitePosMap.put(gbRel, + buildHiveToCalciteColumnMap(groupByOutputRowResolver, gbRel)); + this.relToHiveRR.put(gbRel, groupByOutputRowResolver); + } + + return gbRel; + } + + /** + * Generate OB RelNode and input Select RelNode that should be used to + * introduce top constraining Project. If Input select RelNode is not + * present then don't introduce top constraining select. + * + * @param qb + * @param srcRel + * @param outermostOB + * @return Pair Key- OB RelNode, Value - Input Select for + * top constraining Select + * @throws SemanticException + */ + private Pair genOBLogicalPlan(QB qb, RelNode srcRel, boolean outermostOB) + throws SemanticException { + RelNode sortRel = null; + RelNode originalOBChild = null; + + QBParseInfo qbp = getQBParseInfo(qb); + String dest = qbp.getClauseNames().iterator().next(); + ASTNode obAST = qbp.getOrderByForClause(dest); + + if (obAST != null) { + // 1. OB Expr sanity test + // in strict mode, in the presence of order by, limit must be specified + Integer limit = qb.getParseInfo().getDestLimit(dest); + if (semAnalyzer.conf.getVar(HiveConf.ConfVars.HIVEMAPREDMODE).equalsIgnoreCase("strict") + && limit == null) { + throw new SemanticException(SemanticAnalyzer.generateErrorMessage(obAST, + ErrorMsg.NO_LIMIT_WITH_ORDERBY.getMsg())); + } + + // 2. 
Walk through OB exprs and extract field collations and additional + // virtual columns needed + final List newVCLst = new ArrayList(); + final List fieldCollations = Lists.newArrayList(); + int fieldIndex = 0; + + List obASTExprLst = obAST.getChildren(); + ASTNode obASTExpr; + List> vcASTTypePairs = new ArrayList>(); + RowResolver inputRR = relToHiveRR.get(srcRel); + RowResolver outputRR = new RowResolver(); + + RexNode rnd; + RexNodeConverter converter = new RexNodeConverter(cluster, srcRel.getRowType(), + relToHiveColNameCalcitePosMap.get(srcRel), 0, false); + int srcRelRecordSz = srcRel.getRowType().getFieldCount(); + + for (int i = 0; i < obASTExprLst.size(); i++) { + // 2.1 Convert AST Expr to ExprNode + obASTExpr = (ASTNode) obASTExprLst.get(i); + Map astToExprNDescMap = TypeCheckProcFactory.genExprNode( + obASTExpr, new TypeCheckCtx(inputRR)); + ExprNodeDesc obExprNDesc = astToExprNDescMap.get(obASTExpr.getChild(0)); + if (obExprNDesc == null) + throw new SemanticException("Invalid order by expression: " + obASTExpr.toString()); + + // 2.2 Convert ExprNode to RexNode + rnd = converter.convert(obExprNDesc); + + // 2.3 Determine the index of ob expr in child schema + // NOTE: Calcite can not take compound exprs in OB without it being + // present in the child (& hence we add a child Project Rel) + if (rnd instanceof RexInputRef) { + fieldIndex = ((RexInputRef) rnd).getIndex(); + } else { + fieldIndex = srcRelRecordSz + newVCLst.size(); + newVCLst.add(rnd); + vcASTTypePairs.add(new Pair((ASTNode) obASTExpr.getChild(0), + obExprNDesc.getTypeInfo())); + } + + // 2.4 Determine the Direction of order by + org.apache.calcite.rel.RelFieldCollation.Direction order = RelFieldCollation.Direction.DESCENDING; + if (obASTExpr.getType() == HiveParser.TOK_TABSORTCOLNAMEASC) { + order = RelFieldCollation.Direction.ASCENDING; + } + + // 2.5 Add to field collations + fieldCollations.add(new RelFieldCollation(fieldIndex, order)); + } + + // 3. 
Add Child Project Rel if needed, Generate Output RR, input Sel Rel + // for top constraining Sel + RelNode obInputRel = srcRel; + if (!newVCLst.isEmpty()) { + List originalInputRefs = Lists.transform(srcRel.getRowType().getFieldList(), + new Function() { + @Override + public RexNode apply(RelDataTypeField input) { + return new RexInputRef(input.getIndex(), input.getType()); + } + }); + RowResolver obSyntheticProjectRR = new RowResolver(); + if (!RowResolver.add(obSyntheticProjectRR, inputRR)) { + throw new CalciteSemanticException( + "Duplicates detected when adding columns to RR: see previous message"); + } + int vcolPos = inputRR.getRowSchema().getSignature().size(); + for (Pair astTypePair : vcASTTypePairs) { + obSyntheticProjectRR.putExpression(astTypePair.getKey(), new ColumnInfo( + SemanticAnalyzer.getColumnInternalName(vcolPos), astTypePair.getValue(), null, + false)); + vcolPos++; + } + obInputRel = genSelectRelNode(CompositeList.of(originalInputRefs, newVCLst), + obSyntheticProjectRR, srcRel); + + if (outermostOB) { + if (!RowResolver.add(outputRR, inputRR)) { + throw new CalciteSemanticException( + "Duplicates detected when adding columns to RR: see previous message"); + } + + } else { + if (!RowResolver.add(outputRR, obSyntheticProjectRR)) { + throw new CalciteSemanticException( + "Duplicates detected when adding columns to RR: see previous message"); + } + originalOBChild = srcRel; + } + } else { + if (!RowResolver.add(outputRR, inputRR)) { + throw new CalciteSemanticException( + "Duplicates detected when adding columns to RR: see previous message"); + } + } + + // 4. Construct SortRel + RelTraitSet traitSet = cluster.traitSetOf(HiveRelNode.CONVENTION); + RelCollation canonizedCollation = traitSet.canonize(RelCollationImpl.of(fieldCollations)); + sortRel = new HiveSort(cluster, traitSet, obInputRel, canonizedCollation, null, null); + + // 5. Update the maps + // NOTE: Output RR for SortRel is considered the same as its input; we may + // end up not using VC that is present in sort rel. Also note that + // rowtype of sortrel is the type of its child; if child happens to be + // synthetic project that we introduced then that projectrel would + // contain the vc. 
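(For intuition, a minimal compilable sketch of the ordinal arithmetic described above; this is an illustration with hypothetical names, not part of the patch: a compound ORDER BY key is appended after the child's existing columns, and the collation then addresses it by position.)

    import java.util.ArrayList;
    import java.util.List;

    import org.apache.calcite.rel.RelFieldCollation;

    final class ObCollationSketch {
      // srcWidth: column count of the sort's child before synthetic columns;
      // vcOrdinal: 0-based position of the compound key among the appended VCs.
      static RelFieldCollation collationForVirtualColumn(int srcWidth, int vcOrdinal,
          boolean descending) {
        int fieldIndex = srcWidth + vcOrdinal; // same arithmetic as fieldIndex above
        return new RelFieldCollation(fieldIndex, descending
            ? RelFieldCollation.Direction.DESCENDING
            : RelFieldCollation.Direction.ASCENDING);
      }

      // Plain input-ref keys need no synthetic project: one collation per ordinal.
      static List<RelFieldCollation> collationsForInputRefs(int[] keyOrdinals) {
        List<RelFieldCollation> out = new ArrayList<RelFieldCollation>();
        for (int ordinal : keyOrdinals) {
          out.add(new RelFieldCollation(ordinal, RelFieldCollation.Direction.ASCENDING));
        }
        return out;
      }
    }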
+ ImmutableMap hiveColNameCalcitePosMap = buildHiveToCalciteColumnMap( + outputRR, sortRel); + relToHiveRR.put(sortRel, outputRR); + relToHiveColNameCalcitePosMap.put(sortRel, hiveColNameCalcitePosMap); + } + + return (new Pair(sortRel, originalOBChild)); + } + + private RelNode genLimitLogicalPlan(QB qb, RelNode srcRel) throws SemanticException { + HiveRelNode sortRel = null; + QBParseInfo qbp = getQBParseInfo(qb); + Integer limit = qbp.getDestToLimit().get(qbp.getClauseNames().iterator().next()); + + if (limit != null) { + RexNode fetch = cluster.getRexBuilder().makeExactLiteral(BigDecimal.valueOf(limit)); + RelTraitSet traitSet = cluster.traitSetOf(HiveRelNode.CONVENTION); + RelCollation canonizedCollation = traitSet.canonize(RelCollationImpl.EMPTY); + sortRel = new HiveSort(cluster, traitSet, srcRel, canonizedCollation, null, fetch); + + RowResolver outputRR = new RowResolver(); + if (!RowResolver.add(outputRR, relToHiveRR.get(srcRel))) { + throw new CalciteSemanticException( + "Duplicates detected when adding columns to RR: see previous message"); + } + ImmutableMap hiveColNameCalcitePosMap = buildHiveToCalciteColumnMap( + outputRR, sortRel); + relToHiveRR.put(sortRel, outputRR); + relToHiveColNameCalcitePosMap.put(sortRel, hiveColNameCalcitePosMap); + } + + return sortRel; + } + + private List getPartitionKeys(PartitionSpec ps, RexNodeConverter converter, + RowResolver inputRR) throws SemanticException { + List pKeys = new ArrayList(); + if (ps != null) { + List pExprs = ps.getExpressions(); + for (PartitionExpression pExpr : pExprs) { + TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR); + tcCtx.setAllowStatefulFunctions(true); + ExprNodeDesc exp = semAnalyzer.genExprNodeDesc(pExpr.getExpression(), inputRR, tcCtx); + pKeys.add(converter.convert(exp)); + } + } + + return pKeys; + } + + private List getOrderKeys(OrderSpec os, RexNodeConverter converter, + RowResolver inputRR) throws SemanticException { + List oKeys = new ArrayList(); + if (os != null) { + List oExprs = os.getExpressions(); + for (OrderExpression oExpr : oExprs) { + TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR); + tcCtx.setAllowStatefulFunctions(true); + ExprNodeDesc exp = semAnalyzer.genExprNodeDesc(oExpr.getExpression(), inputRR, tcCtx); + RexNode ordExp = converter.convert(exp); + Set flags = new HashSet(); + if (oExpr.getOrder() == org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.Order.DESC) + flags.add(SqlKind.DESCENDING); + oKeys.add(new RexFieldCollation(ordExp, flags)); + } + } + + return oKeys; + } + + private RexWindowBound getBound(BoundarySpec bs, RexNodeConverter converter) { + RexWindowBound rwb = null; + + if (bs != null) { + SqlParserPos pos = new SqlParserPos(1, 1); + SqlNode amt = bs.getAmt() == 0 ? 
null : SqlLiteral.createExactNumeric( + String.valueOf(bs.getAmt()), new SqlParserPos(2, 2)); + RexNode amtLiteral = null; + SqlCall sc = null; + + if (amt != null) + amtLiteral = cluster.getRexBuilder().makeLiteral(new Integer(bs.getAmt()), + cluster.getTypeFactory().createSqlType(SqlTypeName.INTEGER), true); + + switch (bs.getDirection()) { + case PRECEDING: + if (amt == null) { + rwb = RexWindowBound.create(SqlWindow.createUnboundedPreceding(pos), null); + } else { + sc = (SqlCall) SqlWindow.createPreceding(amt, pos); + rwb = RexWindowBound.create(sc, + cluster.getRexBuilder().makeCall(sc.getOperator(), amtLiteral)); + } + break; + + case CURRENT: + rwb = RexWindowBound.create(SqlWindow.createCurrentRow(new SqlParserPos(1, 1)), null); + break; + + case FOLLOWING: + if (amt == null) { + rwb = RexWindowBound.create(SqlWindow.createUnboundedFollowing(new SqlParserPos(1, 1)), + null); + } else { + sc = (SqlCall) SqlWindow.createFollowing(amt, pos); + rwb = RexWindowBound.create(sc, + cluster.getRexBuilder().makeCall(sc.getOperator(), amtLiteral)); + } + break; + } + } + + return rwb; + } + + private int getWindowSpecIndx(ASTNode wndAST) { + int wi = wndAST.getChildCount() - 1; + if (wi <= 0 || (wndAST.getChild(wi).getType() != HiveParser.TOK_WINDOWSPEC)) { + wi = -1; + } + + return wi; + } + + private Pair genWindowingProj(QB qb, WindowExpressionSpec wExpSpec, + RelNode srcRel) throws SemanticException { + RexNode w = null; + TypeInfo wHiveRetType = null; + + if (wExpSpec instanceof WindowFunctionSpec) { + WindowFunctionSpec wFnSpec = (WindowFunctionSpec) wExpSpec; + ASTNode windowProjAst = wFnSpec.getExpression(); + // TODO: do we need to get to child? + int wndSpecASTIndx = getWindowSpecIndx(windowProjAst); + // 2. Get Hive Aggregate Info + AggInfo hiveAggInfo = getHiveAggInfo(windowProjAst, wndSpecASTIndx - 1, + this.relToHiveRR.get(srcRel)); + + // 3. Get Calcite Return type for Agg Fn + wHiveRetType = hiveAggInfo.m_returnType; + RelDataType calciteAggFnRetType = TypeConverter.convert(hiveAggInfo.m_returnType, + this.cluster.getTypeFactory()); + + // 4. Convert Agg Fn args to Calcite + ImmutableMap posMap = this.relToHiveColNameCalcitePosMap.get(srcRel); + RexNodeConverter converter = new RexNodeConverter(this.cluster, srcRel.getRowType(), + posMap, 0, false); + Builder calciteAggFnArgsBldr = ImmutableList. builder(); + Builder calciteAggFnArgsTypeBldr = ImmutableList. builder(); + for (int i = 0; i < hiveAggInfo.m_aggParams.size(); i++) { + calciteAggFnArgsBldr.add(converter.convert(hiveAggInfo.m_aggParams.get(i))); + calciteAggFnArgsTypeBldr.add(TypeConverter.convert(hiveAggInfo.m_aggParams.get(i) + .getTypeInfo(), this.cluster.getTypeFactory())); + } + ImmutableList calciteAggFnArgs = calciteAggFnArgsBldr.build(); + ImmutableList calciteAggFnArgsType = calciteAggFnArgsTypeBldr.build(); + + // 5. Get Calcite Agg Fn + final SqlAggFunction calciteAggFn = SqlFunctionConverter.getCalciteAggFn( + hiveAggInfo.m_udfName, calciteAggFnArgsType, calciteAggFnRetType); + + // 6. 
Translate Window spec + RowResolver inputRR = relToHiveRR.get(srcRel); + WindowSpec wndSpec = ((WindowFunctionSpec) wExpSpec).getWindowSpec(); + List partitionKeys = getPartitionKeys(wndSpec.getPartition(), converter, inputRR); + List orderKeys = getOrderKeys(wndSpec.getOrder(), converter, inputRR); + RexWindowBound upperBound = getBound(wndSpec.windowFrame.start, converter); + RexWindowBound lowerBound = getBound(wndSpec.windowFrame.end, converter); + boolean isRows = ((wndSpec.windowFrame.start instanceof RangeBoundarySpec) || (wndSpec.windowFrame.end instanceof RangeBoundarySpec)) ? true + : false; + + w = cluster.getRexBuilder().makeOver(calciteAggFnRetType, calciteAggFn, calciteAggFnArgs, + partitionKeys, ImmutableList. copyOf(orderKeys), lowerBound, + upperBound, isRows, true, false); + } else { + // TODO: Convert to Semantic Exception + throw new RuntimeException("Unsupported window Spec"); + } + + return new Pair(w, wHiveRetType); + } + + private RelNode genSelectForWindowing(QB qb, RelNode srcRel, HashSet newColumns) + throws SemanticException { + getQBParseInfo(qb); + WindowingSpec wSpec = (!qb.getAllWindowingSpecs().isEmpty()) ? qb.getAllWindowingSpecs() + .values().iterator().next() : null; + if (wSpec == null) + return null; + // 1. Get valid Window Function Spec + wSpec.validateAndMakeEffective(); + List windowExpressions = wSpec.getWindowExpressions(); + if (windowExpressions == null || windowExpressions.isEmpty()) + return null; + + RowResolver inputRR = this.relToHiveRR.get(srcRel); + // 2. Get RexNodes for original Projections from below + List projsForWindowSelOp = new ArrayList( + HiveCalciteUtil.getProjsFromBelowAsInputRef(srcRel)); + + // 3. Construct new Row Resolver with everything from below. + RowResolver out_rwsch = new RowResolver(); + if (!RowResolver.add(out_rwsch, inputRR)) { + semAnalyzer.LOG.warn("Duplicates detected when adding columns to RR: see previous message"); + } + + // 4. Walk through Window Expressions & Construct RexNodes for those, + // Update out_rwsch + for (WindowExpressionSpec wExprSpec : windowExpressions) { + if (out_rwsch.getExpression(wExprSpec.getExpression()) == null) { + Pair wtp = genWindowingProj(qb, wExprSpec, srcRel); + projsForWindowSelOp.add(wtp.getKey()); + + // 6.2.2 Update Output Row Schema + ColumnInfo oColInfo = new ColumnInfo( + SemanticAnalyzer.getColumnInternalName(projsForWindowSelOp.size()), wtp.getValue(), + null, false); + out_rwsch.putExpression(wExprSpec.getExpression(), oColInfo); + newColumns.add(oColInfo); + } + } + + return genSelectRelNode(projsForWindowSelOp, out_rwsch, srcRel); + } + + private RelNode genSelectRelNode(List calciteColLst, RowResolver out_rwsch, + RelNode srcRel) throws CalciteSemanticException { + // 1. Build Column Names + Set colNamesSet = new HashSet(); + List cInfoLst = out_rwsch.getRowSchema().getSignature(); + ArrayList columnNames = new ArrayList(); + String[] qualifiedColNames; + String tmpColAlias; + for (int i = 0; i < calciteColLst.size(); i++) { + ColumnInfo cInfo = cInfoLst.get(i); + qualifiedColNames = out_rwsch.reverseLookup(cInfo.getInternalName()); + /* + * if (qualifiedColNames[0] != null && !qualifiedColNames[0].isEmpty()) + * tmpColAlias = qualifiedColNames[0] + "." + qualifiedColNames[1]; else + */ + tmpColAlias = qualifiedColNames[1]; + + // Prepend column names with '_o_' if it starts with '_c' + /* + * Hive treats names that start with '_c' as internalNames; so change + * the names so we don't run into this issue when converting back to + * Hive AST. 
+ */ + if (tmpColAlias.startsWith("_c")) + tmpColAlias = "_o_" + tmpColAlias; + int suffix = 1; + while (colNamesSet.contains(tmpColAlias)) { + tmpColAlias = qualifiedColNames[1] + suffix; + suffix++; + } + + colNamesSet.add(tmpColAlias); + columnNames.add(tmpColAlias); + } + + // 3. Build Calcite Rel Node for project using converted projections & col + // names + HiveRelNode selRel = HiveProject.create(srcRel, calciteColLst, columnNames); + + // 4. Keep track of colname-to-posmap && RR for new select + this.relToHiveColNameCalcitePosMap + .put(selRel, buildHiveToCalciteColumnMap(out_rwsch, selRel)); + this.relToHiveRR.put(selRel, out_rwsch); + + return selRel; + } + + /** + * NOTE: there can only be one select clause since we don't handle + * multi-destination inserts. + * + * @throws SemanticException + */ + private RelNode genSelectLogicalPlan(QB qb, RelNode srcRel, RelNode starSrcRel) + throws SemanticException { + // 0. Generate a Select Node for Windowing + // Exclude the newly-generated select columns from */etc. resolution. + HashSet excludedColumns = new HashSet(); + RelNode selForWindow = genSelectForWindowing(qb, srcRel, excludedColumns); + srcRel = (selForWindow == null) ? srcRel : selForWindow; + + ArrayList col_list = new ArrayList(); + + // 1. Get Select Expression List + QBParseInfo qbp = getQBParseInfo(qb); + String selClauseName = qbp.getClauseNames().iterator().next(); + ASTNode selExprList = qbp.getSelForClause(selClauseName); + + // 2. Row resolvers for input, output + RowResolver out_rwsch = new RowResolver(); + Integer pos = Integer.valueOf(0); + // TODO: will this also fix windowing? try + RowResolver inputRR = this.relToHiveRR.get(srcRel), starRR = inputRR; + if (starSrcRel != null) { + starRR = this.relToHiveRR.get(starSrcRel); + } + + // 3. Query Hints + // TODO: Handle Query Hints; currently we ignore them + boolean selectStar = false; + int posn = 0; + boolean hintPresent = (selExprList.getChild(0).getType() == HiveParser.TOK_HINTLIST); + if (hintPresent) { + String hint = semAnalyzer.ctx.getTokenRewriteStream().toString( + selExprList.getChild(0).getTokenStartIndex(), + selExprList.getChild(0).getTokenStopIndex()); + String msg = String.format("Hint specified for %s." + " Currently we don't support hints in CBO, turn off cbo to use hints.", hint); + semAnalyzer.LOG.debug(msg); + throw new CalciteSemanticException(msg); + } + + // 4. Bail out if select involves Transform + boolean isInTransform = (selExprList.getChild(posn).getChild(0).getType() == HiveParser.TOK_TRANSFORM); + if (isInTransform) { + String msg = String.format("SELECT TRANSFORM is currently not supported in CBO," + " turn off cbo to use TRANSFORM."); + semAnalyzer.LOG.debug(msg); + throw new CalciteSemanticException(msg); + } + + // 5. Bail out if select involves UDTF + ASTNode expr = (ASTNode) selExprList.getChild(posn).getChild(0); + int exprType = expr.getType(); + if (exprType == HiveParser.TOK_FUNCTION || exprType == HiveParser.TOK_FUNCTIONSTAR) { + String funcName = TypeCheckProcFactory.DefaultExprProcessor.getFunctionText(expr, true); + FunctionInfo fi = FunctionRegistry.getFunctionInfo(funcName); + if (fi != null && fi.getGenericUDTF() != null) { + String msg = String.format("UDTF " + funcName + " is currently not supported in CBO," + " turn off cbo to use UDTF " + funcName); + semAnalyzer.LOG.debug(msg); + throw new CalciteSemanticException(msg); + } + } + + // 6. 
Iterate over all expressions (after SELECT) + ASTNode exprList = selExprList; + int startPosn = posn; + List tabAliasesForAllProjs = getTabAliases(starRR); + for (int i = startPosn; i < exprList.getChildCount(); ++i) { + + // 6.1 child can be EXPR AS ALIAS, or EXPR. + ASTNode child = (ASTNode) exprList.getChild(i); + boolean hasAsClause = (!isInTransform) && (child.getChildCount() == 2); + + // 6.2 EXPR AS (ALIAS,...) parses, but is only allowed for UDTFs. + // This check is not needed and invalid when there is a transform b/c the + // ASTs are slightly different. + if (child.getChildCount() > 2) { + throw new SemanticException(SemanticAnalyzer.generateErrorMessage( + (ASTNode) child.getChild(2), ErrorMsg.INVALID_AS.getMsg())); + } + + String tabAlias; + String colAlias; + + // 6.3 Get rid of TOK_SELEXPR + expr = (ASTNode) child.getChild(0); + String[] colRef = SemanticAnalyzer.getColAlias(child, + semAnalyzer.getAutogenColAliasPrfxLbl(), inputRR, + semAnalyzer.autogenColAliasPrfxIncludeFuncName(), i); + tabAlias = colRef[0]; + colAlias = colRef[1]; + + // 6.4 Build ExprNode corresponding to columns + if (expr.getType() == HiveParser.TOK_ALLCOLREF) { + pos = semAnalyzer.genColListRegex(".*", expr.getChildCount() == 0 ? null + : SemanticAnalyzer.getUnescapedName((ASTNode) expr.getChild(0)).toLowerCase(), expr, + col_list, excludedColumns, inputRR, starRR, pos, out_rwsch, tabAliasesForAllProjs, + true); + selectStar = true; + } else if (expr.getType() == HiveParser.TOK_TABLE_OR_COL + && !hasAsClause + && !inputRR.getIsExprResolver() + && SemanticAnalyzer.isRegex( + SemanticAnalyzer.unescapeIdentifier(expr.getChild(0).getText()), semAnalyzer.conf)) { + // In case the expression is a regex COL. + // This can only happen without AS clause + // We don't allow this for ExprResolver - the Group By case + pos = semAnalyzer.genColListRegex( + SemanticAnalyzer.unescapeIdentifier(expr.getChild(0).getText()), null, expr, + col_list, excludedColumns, inputRR, starRR, pos, out_rwsch, tabAliasesForAllProjs, + true); + } else if (expr.getType() == HiveParser.DOT + && expr.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL + && inputRR.hasTableAlias(SemanticAnalyzer.unescapeIdentifier(expr.getChild(0) + .getChild(0).getText().toLowerCase())) + && !hasAsClause + && !inputRR.getIsExprResolver() + && SemanticAnalyzer.isRegex( + SemanticAnalyzer.unescapeIdentifier(expr.getChild(1).getText()), semAnalyzer.conf)) { + // In case the expression is TABLE.COL (col can be regex). 
+ // This can only happen without AS clause + // We don't allow this for ExprResolver - the Group By case + pos = semAnalyzer.genColListRegex( + SemanticAnalyzer.unescapeIdentifier(expr.getChild(1).getText()), + SemanticAnalyzer.unescapeIdentifier(expr.getChild(0).getChild(0).getText() + .toLowerCase()), expr, col_list, excludedColumns, inputRR, starRR, pos, + out_rwsch, tabAliasesForAllProjs, true); + } else if (expr.toStringTree().contains("TOK_FUNCTIONDI") + && !(srcRel instanceof HiveAggregate)) { + // Likely a malformed query, e.g. select hash(distinct c1) from t1; + throw new CalciteSemanticException("Distinct without an aggregation."); + } else { + // Case when this is an expression + TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR); + // We allow stateful functions in the SELECT list (but nowhere else) + tcCtx.setAllowStatefulFunctions(true); + ExprNodeDesc exp = semAnalyzer.genExprNodeDesc(expr, inputRR, tcCtx); + String recommended = semAnalyzer.recommendName(exp, colAlias); + if (recommended != null && out_rwsch.get(null, recommended) == null) { + colAlias = recommended; + } + col_list.add(exp); + + ColumnInfo colInfo = new ColumnInfo(SemanticAnalyzer.getColumnInternalName(pos), + exp.getWritableObjectInspector(), tabAlias, false); + colInfo.setSkewedCol((exp instanceof ExprNodeColumnDesc) ? ((ExprNodeColumnDesc) exp) + .isSkewedCol() : false); + if (!out_rwsch.putWithCheck(tabAlias, colAlias, null, colInfo)) { + throw new CalciteSemanticException("Cannot add column to RR: " + tabAlias + "." + + colAlias + " => " + colInfo + " due to duplication, see previous warnings"); + } + + if (exp instanceof ExprNodeColumnDesc) { + ExprNodeColumnDesc colExp = (ExprNodeColumnDesc) exp; + String[] altMapping = inputRR.getAlternateMappings(colExp.getColumn()); + if (altMapping != null) { + out_rwsch.put(altMapping[0], altMapping[1], colInfo); + } + } + + pos = Integer.valueOf(pos.intValue() + 1); + } + } + selectStar = selectStar && exprList.getChildCount() == posn + 1; + + // 7. Convert Hive projections to Calcite + List calciteColLst = new ArrayList(); + RexNodeConverter rexNodeConv = new RexNodeConverter(cluster, srcRel.getRowType(), + buildHiveColNameToInputPosMap(col_list, inputRR), 0, false); + for (ExprNodeDesc colExpr : col_list) { + calciteColLst.add(rexNodeConv.convert(colExpr)); + } + + // 8. Build Calcite Rel + RelNode selRel = genSelectRelNode(calciteColLst, out_rwsch, srcRel); + + return selRel; + } + + private RelNode genLogicalPlan(QBExpr qbexpr) throws SemanticException { + if (qbexpr.getOpcode() == QBExpr.Opcode.NULLOP) { + return genLogicalPlan(qbexpr.getQB(), false); + } + if (qbexpr.getOpcode() == QBExpr.Opcode.UNION) { + RelNode qbexpr1Ops = genLogicalPlan(qbexpr.getQBExpr1()); + RelNode qbexpr2Ops = genLogicalPlan(qbexpr.getQBExpr2()); + + return genUnionLogicalPlan(qbexpr.getAlias(), qbexpr.getQBExpr1().getAlias(), qbexpr1Ops, + qbexpr.getQBExpr2().getAlias(), qbexpr2Ops); + } + return null; + } + + private RelNode genLogicalPlan(QB qb, boolean outerMostQB) throws SemanticException { + RelNode srcRel = null; + RelNode filterRel = null; + RelNode gbRel = null; + RelNode gbHavingRel = null; + RelNode selectRel = null; + RelNode obRel = null; + RelNode limitRel = null; + + // First generate all the opInfos for the elements in the from clause + Map aliasToRel = new HashMap(); + + // 0. Check if we can handle the SubQuery; + // canHandleQbForCbo returns null if the query can be handled. 
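(One subtlety worth calling out before the code below: canHandleQbForCbo uses a "null means OK" contract, returning null when CBO can proceed and a human-readable reason string otherwise, so the null check reads inverted from the usual "null means failure" style. A tiny self-contained sketch of the same pattern, with hypothetical names rather than this patch's actual checker:)

    final class ReasonCheckSketch {
      /** Returns null when supported; otherwise a short reason for bailing out. */
      static String whyUnsupported(boolean hasMultiDest, boolean hasLateralViews) {
        StringBuilder sb = new StringBuilder();
        if (hasMultiDest) sb.append("is a multi-destination query; ");
        if (hasLateralViews) sb.append("has lateral views; ");
        return sb.length() == 0 ? null : sb.toString();
      }

      static void guard(boolean hasMultiDest, boolean hasLateralViews) {
        String reason = whyUnsupported(hasMultiDest, hasLateralViews);
        if (reason != null) { // non-null means bail out, mirroring the check below
          throw new UnsupportedOperationException("CBO skipped because it " + reason);
        }
      }
    }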
String reason = canHandleQbForCbo(semAnalyzer.queryProperties, semAnalyzer.conf, false, + semAnalyzer.LOG.isDebugEnabled()); + if (reason != null) { + String msg = "CBO can not handle Sub Query"; + if (semAnalyzer.LOG.isDebugEnabled()) { + semAnalyzer.LOG.debug(msg + " because it: " + reason); + } + throw new CalciteSemanticException(msg); + } + + // 1. Build Rel For Src (SubQuery, TS, Join) + // 1.1. Recurse over the subqueries to fill the subquery part of the plan + for (String subqAlias : qb.getSubqAliases()) { + QBExpr qbexpr = qb.getSubqForAlias(subqAlias); + aliasToRel.put(subqAlias, genLogicalPlan(qbexpr)); + } + + // 1.2 Recurse over all the source tables + for (String tableAlias : qb.getTabAliases()) { + RelNode op = genTableLogicalPlan(tableAlias, qb); + aliasToRel.put(tableAlias, op); + } + + if (aliasToRel.isEmpty()) { + // This may happen for queries like select 1; (no source table) + // We could do the following, which is the same as what Hive does. + // With this, we would be able to generate a Calcite plan. + // qb.getMetaData().setSrcForAlias(DUMMY_TABLE, getDummyTable()); + // RelNode op = genTableLogicalPlan(DUMMY_TABLE, qb); + // qb.addAlias(DUMMY_TABLE); + // qb.setTabAlias(DUMMY_TABLE, DUMMY_TABLE); + // aliasToRel.put(DUMMY_TABLE, op); + // However, Hive trips later while trying to get Metadata for this dummy + // table + // So, for now let's just disable this. Anyway there is nothing much to + // optimize in such cases. + throw new CalciteSemanticException("Unsupported"); + + } + // 1.3 process join + if (qb.getParseInfo().getJoinExpr() != null) { + srcRel = genJoinLogicalPlan(qb.getParseInfo().getJoinExpr(), aliasToRel); + } else { + // If no join then there should only be either 1 TS or 1 SubQuery + srcRel = aliasToRel.values().iterator().next(); + } + + // 2. Build Rel for where Clause + filterRel = genFilterLogicalPlan(qb, srcRel, aliasToRel, false); + srcRel = (filterRel == null) ? srcRel : filterRel; + RelNode starSrcRel = srcRel; + + // 3. Build Rel for GB Clause + gbRel = genGBLogicalPlan(qb, srcRel); + srcRel = (gbRel == null) ? srcRel : gbRel; + + // 4. Build Rel for GB Having Clause + gbHavingRel = genGBHavingLogicalPlan(qb, srcRel, aliasToRel); + srcRel = (gbHavingRel == null) ? srcRel : gbHavingRel; + + // 5. Build Rel for Select Clause + selectRel = genSelectLogicalPlan(qb, srcRel, starSrcRel); + srcRel = (selectRel == null) ? srcRel : selectRel; + + // 6. Build Rel for OB Clause + Pair obTopProjPair = genOBLogicalPlan(qb, srcRel, outerMostQB); + obRel = obTopProjPair.getKey(); + RelNode topConstrainingProjArgsRel = obTopProjPair.getValue(); + srcRel = (obRel == null) ? srcRel : obRel; + + // 7. Build Rel for Limit Clause + limitRel = genLimitLogicalPlan(qb, srcRel); + srcRel = (limitRel == null) ? srcRel : limitRel; + + // 8. Introduce top constraining select if needed. + // NOTES: + // 1. Calcite cannot take an expr in OB; hence it needs to be added as VC + // in the input select; In such cases we need to introduce a select on top + // to ensure VC is not visible beyond Limit, OB. + // 2. Hive cannot preserve order across select. In subqueries OB is used + // to get a deterministic set of tuples from following limit. Hence we + // introduce the constraining select above Limit (if present) instead of + // OB. + // 3. The top level OB will not introduce constraining select due to Hive + // limitation (#2) stated above. The RR for OB will not include VC. Thus + // Result Schema will not include exprs used by top OB. 
During AST Conv, + // in the PlanModifierForASTConv we would modify the top level OB to + // migrate exprs from input sel to SortRel (Note that Calcite doesn't + // support this; but since we are done with Calcite at this point it's OK). + if (topConstrainingProjArgsRel != null) { + List originalInputRefs = Lists.transform(topConstrainingProjArgsRel.getRowType() + .getFieldList(), new Function() { + @Override + public RexNode apply(RelDataTypeField input) { + return new RexInputRef(input.getIndex(), input.getType()); + } + }); + RowResolver topConstrainingProjRR = new RowResolver(); + if (!RowResolver.add(topConstrainingProjRR, + this.relToHiveRR.get(topConstrainingProjArgsRel))) { + semAnalyzer.LOG + .warn("Duplicates detected when adding columns to RR: see previous message"); + } + srcRel = genSelectRelNode(originalInputRefs, topConstrainingProjRR, srcRel); + } + + // 9. In case this QB corresponds to a subquery then modify its RR to point + // to the subquery alias + // TODO: clean this up + if (qb.getParseInfo().getAlias() != null) { + RowResolver rr = this.relToHiveRR.get(srcRel); + RowResolver newRR = new RowResolver(); + String alias = qb.getParseInfo().getAlias(); + for (ColumnInfo colInfo : rr.getColumnInfos()) { + String name = colInfo.getInternalName(); + String[] tmp = rr.reverseLookup(name); + if ("".equals(tmp[0]) || tmp[1] == null) { + // ast expression is not a valid column name for table + tmp[1] = colInfo.getInternalName(); + } + ColumnInfo newCi = new ColumnInfo(colInfo); + newCi.setTabAlias(alias); + newRR.put(alias, tmp[1], newCi); + } + relToHiveRR.put(srcRel, newRR); + relToHiveColNameCalcitePosMap.put(srcRel, buildHiveToCalciteColumnMap(newRR, srcRel)); + } + + if (semAnalyzer.LOG.isDebugEnabled()) { + semAnalyzer.LOG.debug("Created Plan for Query Block " + qb.getId()); + } + + semAnalyzer.setQB(qb); + + return srcRel; + } + + private RelNode genGBHavingLogicalPlan(QB qb, RelNode srcRel, Map aliasToRel) + throws SemanticException { + RelNode gbFilter = null; + QBParseInfo qbp = getQBParseInfo(qb); + ASTNode havingClause = qbp.getHavingForClause(qbp.getClauseNames().iterator().next()); + + if (havingClause != null) { + if (!(srcRel instanceof HiveAggregate)) { + // ill-formed query like select * from t1 having c1 > 0; + throw new CalciteSemanticException("Having clause without any group-by."); + } + validateNoHavingReferenceToAlias(qb, (ASTNode) havingClause.getChild(0)); + gbFilter = genFilterRelNode(qb, (ASTNode) havingClause.getChild(0), srcRel, aliasToRel, + true); + } + + return gbFilter; + } + + /* + * Bail if having clause uses Select Expression aliases for Aggregation + * expressions. We could do what Hive does. But this is non-standard + * behavior. Making sure this doesn't cause issues when translating through + * Calcite is not worth it. + */ + private void validateNoHavingReferenceToAlias(QB qb, ASTNode havingExpr) + throws CalciteSemanticException { + + QBParseInfo qbPI = qb.getParseInfo(); + Map exprToAlias = qbPI.getAllExprToColumnAlias(); + /* + * a mouthful, but safe: - a QB is guaranteed to have at least 1 + * destination - we don't support multi insert, so we pick the first dest. 
+ */ + Set aggExprs = qbPI.getDestToAggregationExprs().values().iterator().next().keySet(); + + for (Map.Entry selExpr : exprToAlias.entrySet()) { + ASTNode selAST = selExpr.getKey(); + if (!aggExprs.contains(selAST.toStringTree().toLowerCase())) { + continue; + } + final String aliasToCheck = selExpr.getValue(); + final Set aliasReferences = new HashSet(); + TreeVisitorAction action = new TreeVisitorAction() { + + @Override + public Object pre(Object t) { + if (ParseDriver.adaptor.getType(t) == HiveParser.TOK_TABLE_OR_COL) { + Object c = ParseDriver.adaptor.getChild(t, 0); + if (c != null && ParseDriver.adaptor.getType(c) == HiveParser.Identifier + && ParseDriver.adaptor.getText(c).equals(aliasToCheck)) { + aliasReferences.add(t); + } + } + return t; + } + + @Override + public Object post(Object t) { + return t; + } + }; + new TreeVisitor(ParseDriver.adaptor).visit(havingExpr, action); + + if (aliasReferences.size() > 0) { + String havingClause = semAnalyzer.ctx.getTokenRewriteStream().toString( + havingExpr.getTokenStartIndex(), havingExpr.getTokenStopIndex()); + String msg = String.format("Encountered Select alias '%s' in having clause '%s'" + + " This non standard behavior is not supported with cbo on." + + " Turn off cbo for these queries.", aliasToCheck, havingClause); + semAnalyzer.LOG.debug(msg); + throw new CalciteSemanticException(msg); + } + } + + } + + private ImmutableMap buildHiveToCalciteColumnMap(RowResolver rr, RelNode rNode) { + ImmutableMap.Builder b = new ImmutableMap.Builder(); + for (ColumnInfo ci : rr.getRowSchema().getSignature()) { + b.put(ci.getInternalName(), rr.getPosition(ci.getInternalName())); + } + return b.build(); + } + + private ImmutableMap buildHiveColNameToInputPosMap( + List col_list, RowResolver inputRR) { + // Build a map of Hive column Names (ExprNodeColumnDesc Name) + // to the positions of those projections in the input + Map hashCodeTocolumnDescMap = new HashMap(); + ExprNodeDescUtils.getExprNodeColumnDesc(col_list, hashCodeTocolumnDescMap); + ImmutableMap.Builder hiveColNameToInputPosMapBuilder = new ImmutableMap.Builder(); + String exprNodecolName; + for (ExprNodeDesc exprDesc : hashCodeTocolumnDescMap.values()) { + exprNodecolName = ((ExprNodeColumnDesc) exprDesc).getColumn(); + hiveColNameToInputPosMapBuilder.put(exprNodecolName, inputRR.getPosition(exprNodecolName)); + } + + return hiveColNameToInputPosMapBuilder.build(); + } + + private QBParseInfo getQBParseInfo(QB qb) throws CalciteSemanticException { + QBParseInfo qbp = qb.getParseInfo(); + if (qbp.getClauseNames().size() > 1) { + String msg = String.format("Multi Insert is currently not supported in CBO," + + " turn off cbo to use Multi Insert."); + semAnalyzer.LOG.debug(msg); + throw new CalciteSemanticException(msg); + } + return qbp; + } + + private List getTabAliases(RowResolver inputRR) { + List tabAliases = new ArrayList(); // TODO: this should be + // unique + for (ColumnInfo ci : inputRR.getColumnInfos()) { + tabAliases.add(ci.getTabAlias()); + } + + return tabAliases; + } + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 29be691..e52ac7f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -22,95 +22,26 @@ import java.io.IOException; import java.io.Serializable; -import java.lang.reflect.Field; -import java.lang.reflect.InvocationTargetException; -import 
java.lang.reflect.UndeclaredThrowableException; -import java.math.BigDecimal; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.LinkedHashMap; -import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Set; import java.util.TreeSet; import java.util.UUID; -import java.util.concurrent.atomic.AtomicInteger; import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; import org.antlr.runtime.ClassicToken; import org.antlr.runtime.Token; import org.antlr.runtime.tree.Tree; -import org.antlr.runtime.tree.TreeVisitor; -import org.antlr.runtime.tree.TreeVisitorAction; import org.antlr.runtime.tree.TreeWizard; import org.antlr.runtime.tree.TreeWizard.ContextVisitor; -import org.apache.calcite.plan.RelOptCluster; -import org.apache.calcite.plan.RelOptPlanner; -import org.apache.calcite.plan.RelOptQuery; -import org.apache.calcite.plan.RelOptRule; -import org.apache.calcite.plan.RelOptSchema; -import org.apache.calcite.plan.RelOptUtil; -import org.apache.calcite.plan.RelTraitSet; -import org.apache.calcite.plan.hep.HepMatchOrder; -import org.apache.calcite.plan.hep.HepPlanner; -import org.apache.calcite.plan.hep.HepProgram; -import org.apache.calcite.plan.hep.HepProgramBuilder; -import org.apache.calcite.rel.InvalidRelException; -import org.apache.calcite.rel.RelCollation; -import org.apache.calcite.rel.RelCollationImpl; -import org.apache.calcite.rel.RelFieldCollation; -import org.apache.calcite.rel.RelNode; -import org.apache.calcite.rel.core.Aggregate; -import org.apache.calcite.rel.core.AggregateCall; -import org.apache.calcite.rel.core.Filter; -import org.apache.calcite.rel.core.Join; -import org.apache.calcite.rel.core.JoinRelType; -import org.apache.calcite.rel.core.RelFactories; -import org.apache.calcite.rel.core.SemiJoin; -import org.apache.calcite.rel.metadata.CachingRelMetadataProvider; -import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider; -import org.apache.calcite.rel.metadata.RelMetadataProvider; -import org.apache.calcite.rel.rules.FilterAggregateTransposeRule; -import org.apache.calcite.rel.rules.FilterMergeRule; -import org.apache.calcite.rel.rules.FilterProjectTransposeRule; -import org.apache.calcite.rel.rules.FilterSetOpTransposeRule; -import org.apache.calcite.rel.rules.JoinPushTransitivePredicatesRule; -import org.apache.calcite.rel.rules.JoinToMultiJoinRule; -import org.apache.calcite.rel.rules.LoptOptimizeJoinRule; -import org.apache.calcite.rel.rules.SemiJoinFilterTransposeRule; -import org.apache.calcite.rel.rules.SemiJoinJoinTransposeRule; -import org.apache.calcite.rel.rules.SemiJoinProjectTransposeRule; -import org.apache.calcite.rel.type.RelDataType; -import org.apache.calcite.rel.type.RelDataTypeFactory; -import org.apache.calcite.rel.type.RelDataTypeField; -import org.apache.calcite.rex.RexBuilder; -import org.apache.calcite.rex.RexFieldCollation; -import org.apache.calcite.rex.RexInputRef; -import org.apache.calcite.rex.RexNode; -import org.apache.calcite.rex.RexUtil; -import org.apache.calcite.rex.RexWindowBound; -import org.apache.calcite.schema.SchemaPlus; -import org.apache.calcite.sql.SqlAggFunction; -import org.apache.calcite.sql.SqlCall; -import org.apache.calcite.sql.SqlExplainLevel; -import org.apache.calcite.sql.SqlKind; -import org.apache.calcite.sql.SqlLiteral; -import org.apache.calcite.sql.SqlNode; -import org.apache.calcite.sql.SqlWindow; -import 
org.apache.calcite.sql.parser.SqlParserPos; -import org.apache.calcite.sql.type.SqlTypeName; -import org.apache.calcite.sql2rel.RelFieldTrimmer; -import org.apache.calcite.tools.Frameworks; -import org.apache.calcite.util.CompositeList; -import org.apache.calcite.util.ImmutableBitSet; -import org.apache.calcite.util.ImmutableIntList; -import org.apache.calcite.util.Pair; import org.apache.commons.lang.StringUtils; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; @@ -179,32 +110,10 @@ import org.apache.hadoop.hive.ql.metadata.VirtualColumn; import org.apache.hadoop.hive.ql.optimizer.Optimizer; import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException; -import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; -import org.apache.hadoop.hive.ql.optimizer.calcite.HiveDefaultRelMetadataProvider; -import org.apache.hadoop.hive.ql.optimizer.calcite.HiveTypeSystemImpl; -import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable; -import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil; -import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveVolcanoPlanner; -import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; -import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter; -import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; -import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject; -import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode; -import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSort; -import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; -import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveUnion; -import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterJoinRule; -import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterProjectTransposeRule; -import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterSetOpTransposeRule; -import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePartitionPruneRule; -import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ASTConverter; -import org.apache.hadoop.hive.ql.optimizer.calcite.translator.JoinCondTypeCheckProcFactory; -import org.apache.hadoop.hive.ql.optimizer.calcite.translator.JoinTypeCheckCtx; -import org.apache.hadoop.hive.ql.optimizer.calcite.translator.RexNodeConverter; -import org.apache.hadoop.hive.ql.optimizer.calcite.translator.SqlFunctionConverter; -import org.apache.hadoop.hive.ql.optimizer.calcite.translator.TypeConverter; import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext; import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.tableSpec.SpecType; +import org.apache.hadoop.hive.ql.parse.CalcitePlanner.ASTSearcher; +import org.apache.hadoop.hive.ql.parse.CalcitePlanner.PreCboCtx; import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.OrderExpression; import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.OrderSpec; import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PTFInputSpec; @@ -288,7 +197,6 @@ import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; -import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import 
org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; @@ -297,12 +205,6 @@ import org.apache.hadoop.mapred.InputFormat; import org.apache.hadoop.mapred.OutputFormat; -import com.google.common.base.Function; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableList.Builder; -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.Lists; - /** * Implementation of the semantic analyzer. It generates the query plan. * There are other specific semantic analyzers for some hive operations such as @@ -369,7 +271,7 @@ //flag for partial scan during analyze ... compute statistics protected boolean partialscan; - private volatile boolean runCBO = true; // TODO: why is this volatile? + private volatile boolean runCBO = true; private volatile boolean disableJoinMerge = false; /* @@ -382,9 +284,9 @@ private ArrayList ctesExpanded; /** Not thread-safe. */ - private final ASTSearcher astSearcher = new ASTSearcher(); + final ASTSearcher astSearcher = new ASTSearcher(); - private static class Phase1Ctx { + static class Phase1Ctx { String dest; int nextNum; } @@ -674,6 +576,14 @@ public static String generateErrorMessage(ASTNode ast, String message) { return sb.toString(); } + ASTNode getAST() { + return this.ast; + } + + void setAST(ASTNode newAST) { + this.ast = newAST; + } + /** * Goes though the tabref tree and finds the alias for the table. Once found, * it records the table name-> alias association in aliasToTabs. It also makes @@ -805,6 +715,10 @@ private String processTable(QB qb, ASTNode tabref) throws SemanticException { return alias; } + Map getNameToSplitSampleMap() { + return this.nameToSplitSample; + } + // Generate a temp table out of a value clause private ASTNode genValuesTempTable(ASTNode originalFrom) throws SemanticException { // Pick a name for the table @@ -1074,7 +988,7 @@ private void addCTEAsSubQuery(QB qb, String cteName, String cteAlias) throws Sem qb.rewriteCTEToSubq(cteAlias, cteName, cteQBExpr); } - private boolean isJoinToken(ASTNode node) { + static boolean isJoinToken(ASTNode node) { if ((node.getToken().getType() == HiveParser.TOK_JOIN) || (node.getToken().getType() == HiveParser.TOK_CROSSJOIN) || isOuterJoinToken(node) @@ -1086,7 +1000,7 @@ private boolean isJoinToken(ASTNode node) { return false; } - private boolean isOuterJoinToken(ASTNode node) { + static private boolean isOuterJoinToken(ASTNode node) { return (node.getToken().getType() == HiveParser.TOK_LEFTOUTERJOIN) || (node.getToken().getType() == HiveParser.TOK_RIGHTOUTERJOIN) || (node.getToken().getType() == HiveParser.TOK_FULLOUTERJOIN); @@ -1182,30 +1096,6 @@ private String processLateralView(QB qb, ASTNode lateralView) return alias; } - /** The context that doPhase1 uses to populate information pertaining - * to CBO (currently, this is used for CTAS and insert-as-select). 
*/ - private static class PreCboCtx { - enum Type { - NONE, - INSERT, - CTAS, - - UNEXPECTED - } - public ASTNode nodeOfInterest; - public Type type = Type.NONE; - public void set(Type type, ASTNode ast) { - if (this.type != Type.NONE) { - STATIC_LOG.warn("Setting " + type + " when already " + this.type - + "; node " + ast.dump() + " vs old node " + nodeOfInterest.dump()); - this.type = Type.UNEXPECTED; - return; - } - this.type = type; - this.nodeOfInterest = ast; - } - } - /** * Phase 1: (including, but not limited to): * @@ -1522,11 +1412,6 @@ public boolean doPhase1(ASTNode ast, QB qb, Phase1Ctx ctx_1, PreCboCtx cboCtx) return phase1Result; } - private void traceLogAst(ASTNode ast, String what) { - if (!LOG.isTraceEnabled()) return; - LOG.trace(what + ast.dump()); - } - private void getMetaData(QBExpr qbexpr, ReadEntity parentInput) throws SemanticException { if (qbexpr.getOpcode() == QBExpr.Opcode.NULLOP) { @@ -2761,7 +2646,7 @@ private Operator genNotNullFilterForJoinSourcePlan(QB qb, Operator input, @SuppressWarnings("nls") // TODO: make aliases unique, otherwise needless rewriting takes place - private Integer genColListRegex(String colRegex, String tabAlias, ASTNode sel, + Integer genColListRegex(String colRegex, String tabAlias, ASTNode sel, ArrayList col_list, HashSet excludeCols, RowResolver input, RowResolver colSrcRR, Integer pos, RowResolver output, List aliases, boolean ensureUniqueCols) throws SemanticException { @@ -2907,7 +2792,7 @@ private String getScriptArgs(String cmd) { return (end == -1) ? "" : cmd.substring(end, cmd.length()); } - private static int getPositionFromInternalName(String internalName) { + static int getPositionFromInternalName(String internalName) { return HiveConf.getPositionFromInternalName(internalName); } @@ -3358,7 +3243,7 @@ private int setBit(int bitmap, int bitIdx) { } } - private static String[] getColAlias(ASTNode selExpr, String defaultName, + static String[] getColAlias(ASTNode selExpr, String defaultName, RowResolver inputRR, boolean includeFuncName, int colNum) { String colAlias = null; String tabAlias = null; @@ -3434,7 +3319,7 @@ private int setBit(int bitmap, int bitIdx) { * Returns whether the pattern is a regex expression (instead of a normal * string). Normal string is a string with all alphabets/digits and "_". */ - private static boolean isRegex(String pattern, HiveConf conf) { + static boolean isRegex(String pattern, HiveConf conf) { String qIdSupport = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_QUOTEDID_SUPPORT); if ( "column".equals(qIdSupport)) { return false; @@ -3727,7 +3612,7 @@ private static boolean isRegex(String pattern, HiveConf conf) { return output; } - private String recommendName(ExprNodeDesc exp, String colAlias) { + String recommendName(ExprNodeDesc exp, String colAlias) { if (!colAlias.startsWith(autogenColAliasPrfxLbl)) { return null; } @@ -3738,6 +3623,14 @@ private String recommendName(ExprNodeDesc exp, String colAlias) { return null; } + String getAutogenColAliasPrfxLbl() { + return this.autogenColAliasPrfxLbl; + } + + boolean autogenColAliasPrfxIncludeFuncName() { + return this.autogenColAliasPrfxIncludeFuncName; + } + /** * Class to store GenericUDAF related information. 
*/ @@ -3844,7 +3737,7 @@ static GenericUDAFInfo getGenericUDAFInfo(GenericUDAFEvaluator evaluator, return r; } - private static GenericUDAFEvaluator.Mode groupByDescModeToUDAFMode( + static GenericUDAFEvaluator.Mode groupByDescModeToUDAFMode( GroupByDesc.Mode mode, boolean isDistinct) { switch (mode) { case COMPLETE: @@ -9543,7 +9436,7 @@ private Operator genTablePlan(String alias, QB qb) throws SemanticException { return output; } - private boolean isSkewedCol(String alias, QB qb, String colName) { + static boolean isSkewedCol(String alias, QB qb, String colName) { boolean isSkewedCol = false; List skewedCols = qb.getSkewedColumnNames(alias); for (String skewedCol : skewedCols) { @@ -9752,7 +9645,7 @@ public Operator genPlan(QB qb, boolean skipAmbiguityCheck) rewriteRRForSubQ(qb.getAlias(), bodyOpInfo, skipAmbiguityCheck); } - this.qb = qb; + setQB(qb); return bodyOpInfo; } @@ -10044,7 +9937,7 @@ public void analyzeInternal(ASTNode ast) throws SemanticException { if (cboCtx.type == PreCboCtx.Type.CTAS) { queryForCbo = cboCtx.nodeOfInterest; // nodeOfInterest is the query } - runCBO = canHandleAstForCbo(queryForCbo, qb, cboCtx); + runCBO = CalcitePlanner.canHandleAstForCbo(queryForCbo, qb, cboCtx, this); } // Save the result schema derived from the sink operator produced @@ -10055,7 +9948,7 @@ public void analyzeInternal(ASTNode ast) throws SemanticException { if (runCBO) { disableJoinMerge = true; - CalciteBasedPlanner calcitePlanner = new CalciteBasedPlanner(); + CalcitePlanner calcitePlanner = new CalcitePlanner(this); boolean reAnalyzeAST = false; try { @@ -10063,13 +9956,13 @@ public void analyzeInternal(ASTNode ast) throws SemanticException { ASTNode newAST = calcitePlanner.getOptimizedAST(prunedPartitions); // 1.1. Fix up the query for insert/ctas - newAST = fixUpCtasAndInsertAfterCbo(ast, newAST, cboCtx); + newAST = calcitePlanner.fixUpCtasAndInsertAfterCbo(ast, newAST, cboCtx); // 2. Regen OP plan from optimized AST init(false); if (cboCtx.type == PreCboCtx.Type.CTAS) { // Redo create-table analysis, because it's not part of doPhase1. - newAST = reAnalyzeCtasAfterCbo(newAST); + newAST = calcitePlanner.reAnalyzeCtasAfterCbo(newAST); } ctx_1 = initPhase1Ctx(); if (!doPhase1(newAST, qb, ctx_1, null)) { @@ -10095,7 +9988,7 @@ public void analyzeInternal(ASTNode ast) throws SemanticException { * .getRowResolver(), true); */ } catch (Exception e) { - boolean isMissingStats = calcitePlanner.noColsMissingStats.get() > 0; + boolean isMissingStats = calcitePlanner.getNumberOfColsMissingStats() > 0; if (isMissingStats) { LOG.error("CBO failed due to missing column stats (see previous errors), skipping CBO"); } else { @@ -10218,120 +10111,6 @@ public void analyzeInternal(ASTNode ast) throws SemanticException { return; } - private ASTNode fixUpCtasAndInsertAfterCbo( - ASTNode originalAst, ASTNode newAst, PreCboCtx cboCtx) throws SemanticException { - switch (cboCtx.type) { - case NONE: return newAst; // nothing to do - case CTAS: { - // Patch the optimized query back into original CTAS AST, replacing the original query. - replaceASTChild(cboCtx.nodeOfInterest, newAst); - return originalAst; - } - case INSERT: { - // We need to patch the dest back to original into new query. - // This makes assumptions about the structure of the AST. 
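// (A hedged illustration of the patch-back described above, assuming the usual
// Hive tree shapes; token names come from HiveParser, and the shapes are
// simplified for the sake of the note:
//   CBO output:  TOK_QUERY -> TOK_INSERT -> TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)
//   original:    TOK_QUERY -> TOK_INSERT -> TOK_DESTINATION (TOK_TAB target)
// The breadth-first search below locates the placeholder destination in the new
// AST, and replaceASTChild swaps the original destination subtree back in.)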
- ASTNode newDest = astSearcher.simpleBreadthFirstSearch( - newAst, HiveParser.TOK_QUERY, HiveParser.TOK_INSERT, HiveParser.TOK_DESTINATION); - if (newDest == null) { - LOG.error("Cannot find destination after CBO; new ast is "+ newAst.dump()); - throw new SemanticException("Cannot find destination after CBO"); - } - replaceASTChild(newDest, cboCtx.nodeOfInterest); - return newAst; - } - default: throw new AssertionError("Unexpected type " + cboCtx.type); - } - } - - private ASTNode reAnalyzeCtasAfterCbo(ASTNode newAst) throws SemanticException { - // analyzeCreateTable uses this.ast, but doPhase1 doesn't, so only reset it here. - this.ast = newAst; - newAst = analyzeCreateTable(newAst, qb, null); - if (newAst == null) { - LOG.error("analyzeCreateTable failed to initialize CTAS after CBO;" - + " new ast is " + this.ast.dump()); - throw new SemanticException("analyzeCreateTable failed to initialize CTAS after CBO"); - } - return newAst; - } - - private boolean canHandleAstForCbo(ASTNode ast, QB qb, PreCboCtx cboCtx) { - int root = ast.getToken().getType(); - boolean needToLogMessage = LOG.isInfoEnabled(); - boolean isSupportedRoot = - root == HiveParser.TOK_QUERY || root == HiveParser.TOK_EXPLAIN || qb.isCTAS(); - // Check AST. - // Assumption: If top level QB is query then everything below it must also be Query - // Can there be an insert or CTAS that wouldn't - // be supported and would require additional checks similar to IsQuery? - boolean isSupportedType = - qb.getIsQuery() || qb.isCTAS() || cboCtx.type == PreCboCtx.Type.INSERT; - boolean noBadTokens = HiveCalciteUtil.validateASTForUnsupportedTokens(ast); - boolean result = isSupportedRoot && isSupportedType && createVwDesc == null && noBadTokens; - if (!result) { - if (needToLogMessage) { - String msg = ""; - if (!isSupportedRoot) msg += "doesn't have QUERY or EXPLAIN as root and not a CTAS; "; - if (!isSupportedType) msg += "is not a query, CTAS, or insert; "; - if (createVwDesc != null) msg += "has create view; "; - if (!noBadTokens) msg += "has unsupported tokens; "; - - if (msg.isEmpty()) msg += "has some unspecified limitations; "; - LOG.info("Not invoking CBO because the statement " + msg.substring(0, msg.length() - 2)); - } - return false; - } - // Now check QB in more detail. canHandleQbForCbo returns null if query can be handled. - String msg = canHandleQbForCbo(qb, true, needToLogMessage); - if (msg == null) { - return true; - } - if (needToLogMessage) { - LOG.info("Not invoking CBO because the statement " + msg.substring(0, msg.length() - 2)); - } - return false; - } - - private class ASTSearcher { - private final LinkedList searchQueue = new LinkedList(); - /** - * Performs breadth-first search of the AST for a nested set of tokens. Tokens don't have to be - * each others' direct children, they can be separated by layers of other tokens. For each token - * in the list, the first one found is matched and there's no backtracking; thus, if AST has - * multiple instances of some token, of which only one matches, it is not guaranteed to be found. - * We use this for simple things. - * Not thread-safe - reuses searchQueue. - */ - public ASTNode simpleBreadthFirstSearch(ASTNode ast, int... 
tokens) { - searchQueue.clear(); - searchQueue.add(ast); - for (int i = 0; i < tokens.length; ++i) { - boolean found = false; - int token = tokens[i]; - while (!searchQueue.isEmpty() && !found) { - ASTNode next = searchQueue.poll(); - found = next.getType() == token; - if (found) { - if (i == tokens.length - 1) return next; - searchQueue.clear(); - } - for (int j = 0; j < next.getChildCount(); ++j) { - searchQueue.add((ASTNode)next.getChild(j)); - } - } - if (!found) return null; - } - return null; - } - } - - private void replaceASTChild(ASTNode child, ASTNode newChild) { - ASTNode parent = (ASTNode)child.parent; - int childIndex = child.childIndex; - parent.deleteChild(childIndex); - parent.insertChild(childIndex, newChild); - } - private void putAccessedColumnsToReadEntity(HashSet inputs, ColumnAccessInfo columnAccessInfo) { Map> tableToColumnAccessMap = columnAccessInfo.getTableToColumnAccessMap(); if (tableToColumnAccessMap != null && !tableToColumnAccessMap.isEmpty()) { @@ -10507,13 +10286,13 @@ private void saveViewDefinition() throws SemanticException { createVwDesc.setViewExpandedText(expandedText); } - private List convertRowSchemaToViewSchema(RowResolver rr) throws SemanticException { + static List convertRowSchemaToViewSchema(RowResolver rr) throws SemanticException { List fieldSchema = convertRowSchemaToResultSetSchema(rr, false); ParseUtils.validateColumnNameUniqueness(fieldSchema); return fieldSchema; } - private List convertRowSchemaToResultSetSchema(RowResolver rr, + static List convertRowSchemaToResultSetSchema(RowResolver rr, boolean useTabAliasIfAvailable) { List fieldSchemas = new ArrayList(); String[] qualifiedColName; @@ -10828,7 +10607,7 @@ public RowResolver getRowResolver(Operator opt) { * the semantic analyzer need to deal with the select statement with respect * to the SerDe and Storage Format. */ - private ASTNode analyzeCreateTable( + ASTNode analyzeCreateTable( ASTNode ast, QB qb, PreCboCtx cboCtx) throws SemanticException { String[] qualifiedTabName = getQualifiedTableName((ASTNode) ast.getChild(0)); String dbDotTab = getDotName(qualifiedTabName); @@ -11177,6 +10956,10 @@ private ASTNode analyzeCreateView(ASTNode ast, QB qb) return selectStmt; } + CreateViewDesc getCreateViewDesc() { + return this.createVwDesc; + } + // validate the create view statement // the statement could be CREATE VIEW, REPLACE VIEW, or ALTER VIEW AS SELECT // check semantic conditions @@ -12488,2160 +12271,4 @@ protected boolean updating() { protected boolean deleting() { return false; } - - /**** Temporary Place Holder For Calcite plan Gen, Optimizer ****/ - - /** - * Entry point to Optimizations using Calcite. Checks whether Calcite can handle the query. - * @param qbToChk Query block to check. - * @param verbose Whether return value should be verbose in case of failure. - * @return null if the query can be handled; non-null reason string if it cannot be. - */ - private String canHandleQbForCbo(QB qbToChk, boolean topLevelQB, boolean verbose) { - // Assumption: - // 1. If top level QB is query then everything below it must also be Query - // 2. 
Nested Subquery will return false for qbToChk.getIsQuery() - boolean isInTest = conf.getBoolVar(ConfVars.HIVE_IN_TEST); - boolean isStrictTest = isInTest - && !conf.getVar(ConfVars.HIVEMAPREDMODE).equalsIgnoreCase("nonstrict"); - boolean hasEnoughJoins = !topLevelQB || (queryProperties.getJoinCount() > 1) || isInTest; - if (!isStrictTest && hasEnoughJoins && !queryProperties.hasClusterBy() - && !queryProperties.hasDistributeBy() && !queryProperties.hasSortBy() - && !queryProperties.hasPTF() && !queryProperties.usesScript() - && !queryProperties.hasMultiDestQuery() && !queryProperties.hasLateralViews()) { - return null; // Ok to run CBO. - } - - // Not ok to run CBO, build error message. - String msg = ""; - if (verbose) { - if (isStrictTest) msg += "is in test running in mode other than nonstrict; "; - if (!hasEnoughJoins) msg += "has too few joins; "; - if (queryProperties.hasClusterBy()) msg += "has cluster by; "; - if (queryProperties.hasDistributeBy()) msg += "has distribute by; "; - if (queryProperties.hasSortBy()) msg += "has sort by; "; - if (queryProperties.hasPTF()) msg += "has PTF; "; - if (queryProperties.usesScript()) msg += "uses scripts; "; - if (queryProperties.hasMultiDestQuery()) msg += "is a multi-destination query; "; - if (queryProperties.hasLateralViews()) msg += "has lateral views; "; - - if (msg.isEmpty()) msg += "has some unspecified limitations; "; - } - return msg; - } - - private class CalciteBasedPlanner implements Frameworks.PlannerAction<RelNode> { - private RelOptCluster cluster; - private RelOptSchema relOptSchema; - private SemanticException semanticException; - private Map<String, PrunedPartitionList> partitionCache; - private final AtomicInteger noColsMissingStats = new AtomicInteger(0); - List<FieldSchema> topLevelFieldSchema; - - // TODO: Do we need to keep track of RR, ColNameToPosMap for every op or - // just last one. - LinkedHashMap<RelNode, RowResolver> relToHiveRR = new LinkedHashMap<RelNode, RowResolver>(); - LinkedHashMap<RelNode, ImmutableMap<String, Integer>> relToHiveColNameCalcitePosMap = new LinkedHashMap<RelNode, ImmutableMap<String, Integer>>(); - - private ASTNode getOptimizedAST(Map<String, PrunedPartitionList> partitionCache) - throws SemanticException { - ASTNode calciteOptimizedAST = null; - RelNode optimizedCalcitePlan = null; - this.partitionCache = partitionCache; - - try { - optimizedCalcitePlan = Frameworks.withPlanner(this, - Frameworks.newConfigBuilder().typeSystem(new HiveTypeSystemImpl()).build()); - } catch (Exception e) { - rethrowCalciteException(e); - throw new AssertionError("rethrowCalciteException didn't throw for " + e.getMessage()); - } - calciteOptimizedAST = ASTConverter.convert(optimizedCalcitePlan, topLevelFieldSchema); - - return calciteOptimizedAST; - } - - /* - * Unwraps a chain of useless UndeclaredThrowableException-s, InvocationTargetException-s - * and RuntimeException-s potentially coming from CBO/Calcite code. - */ - private void rethrowCalciteException(Exception e) throws SemanticException { - Throwable first = (semanticException != null) ? semanticException : e, - current = first, cause = current.getCause(); - while (cause != null) { - Throwable causeOfCause = cause.getCause(); - if (current == first && causeOfCause == null && isUselessCause(first)) { - // "cause" is a root cause, and "e"/"first" is a useless exception it's wrapped in. - first = cause; - break; - } else if (causeOfCause != null && isUselessCause(cause) - && ExceptionHelper.resetCause(current, causeOfCause)) { - // "cause" was a useless intermediate cause and was replaced with its own cause.
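- // The useless wrapper is thus spliced out of the cause chain, and the scan resumes from the same position with the new cause.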
- cause = causeOfCause; - continue; // do loop once again with the new cause of "current" - } - current = cause; - cause = current.getCause(); - } - - if (first instanceof RuntimeException) { - throw (RuntimeException)first; - } else if (first instanceof SemanticException) { - throw (SemanticException)first; - } - throw new RuntimeException(first); - } - - private boolean isUselessCause(Throwable t) { - return t instanceof RuntimeException || t instanceof InvocationTargetException - || t instanceof UndeclaredThrowableException; - } - - @Override - public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlus rootSchema) { - RelNode calciteGenPlan = null; - RelNode calcitePreCboPlan = null; - RelNode calciteOptimizedPlan = null; - - /* - * recreate cluster, so that it picks up the additional traitDef - */ - RelOptPlanner planner = HiveVolcanoPlanner.createPlanner(); - final RelOptQuery query = new RelOptQuery(planner); - final RexBuilder rexBuilder = cluster.getRexBuilder(); - cluster = query.createCluster(rexBuilder.getTypeFactory(), rexBuilder); - - this.cluster = cluster; - this.relOptSchema = relOptSchema; - - try { - calciteGenPlan = genLogicalPlan(qb, true); - topLevelFieldSchema = convertRowSchemaToResultSetSchema(relToHiveRR.get(calciteGenPlan), - HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_RESULTSET_USE_UNIQUE_COLUMN_NAMES)); - } catch (SemanticException e) { - semanticException = e; - throw new RuntimeException(e); - } - - calcitePreCboPlan = applyPreCBOTransforms(calciteGenPlan, HiveDefaultRelMetadataProvider.INSTANCE); - List<RelMetadataProvider> list = Lists.newArrayList(); - list.add(HiveDefaultRelMetadataProvider.INSTANCE); - RelTraitSet desiredTraits = cluster.traitSetOf(HiveRelNode.CONVENTION, RelCollationImpl.EMPTY); - - HepProgram hepPgm = null; - HepProgramBuilder hepPgmBldr = new HepProgramBuilder().addMatchOrder(HepMatchOrder.BOTTOM_UP) - .addRuleInstance(new JoinToMultiJoinRule(HiveJoin.class)); - hepPgmBldr.addRuleInstance(new LoptOptimizeJoinRule(HiveJoin.HIVE_JOIN_FACTORY, - HiveProject.DEFAULT_PROJECT_FACTORY, HiveFilter.DEFAULT_FILTER_FACTORY)); - - hepPgm = hepPgmBldr.build(); - HepPlanner hepPlanner = new HepPlanner(hepPgm); - - hepPlanner.registerMetadataProviders(list); - RelMetadataProvider chainedProvider = ChainedRelMetadataProvider.of(list); - cluster.setMetadataProvider(new CachingRelMetadataProvider(chainedProvider, hepPlanner)); - - RelNode rootRel = calcitePreCboPlan; - hepPlanner.setRoot(rootRel); - if (!calcitePreCboPlan.getTraitSet().equals(desiredTraits)) { - rootRel = hepPlanner.changeTraits(calcitePreCboPlan, desiredTraits); - } - hepPlanner.setRoot(rootRel); - - calciteOptimizedPlan = hepPlanner.findBestExp(); - - if (LOG.isDebugEnabled() && !conf.getBoolVar(ConfVars.HIVE_IN_TEST)) { - LOG.debug("CBO Planning details:\n"); - LOG.debug("Original Plan:\n" + RelOptUtil.toString(calciteGenPlan)); - LOG.debug("Plan After PPD, PartPruning, ColumnPruning:\n" - + RelOptUtil.toString(calcitePreCboPlan)); - LOG.debug("Plan After Join Reordering:\n" - + RelOptUtil.toString(calciteOptimizedPlan, SqlExplainLevel.ALL_ATTRIBUTES)); - } - - return calciteOptimizedPlan; - } - - public RelNode applyPreCBOTransforms(RelNode basePlan, RelMetadataProvider mdProvider) { - - // TODO: Decorrelation of subquery should be done before attempting - // Partition Pruning; otherwise Expression evaluation may try to execute a - // correlated subquery.
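- // The pre-CBO pass below runs, in order: semi-join pushdown, filter pushdown plus partition pruning, unused-field trimming, and a final filter/project transpose.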
- - // Push Down Semi Joins - basePlan = hepPlan(basePlan, true, mdProvider, - SemiJoinJoinTransposeRule.INSTANCE, - SemiJoinFilterTransposeRule.INSTANCE, - SemiJoinProjectTransposeRule.INSTANCE); - - basePlan = hepPlan(basePlan, true, mdProvider, - new HiveFilterProjectTransposeRule( - Filter.class, HiveFilter.DEFAULT_FILTER_FACTORY, HiveProject.class, - HiveProject.DEFAULT_PROJECT_FACTORY), new HiveFilterSetOpTransposeRule( - HiveFilter.DEFAULT_FILTER_FACTORY), new FilterMergeRule( - HiveFilter.DEFAULT_FILTER_FACTORY), HiveFilterJoinRule.JOIN, - HiveFilterJoinRule.FILTER_ON_JOIN, - new FilterAggregateTransposeRule( - Filter.class, - HiveFilter.DEFAULT_FILTER_FACTORY, - Aggregate.class)); - - basePlan = hepPlan(basePlan, false, mdProvider, new JoinPushTransitivePredicatesRule( - Join.class, HiveFilter.DEFAULT_FILTER_FACTORY), - // TODO: Enable it after CALCITE-407 is fixed - //RemoveTrivialProjectRule.INSTANCE, - new HivePartitionPruneRule(SemanticAnalyzer.this.conf)); - - RelFieldTrimmer fieldTrimmer = new RelFieldTrimmer(null, HiveProject.DEFAULT_PROJECT_FACTORY, - HiveFilter.DEFAULT_FILTER_FACTORY, HiveJoin.HIVE_JOIN_FACTORY, RelFactories.DEFAULT_SEMI_JOIN_FACTORY, - HiveSort.HIVE_SORT_REL_FACTORY, HiveAggregate.HIVE_AGGR_REL_FACTORY, HiveUnion.UNION_REL_FACTORY); - basePlan = fieldTrimmer.trim(basePlan); - - basePlan = hepPlan(basePlan, true, mdProvider, - new FilterProjectTransposeRule(Filter.class, - HiveFilter.DEFAULT_FILTER_FACTORY, HiveProject.class, - HiveProject.DEFAULT_PROJECT_FACTORY)); - - return basePlan; - } - - private RelNode hepPlan(RelNode basePlan, boolean followPlanChanges, - RelMetadataProvider mdProvider, RelOptRule... rules) { - - RelNode optimizedRelNode = basePlan; - HepProgramBuilder programBuilder = new HepProgramBuilder(); - if (followPlanChanges) { - programBuilder.addMatchOrder(HepMatchOrder.TOP_DOWN); - programBuilder = programBuilder.addRuleCollection(ImmutableList.copyOf(rules)); - } else { - // TODO: Should this be also TOP_DOWN? - for (RelOptRule r : rules) - programBuilder.addRuleInstance(r); - } - - HepPlanner planner = new HepPlanner(programBuilder.build()); - List list = Lists.newArrayList(); - list.add(mdProvider); - planner.registerMetadataProviders(list); - RelMetadataProvider chainedProvider = ChainedRelMetadataProvider.of(list); - basePlan.getCluster().setMetadataProvider( - new CachingRelMetadataProvider(chainedProvider, planner)); - - planner.setRoot(basePlan); - optimizedRelNode = planner.findBestExp(); - - return optimizedRelNode; - } - - @SuppressWarnings("nls") - private RelNode genUnionLogicalPlan(String unionalias, String leftalias, RelNode leftRel, - String rightalias, RelNode rightRel) throws SemanticException { - HiveUnion unionRel = null; - - // 1. Get Row Resolvers, Column map for original left and right input of - // Union Rel - RowResolver leftRR = this.relToHiveRR.get(leftRel); - RowResolver rightRR = this.relToHiveRR.get(rightRel); - HashMap leftmap = leftRR.getFieldMap(leftalias); - HashMap rightmap = rightRR.getFieldMap(rightalias); - - // 2. Validate that Union is feasible according to Hive (by using type - // info from RR) - if (leftmap.size() != rightmap.size()) { - throw new SemanticException("Schema of both sides of union should match."); - } - - ASTNode tabref = qb.getAliases().isEmpty() ? 
null : qb.getParseInfo().getSrcForAlias( - qb.getAliases().get(0)); - for (Map.Entry<String, ColumnInfo> lEntry : leftmap.entrySet()) { - String field = lEntry.getKey(); - ColumnInfo lInfo = lEntry.getValue(); - ColumnInfo rInfo = rightmap.get(field); - if (rInfo == null) { - throw new SemanticException(generateErrorMessage(tabref, - "Schema of both sides of union should match. " + rightalias - + " does not have the field " + field)); - } - if (lInfo == null) { - throw new SemanticException(generateErrorMessage(tabref, - "Schema of both sides of union should match. " + leftalias - + " does not have the field " + field)); - } - if (!lInfo.getInternalName().equals(rInfo.getInternalName())) { - throw new CalciteSemanticException(generateErrorMessage(tabref, - "Schema of both sides of union should match: field " + field + ":" - + " appears on the left side of the UNION at column position: " - + getPositionFromInternalName(lInfo.getInternalName()) - + ", and on the right side of the UNION at column position: " - + getPositionFromInternalName(rInfo.getInternalName()) - + ". Column positions should match for a UNION")); - } - // try widening conversion, otherwise fail union - TypeInfo commonTypeInfo = FunctionRegistry.getCommonClassForUnionAll(lInfo.getType(), - rInfo.getType()); - if (commonTypeInfo == null) { - throw new CalciteSemanticException(generateErrorMessage(tabref, - "Schema of both sides of union should match: Column " + field + " is of type " - + lInfo.getType().getTypeName() + " on first table and type " - + rInfo.getType().getTypeName() + " on second table")); - } - } - - // 3. Construct Union Output RR using original left & right Input - RowResolver unionoutRR = new RowResolver(); - for (Map.Entry<String, ColumnInfo> lEntry : leftmap.entrySet()) { - String field = lEntry.getKey(); - ColumnInfo lInfo = lEntry.getValue(); - ColumnInfo rInfo = rightmap.get(field); - ColumnInfo unionColInfo = new ColumnInfo(lInfo); - unionColInfo.setTabAlias(unionalias); - unionColInfo.setType(FunctionRegistry.getCommonClassForUnionAll(lInfo.getType(), - rInfo.getType())); - unionoutRR.put(unionalias, field, unionColInfo); - } - - // 4.
Determine which columns require a cast on the left/right input (Calcite - // requires exact types on both sides of a union) - boolean leftNeedsTypeCast = false; - boolean rightNeedsTypeCast = false; - List<RexNode> leftProjs = new ArrayList<RexNode>(); - List<RexNode> rightProjs = new ArrayList<RexNode>(); - List<RelDataTypeField> leftRowDT = leftRel.getRowType().getFieldList(); - List<RelDataTypeField> rightRowDT = rightRel.getRowType().getFieldList(); - - RelDataType leftFieldDT; - RelDataType rightFieldDT; - RelDataType unionFieldDT; - for (int i = 0; i < leftRowDT.size(); i++) { - leftFieldDT = leftRowDT.get(i).getType(); - rightFieldDT = rightRowDT.get(i).getType(); - if (!leftFieldDT.equals(rightFieldDT)) { - unionFieldDT = TypeConverter.convert(unionoutRR.getColumnInfos().get(i).getType(), - cluster.getTypeFactory()); - if (!unionFieldDT.equals(leftFieldDT)) { - leftNeedsTypeCast = true; - } - leftProjs.add(cluster.getRexBuilder().ensureType(unionFieldDT, - cluster.getRexBuilder().makeInputRef(leftFieldDT, i), true)); - - if (!unionFieldDT.equals(rightFieldDT)) { - rightNeedsTypeCast = true; - } - rightProjs.add(cluster.getRexBuilder().ensureType(unionFieldDT, - cluster.getRexBuilder().makeInputRef(rightFieldDT, i), true)); - } else { - leftProjs.add(cluster.getRexBuilder().ensureType(leftFieldDT, - cluster.getRexBuilder().makeInputRef(leftFieldDT, i), true)); - rightProjs.add(cluster.getRexBuilder().ensureType(rightFieldDT, - cluster.getRexBuilder().makeInputRef(rightFieldDT, i), true)); - } - } - - // 5. Introduce Project Rel above original left/right inputs if cast is - // needed for type parity - RelNode unionLeftInput = leftRel; - RelNode unionRightInput = rightRel; - if (leftNeedsTypeCast) { - unionLeftInput = HiveProject.create(leftRel, leftProjs, leftRel.getRowType() - .getFieldNames()); - } - if (rightNeedsTypeCast) { - unionRightInput = HiveProject.create(rightRel, rightProjs, rightRel.getRowType() - .getFieldNames()); - } - - // 6. Construct Union Rel - ImmutableList.Builder<RelNode> bldr = new ImmutableList.Builder<RelNode>(); - bldr.add(unionLeftInput); - bldr.add(unionRightInput); - unionRel = new HiveUnion(cluster, TraitsUtil.getDefaultTraitSet(cluster), - bldr.build()); - - relToHiveRR.put(unionRel, unionoutRR); - relToHiveColNameCalcitePosMap.put(unionRel, - this.buildHiveToCalciteColumnMap(unionoutRR, unionRel)); - - return unionRel; - } - - private RelNode genJoinRelNode(RelNode leftRel, RelNode rightRel, JoinType hiveJoinType, - ASTNode joinCond) throws SemanticException { - RelNode joinRel = null; - - // 1. construct the RowResolver for the new Join Node by combining row - // resolvers from left, right - RowResolver leftRR = this.relToHiveRR.get(leftRel); - RowResolver rightRR = this.relToHiveRR.get(rightRel); - RowResolver joinRR = null; - - if (hiveJoinType != JoinType.LEFTSEMI) { - joinRR = RowResolver.getCombinedRR(leftRR, rightRR); - } else { - joinRR = new RowResolver(); - if (!RowResolver.add(joinRR, leftRR)) { - LOG.warn("Duplicates detected when adding columns to RR: see previous message"); - } - } - - // 2.
Construct ExprNodeDesc representing the Join Condition - RexNode calciteJoinCond = null; - if (joinCond != null) { - JoinTypeCheckCtx jCtx = new JoinTypeCheckCtx(leftRR, rightRR, hiveJoinType); - Map<ASTNode, ExprNodeDesc> exprNodes = JoinCondTypeCheckProcFactory.genExprNode(joinCond, - jCtx); - if (jCtx.getError() != null) - throw new SemanticException(SemanticAnalyzer.generateErrorMessage(jCtx.getErrorSrcNode(), - jCtx.getError())); - - ExprNodeDesc joinCondnExprNode = exprNodes.get(joinCond); - - List<RelNode> inputRels = new ArrayList<RelNode>(); - inputRels.add(leftRel); - inputRels.add(rightRel); - calciteJoinCond = RexNodeConverter.convert(cluster, joinCondnExprNode, inputRels, - relToHiveRR, relToHiveColNameCalcitePosMap, false); - } else { - calciteJoinCond = cluster.getRexBuilder().makeLiteral(true); - } - - // 3. Validate that the join condition is legal (i.e. no function referring to - // both sides of the join, only equi-joins) - // TODO: Join filter handling (only supported for OJ by runtime or is it - // supported for IJ as well) - - // 4. Construct Join Rel Node - boolean leftSemiJoin = false; - JoinRelType calciteJoinType; - switch (hiveJoinType) { - case LEFTOUTER: - calciteJoinType = JoinRelType.LEFT; - break; - case RIGHTOUTER: - calciteJoinType = JoinRelType.RIGHT; - break; - case FULLOUTER: - calciteJoinType = JoinRelType.FULL; - break; - case LEFTSEMI: - calciteJoinType = JoinRelType.INNER; - leftSemiJoin = true; - break; - case INNER: - default: - calciteJoinType = JoinRelType.INNER; - break; - } - - if (leftSemiJoin) { - List<RelDataTypeField> sysFieldList = new ArrayList<RelDataTypeField>(); - List<RexNode> leftJoinKeys = new ArrayList<RexNode>(); - List<RexNode> rightJoinKeys = new ArrayList<RexNode>(); - - RexNode nonEquiConds = RelOptUtil.splitJoinCondition(sysFieldList, leftRel, rightRel, - calciteJoinCond, leftJoinKeys, rightJoinKeys, null, null); - - if (!nonEquiConds.isAlwaysTrue()) { - throw new SemanticException("Non equality condition not supported in Semi-Join: " - + nonEquiConds); - } - - RelNode[] inputRels = new RelNode[] { leftRel, rightRel }; - final List<Integer> leftKeys = new ArrayList<Integer>(); - final List<Integer> rightKeys = new ArrayList<Integer>(); - calciteJoinCond = HiveCalciteUtil.projectNonColumnEquiConditions( - HiveProject.DEFAULT_PROJECT_FACTORY, inputRels, leftJoinKeys, rightJoinKeys, 0, - leftKeys, rightKeys); - - joinRel = new SemiJoin(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), - inputRels[0], inputRels[1], calciteJoinCond, ImmutableIntList.copyOf(leftKeys), - ImmutableIntList.copyOf(rightKeys)); - } else { - joinRel = HiveJoin.getJoin(cluster, leftRel, rightRel, calciteJoinCond, calciteJoinType, - leftSemiJoin); - } - // 5. Add new JoinRel & its RR to the maps - relToHiveColNameCalcitePosMap.put(joinRel, this.buildHiveToCalciteColumnMap(joinRR, joinRel)); - relToHiveRR.put(joinRel, joinRR); - - return joinRel; - } - - /** - * Generate Join Logical Plan Relnode by walking through the join AST. - * - * @param aliasToRel - * Alias(Table/Relation alias) to RelNode; only read and not - * written to by this method - * @return - * @throws SemanticException - */ - private RelNode genJoinLogicalPlan(ASTNode joinParseTree, Map<String, RelNode> aliasToRel) - throws SemanticException { - RelNode leftRel = null; - RelNode rightRel = null; - JoinType hiveJoinType = null; - - if (joinParseTree.getToken().getType() == HiveParser.TOK_UNIQUEJOIN) { - String msg = "UNIQUE JOIN is currently not supported in CBO," - + " turn off cbo to use UNIQUE JOIN."; - LOG.debug(msg); - throw new CalciteSemanticException(msg); - } - - // 1.
Determine Join Type - // TODO: What about TOK_CROSSJOIN, TOK_MAPJOIN - switch (joinParseTree.getToken().getType()) { - case HiveParser.TOK_LEFTOUTERJOIN: - hiveJoinType = JoinType.LEFTOUTER; - break; - case HiveParser.TOK_RIGHTOUTERJOIN: - hiveJoinType = JoinType.RIGHTOUTER; - break; - case HiveParser.TOK_FULLOUTERJOIN: - hiveJoinType = JoinType.FULLOUTER; - break; - case HiveParser.TOK_LEFTSEMIJOIN: - hiveJoinType = JoinType.LEFTSEMI; - break; - default: - hiveJoinType = JoinType.INNER; - break; - } - - // 2. Get Left Table Alias - ASTNode left = (ASTNode) joinParseTree.getChild(0); - if ((left.getToken().getType() == HiveParser.TOK_TABREF) - || (left.getToken().getType() == HiveParser.TOK_SUBQUERY) - || (left.getToken().getType() == HiveParser.TOK_PTBLFUNCTION)) { - String tableName = getUnescapedUnqualifiedTableName((ASTNode) left.getChild(0)) - .toLowerCase(); - String leftTableAlias = left.getChildCount() == 1 ? tableName : unescapeIdentifier(left - .getChild(left.getChildCount() - 1).getText().toLowerCase()); - // ptf node form is: ^(TOK_PTBLFUNCTION $name $alias? - // partitionTableFunctionSource partitioningSpec? expression*) - // guaranteed to have an alias here: check done in processJoin - leftTableAlias = (left.getToken().getType() == HiveParser.TOK_PTBLFUNCTION) ? unescapeIdentifier(left - .getChild(1).getText().toLowerCase()) - : leftTableAlias; - leftRel = aliasToRel.get(leftTableAlias); - } else if (isJoinToken(left)) { - leftRel = genJoinLogicalPlan(left, aliasToRel); - } else { - assert (false); - } - - // 3. Get Right Table Alias - ASTNode right = (ASTNode) joinParseTree.getChild(1); - if ((right.getToken().getType() == HiveParser.TOK_TABREF) - || (right.getToken().getType() == HiveParser.TOK_SUBQUERY) - || (right.getToken().getType() == HiveParser.TOK_PTBLFUNCTION)) { - String tableName = getUnescapedUnqualifiedTableName((ASTNode) right.getChild(0)) - .toLowerCase(); - String rightTableAlias = right.getChildCount() == 1 ? tableName : unescapeIdentifier(right - .getChild(right.getChildCount() - 1).getText().toLowerCase()); - // ptf node form is: ^(TOK_PTBLFUNCTION $name $alias? - // partitionTableFunctionSource partitioningSpec? expression*) - // guaranteed to have an alias here: check done in processJoin - rightTableAlias = (right.getToken().getType() == HiveParser.TOK_PTBLFUNCTION) ? unescapeIdentifier(right - .getChild(1).getText().toLowerCase()) - : rightTableAlias; - rightRel = aliasToRel.get(rightTableAlias); - } else { - assert (false); - } - - // 4. Get Join Condition - ASTNode joinCond = (ASTNode) joinParseTree.getChild(2); - - // 5. Create Join rel - return genJoinRelNode(leftRel, rightRel, hiveJoinType, joinCond); - } - - private RelNode genTableLogicalPlan(String tableAlias, QB qb) throws SemanticException { - RowResolver rr = new RowResolver(); - HiveTableScan tableRel = null; - - try { - - // 1. If the table has a Sample specified, bail from Calcite path. - if ( qb.getParseInfo().getTabSample(tableAlias) != null || - SemanticAnalyzer.this.nameToSplitSample.containsKey(tableAlias)) { - String msg = String.format("Table Sample specified for %s." + - " Currently we don't support Table Sample clauses in CBO," + - " turn off cbo for queries on tableSamples.", tableAlias); - LOG.debug(msg); - throw new CalciteSemanticException(msg); - } - - // 2. Get Table Metadata - Table tab = qb.getMetaData().getSrcForAlias(tableAlias); - - // 3.
Get Table Logical Schema (Row Type) - // NOTE: Table logical schema = Non-Partition Cols + Partition Cols + - // Virtual Cols - - // 3.1 Add Column info for non-partition cols (Object Inspector fields) - StructObjectInspector rowObjectInspector = (StructObjectInspector) tab.getDeserializer() - .getObjectInspector(); - List<? extends StructField> fields = rowObjectInspector.getAllStructFieldRefs(); - ColumnInfo colInfo; - String colName; - ArrayList<ColumnInfo> cInfoLst = new ArrayList<ColumnInfo>(); - for (int i = 0; i < fields.size(); i++) { - colName = fields.get(i).getFieldName(); - colInfo = new ColumnInfo( - fields.get(i).getFieldName(), - TypeInfoUtils.getTypeInfoFromObjectInspector(fields.get(i).getFieldObjectInspector()), - tableAlias, false); - colInfo.setSkewedCol(isSkewedCol(tableAlias, qb, colName)); - rr.put(tableAlias, colName, colInfo); - cInfoLst.add(colInfo); - } - // TODO: Fix this - ArrayList<ColumnInfo> nonPartitionColumns = new ArrayList<ColumnInfo>(cInfoLst); - ArrayList<ColumnInfo> partitionColumns = new ArrayList<ColumnInfo>(); - - // 3.2 Add column info corresponding to partition columns - for (FieldSchema part_col : tab.getPartCols()) { - colName = part_col.getName(); - colInfo = new ColumnInfo(colName, - TypeInfoFactory.getPrimitiveTypeInfo(part_col.getType()), tableAlias, true); - rr.put(tableAlias, colName, colInfo); - cInfoLst.add(colInfo); - partitionColumns.add(colInfo); - } - - // 3.3 Add column info corresponding to virtual columns - Iterator<VirtualColumn> vcs = VirtualColumn.getRegistry(conf).iterator(); - while (vcs.hasNext()) { - VirtualColumn vc = vcs.next(); - colInfo = new ColumnInfo(vc.getName(), vc.getTypeInfo(), tableAlias, true, - vc.getIsHidden()); - rr.put(tableAlias, vc.getName(), colInfo); - cInfoLst.add(colInfo); - } - - // 3.4 Build row type from field - RelDataType rowType = TypeConverter.getType(cluster, rr, null); - - // 4. Build RelOptAbstractTable - String fullyQualifiedTabName = tab.getDbName(); - if (fullyQualifiedTabName != null && !fullyQualifiedTabName.isEmpty()) - fullyQualifiedTabName = fullyQualifiedTabName + "." + tab.getTableName(); - else - fullyQualifiedTabName = tab.getTableName(); - RelOptHiveTable optTable = new RelOptHiveTable(relOptSchema, fullyQualifiedTabName, - tableAlias, rowType, tab, nonPartitionColumns, partitionColumns, conf, partitionCache, - noColsMissingStats); - - // 5. Build Hive Table Scan Rel - tableRel = new HiveTableScan(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), optTable, - rowType); - - // 6. Add Schema(RR) to RelNode-Schema map - ImmutableMap<String, Integer> hiveToCalciteColMap = buildHiveToCalciteColumnMap(rr, tableRel); - relToHiveRR.put(tableRel, rr); - relToHiveColNameCalcitePosMap.put(tableRel, hiveToCalciteColMap); - } catch (Exception e) { - if (e instanceof SemanticException) { - throw (SemanticException) e; - } else { - throw new RuntimeException(e); - } - } - - return tableRel; - } - - private RelNode genFilterRelNode(ASTNode filterExpr, RelNode srcRel) throws SemanticException { - ExprNodeDesc filterCondn = genExprNodeDesc(filterExpr, relToHiveRR.get(srcRel)); - if (filterCondn instanceof ExprNodeConstantDesc && - !filterCondn.getTypeString().equals(serdeConstants.BOOLEAN_TYPE_NAME)) { - // queries like select * from t1 where 'foo'; - // Calcite's rule PushFilterThroughProject chokes on it. Arguably, we can insert a cast to - // boolean in such cases, but since Postgres, Oracle and MS SQL server fail at compile time - // for such queries, it's an arcane corner case, not worth adding that complexity.
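- // Bailing out with a CalciteSemanticException sends the query back down the non-CBO path via the fallback in analyzeInternal.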
- throw new CalciteSemanticException("Filter expression with non-boolean return type."); - } - ImmutableMap<String, Integer> hiveColNameCalcitePosMap = this.relToHiveColNameCalcitePosMap - .get(srcRel); - RexNode convertedFilterExpr = new RexNodeConverter(cluster, srcRel.getRowType(), - hiveColNameCalcitePosMap, 0, true).convert(filterCondn); - RexNode factoredFilterExpr = RexUtil.pullFactors(cluster.getRexBuilder(), convertedFilterExpr); - RelNode filterRel = new HiveFilter(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), - srcRel, factoredFilterExpr); - this.relToHiveColNameCalcitePosMap.put(filterRel, hiveColNameCalcitePosMap); - relToHiveRR.put(filterRel, relToHiveRR.get(srcRel)); - - return filterRel; - } - - private RelNode genFilterRelNode(QB qb, ASTNode searchCond, RelNode srcRel, - Map<String, RelNode> aliasToRel, boolean forHavingClause) throws SemanticException { - /* - * Handle Subquery predicates. - * - * Notes (8/22/14 hb): Why is this a copy of the code from {@link - * #genFilterPlan} - for now we will support the same behavior as the non-CBO - * route. - but plan to allow nested SubQueries(Restriction.9.m) and - * multiple SubQuery expressions(Restriction.8.m). This requires us to - * utilize Calcite's Decorrelation mechanics, and for Calcite to fix/flush out - * Null semantics(CALCITE-373) - besides only the driving code has been - * copied. Most of the code, which is in SubQueryUtils and QBSubQuery, is - * reused. - */ - int numSrcColumns = srcRel.getRowType().getFieldCount(); - List<ASTNode> subQueriesInOriginalTree = SubQueryUtils.findSubQueries(searchCond); - if (subQueriesInOriginalTree.size() > 0) { - - /* - * Restriction.9.m :: disallow nested SubQuery expressions. - */ - if (qb.getSubQueryPredicateDef() != null) { - throw new SemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg( - subQueriesInOriginalTree.get(0), "Nested SubQuery expressions are not supported.")); - } - - /* - * Restriction.8.m :: We allow only 1 SubQuery expression per Query. - */ - if (subQueriesInOriginalTree.size() > 1) { - - throw new SemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg( - subQueriesInOriginalTree.get(1), "Only 1 SubQuery expression is supported.")); - } - - /* - * Clone the Search AST; apply all rewrites on the clone.
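- * The original tree must stay untouched: the SubQuery contexts and error messages built below still reference the un-rewritten nodes (subQueriesInOriginalTree).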
- */ - ASTNode clonedSearchCond = (ASTNode) SubQueryUtils.adaptor.dupTree(searchCond); - List subQueries = SubQueryUtils.findSubQueries(clonedSearchCond); - - RowResolver inputRR = relToHiveRR.get(srcRel); - RowResolver outerQBRR = inputRR; - ImmutableMap outerQBPosMap = - relToHiveColNameCalcitePosMap.get(srcRel); - - for (int i = 0; i < subQueries.size(); i++) { - ASTNode subQueryAST = subQueries.get(i); - ASTNode originalSubQueryAST = subQueriesInOriginalTree.get(i); - - int sqIdx = qb.incrNumSubQueryPredicates(); - clonedSearchCond = SubQueryUtils.rewriteParentQueryWhere(clonedSearchCond, subQueryAST); - - QBSubQuery subQuery = SubQueryUtils.buildSubQuery(qb.getId(), sqIdx, subQueryAST, - originalSubQueryAST, ctx); - - if (!forHavingClause) { - qb.setWhereClauseSubQueryPredicate(subQuery); - } else { - qb.setHavingClauseSubQueryPredicate(subQuery); - } - String havingInputAlias = null; - - if (forHavingClause) { - havingInputAlias = "gby_sq" + sqIdx; - aliasToRel.put(havingInputAlias, srcRel); - } - - subQuery.validateAndRewriteAST(inputRR, forHavingClause, havingInputAlias, - aliasToRel.keySet()); - - QB qbSQ = new QB(subQuery.getOuterQueryId(), subQuery.getAlias(), true); - qbSQ.setSubQueryDef(subQuery.getSubQuery()); - Phase1Ctx ctx_1 = initPhase1Ctx(); - doPhase1(subQuery.getSubQueryAST(), qbSQ, ctx_1, null); - getMetaData(qbSQ); - RelNode subQueryRelNode = genLogicalPlan(qbSQ, false); - aliasToRel.put(subQuery.getAlias(), subQueryRelNode); - RowResolver sqRR = relToHiveRR.get(subQueryRelNode); - - /* - * Check.5.h :: For In and Not In the SubQuery must implicitly or - * explicitly only contain one select item. - */ - if (subQuery.getOperator().getType() != SubQueryType.EXISTS - && subQuery.getOperator().getType() != SubQueryType.NOT_EXISTS - && sqRR.getColumnInfos().size() - subQuery.getNumOfCorrelationExprsAddedToSQSelect() > 1) { - throw new SemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg(subQueryAST, - "SubQuery can contain only 1 item in Select List.")); - } - - /* - * If this is a Not In SubQuery Predicate then Join in the Null Check - * SubQuery. See QBSubQuery.NotInCheck for details on why and how this - * is constructed. - */ - if (subQuery.getNotInCheck() != null) { - QBSubQuery.NotInCheck notInCheck = subQuery.getNotInCheck(); - notInCheck.setSQRR(sqRR); - QB qbSQ_nic = new QB(subQuery.getOuterQueryId(), notInCheck.getAlias(), true); - qbSQ_nic.setSubQueryDef(notInCheck.getSubQuery()); - ctx_1 = initPhase1Ctx(); - doPhase1(notInCheck.getSubQueryAST(), qbSQ_nic, ctx_1, null); - getMetaData(qbSQ_nic); - RelNode subQueryNICRelNode = genLogicalPlan(qbSQ_nic, false); - aliasToRel.put(notInCheck.getAlias(), subQueryNICRelNode); - srcRel = genJoinRelNode(srcRel, subQueryNICRelNode, - // set explicitly to inner until we figure out SemiJoin use - // notInCheck.getJoinType(), - JoinType.INNER, notInCheck.getJoinConditionAST()); - inputRR = relToHiveRR.get(srcRel); - if (forHavingClause) { - aliasToRel.put(havingInputAlias, srcRel); - } - } - - /* - * Gen Join between outer Operator and SQ op - */ - subQuery.buildJoinCondition(inputRR, sqRR, forHavingClause, havingInputAlias); - srcRel = genJoinRelNode(srcRel, subQueryRelNode, subQuery.getJoinType(), - subQuery.getJoinConditionAST()); - searchCond = subQuery.updateOuterQueryFilter(clonedSearchCond); - - srcRel = genFilterRelNode(searchCond, srcRel); - - /* - * For Not Exists and Not In, add a projection on top of the Left - * Outer Join. 
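- * The join with the subquery appends the subquery's columns to the row; projecting back to the first numSrcColumns restores the outer query's original shape.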
- */ - if (subQuery.getOperator().getType() == SubQueryType.NOT_EXISTS - || subQuery.getOperator().getType() == SubQueryType.NOT_IN) { - srcRel = projectLeftOuterSide(srcRel, numSrcColumns); - } - } - relToHiveRR.put(srcRel, outerQBRR); - relToHiveColNameCalcitePosMap.put(srcRel, outerQBPosMap); - return srcRel; - } - - return genFilterRelNode(searchCond, srcRel); - } - - private RelNode projectLeftOuterSide(RelNode srcRel, int numColumns) throws SemanticException { - RowResolver iRR = relToHiveRR.get(srcRel); - RowResolver oRR = new RowResolver(); - RowResolver.add(oRR, iRR, numColumns); - - List<RexNode> calciteColLst = new ArrayList<RexNode>(); - List<String> oFieldNames = new ArrayList<String>(); - RelDataType iType = srcRel.getRowType(); - - for (int i = 0; i < iType.getFieldCount(); i++) { - RelDataTypeField fType = iType.getFieldList().get(i); - String fName = iType.getFieldNames().get(i); - calciteColLst.add(cluster.getRexBuilder().makeInputRef(fType.getType(), i)); - oFieldNames.add(fName); - } - - HiveRelNode selRel = HiveProject.create(srcRel, calciteColLst, oFieldNames); - - this.relToHiveColNameCalcitePosMap.put(selRel, buildHiveToCalciteColumnMap(oRR, selRel)); - this.relToHiveRR.put(selRel, oRR); - return selRel; - } - - private RelNode genFilterLogicalPlan(QB qb, RelNode srcRel, Map<String, RelNode> aliasToRel, - boolean forHavingClause) throws SemanticException { - RelNode filterRel = null; - - Iterator<ASTNode> whereClauseIterator = getQBParseInfo(qb).getDestToWhereExpr().values() - .iterator(); - if (whereClauseIterator.hasNext()) { - filterRel = genFilterRelNode(qb, (ASTNode) whereClauseIterator.next().getChild(0), srcRel, - aliasToRel, forHavingClause); - } - - return filterRel; - } - - /** - * Class to store GenericUDAF related information. - */ - private class AggInfo { - private final List<ExprNodeDesc> m_aggParams; - private final TypeInfo m_returnType; - private final String m_udfName; - private final boolean m_distinct; - - private AggInfo(List<ExprNodeDesc> aggParams, TypeInfo returnType, String udfName, - boolean isDistinct) { - m_aggParams = aggParams; - m_returnType = returnType; - m_udfName = udfName; - m_distinct = isDistinct; - } - } - - private AggregateCall convertGBAgg(AggInfo agg, RelNode input, List<RexNode> gbChildProjLst, - RexNodeConverter converter, HashMap<String, Integer> rexNodeToPosMap, - Integer childProjLstIndx) throws SemanticException { - - // 1. Get agg fn ret type in Calcite - RelDataType aggFnRetType = TypeConverter.convert(agg.m_returnType, - this.cluster.getTypeFactory()); - - // 2. Convert Agg Fn args and type of args to Calcite - // TODO: Does HQL allow expressions as aggregate args or can it only be - // projections from child? - Integer inputIndx; - List<Integer> argList = new ArrayList<Integer>(); - RexNode rexNd = null; - RelDataTypeFactory dtFactory = this.cluster.getTypeFactory(); - ImmutableList.Builder<RelDataType> aggArgRelDTBldr = new ImmutableList.Builder<RelDataType>(); - for (ExprNodeDesc expr : agg.m_aggParams) { - rexNd = converter.convert(expr); - inputIndx = rexNodeToPosMap.get(rexNd.toString()); - if (inputIndx == null) { - gbChildProjLst.add(rexNd); - rexNodeToPosMap.put(rexNd.toString(), childProjLstIndx); - inputIndx = childProjLstIndx; - childProjLstIndx++; - } - argList.add(inputIndx); - - // TODO: does arg need type cast? - aggArgRelDTBldr.add(TypeConverter.convert(expr.getTypeInfo(), dtFactory)); - } - - // 3.
Get Aggregation FN from Calcite given name, ret type and input arg - // type - final SqlAggFunction aggregation = SqlFunctionConverter.getCalciteAggFn(agg.m_udfName, - aggArgRelDTBldr.build(), aggFnRetType); - - return new AggregateCall(aggregation, agg.m_distinct, argList, aggFnRetType, null); - } - - private RelNode genGBRelNode(List<ExprNodeDesc> gbExprs, List<AggInfo> aggInfoLst, - RelNode srcRel) throws SemanticException { - RowResolver gbInputRR = this.relToHiveRR.get(srcRel); - ImmutableMap<String, Integer> posMap = this.relToHiveColNameCalcitePosMap.get(srcRel); - RexNodeConverter converter = new RexNodeConverter(this.cluster, srcRel.getRowType(), - posMap, 0, false); - - final List<RexNode> gbChildProjLst = Lists.newArrayList(); - final HashMap<String, Integer> rexNodeToPosMap = new HashMap<String, Integer>(); - final List<Integer> groupSetPositions = Lists.newArrayList(); - Integer gbIndx = 0; - RexNode rnd; - for (ExprNodeDesc key : gbExprs) { - rnd = converter.convert(key); - gbChildProjLst.add(rnd); - groupSetPositions.add(gbIndx); - rexNodeToPosMap.put(rnd.toString(), gbIndx); - gbIndx++; - } - final ImmutableBitSet groupSet = ImmutableBitSet.of(groupSetPositions); - - List<AggregateCall> aggregateCalls = Lists.newArrayList(); - for (AggInfo agg : aggInfoLst) { - aggregateCalls.add(convertGBAgg(agg, srcRel, gbChildProjLst, converter, rexNodeToPosMap, - gbChildProjLst.size())); - } - - if (gbChildProjLst.isEmpty()) { - // This will happen for count(*), in such cases we arbitrarily pick - // first element from srcRel - gbChildProjLst.add(this.cluster.getRexBuilder().makeInputRef(srcRel, 0)); - } - RelNode gbInputRel = HiveProject.create(srcRel, gbChildProjLst, null); - - HiveRelNode aggregateRel = null; - try { - aggregateRel = new HiveAggregate(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), - gbInputRel, false, groupSet, null, aggregateCalls); - } catch (InvalidRelException e) { - throw new SemanticException(e); - } - - return aggregateRel; - } - - private void addAlternateGByKeyMappings(ASTNode gByExpr, ColumnInfo colInfo, - RowResolver gByInputRR, RowResolver gByRR) { - if (gByExpr.getType() == HiveParser.DOT - && gByExpr.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL) { - String tab_alias = BaseSemanticAnalyzer.unescapeIdentifier(gByExpr.getChild(0).getChild(0) - .getText()); - String col_alias = BaseSemanticAnalyzer.unescapeIdentifier(gByExpr.getChild(1).getText()); - gByRR.put(tab_alias, col_alias, colInfo); - } else if (gByExpr.getType() == HiveParser.TOK_TABLE_OR_COL) { - String col_alias = BaseSemanticAnalyzer.unescapeIdentifier(gByExpr.getChild(0).getText()); - String tab_alias = null; - /* - * If the input to the GBy has a tab alias for the column, then add an - * entry based on that tab_alias. For example, this query: select b.x, - * count(*) from t1 b group by x needs (tab_alias=b, col_alias=x) in the - * GBy RR. tab_alias=b comes from looking at the RowResolver that is the - * ancestor before any GBy/ReduceSinks added for the GBY operation. - */ - try { - ColumnInfo pColInfo = gByInputRR.get(tab_alias, col_alias); - tab_alias = pColInfo == null ? null : pColInfo.getTabAlias(); - } catch (SemanticException se) { - } - gByRR.put(tab_alias, col_alias, colInfo); - } - } - - private void addToGBExpr(RowResolver groupByOutputRowResolver, - RowResolver groupByInputRowResolver, ASTNode grpbyExpr, ExprNodeDesc grpbyExprNDesc, - List<ExprNodeDesc> gbExprNDescLst, List<String> outputColumnNames) { - // TODO: Should we use grpbyExprNDesc.getTypeInfo()?
what if expr is - // UDF - int i = gbExprNDescLst.size(); - String field = getColumnInternalName(i); - outputColumnNames.add(field); - gbExprNDescLst.add(grpbyExprNDesc); - - ColumnInfo oColInfo = new ColumnInfo(field, grpbyExprNDesc.getTypeInfo(), null, false); - groupByOutputRowResolver.putExpression(grpbyExpr, oColInfo); - - addAlternateGByKeyMappings(grpbyExpr, oColInfo, groupByInputRowResolver, - groupByOutputRowResolver); - } - - private AggInfo getHiveAggInfo(ASTNode aggAst, int aggFnLstArgIndx, RowResolver inputRR) - throws SemanticException { - AggInfo aInfo = null; - - // 1 Convert UDAF Params to ExprNodeDesc - ArrayList aggParameters = new ArrayList(); - for (int i = 1; i <= aggFnLstArgIndx; i++) { - ASTNode paraExpr = (ASTNode) aggAst.getChild(i); - ExprNodeDesc paraExprNode = genExprNodeDesc(paraExpr, inputRR); - aggParameters.add(paraExprNode); - } - - // 2. Is this distinct UDAF - boolean isDistinct = aggAst.getType() == HiveParser.TOK_FUNCTIONDI; - - // 3. Determine type of UDAF - TypeInfo udafRetType = null; - - // 3.1 Obtain UDAF name - String aggName = unescapeIdentifier(aggAst.getChild(0).getText()); - - // 3.2 Rank functions type is 'int'/'double' - if (FunctionRegistry.isRankingFunction(aggName)) { - if (aggName.equalsIgnoreCase("percent_rank")) - udafRetType = TypeInfoFactory.doubleTypeInfo; - else - udafRetType = TypeInfoFactory.intTypeInfo; - } else { - // 3.3 Try obtaining UDAF evaluators to determine the ret type - try { - boolean isAllColumns = aggAst.getType() == HiveParser.TOK_FUNCTIONSTAR; - - // 3.3.1 Get UDAF Evaluator - Mode amode = groupByDescModeToUDAFMode(GroupByDesc.Mode.COMPLETE, isDistinct); - - GenericUDAFEvaluator genericUDAFEvaluator = null; - if (aggName.toLowerCase().equals(FunctionRegistry.LEAD_FUNC_NAME) - || aggName.toLowerCase().equals(FunctionRegistry.LAG_FUNC_NAME)) { - ArrayList originalParameterTypeInfos = - getWritableObjectInspector(aggParameters); - genericUDAFEvaluator = - FunctionRegistry.getGenericWindowingEvaluator(aggName, - originalParameterTypeInfos, isDistinct, isAllColumns); - GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode, aggParameters); - udafRetType = ((ListTypeInfo)udaf.returnType).getListElementTypeInfo(); - } else { - genericUDAFEvaluator = getGenericUDAFEvaluator(aggName, - aggParameters, aggAst, isDistinct, isAllColumns); - assert (genericUDAFEvaluator != null); - - // 3.3.2 Get UDAF Info using UDAF Evaluator - GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode, aggParameters); - udafRetType = udaf.returnType; - } - } catch (Exception e) { - LOG.debug("CBO: Couldn't Obtain UDAF evaluators for " + aggName - + ", trying to translate to GenericUDF"); - } - - // 3.4 Try GenericUDF translation - if (udafRetType == null) { - TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR); - // We allow stateful functions in the SELECT list (but nowhere else) - tcCtx.setAllowStatefulFunctions(true); - tcCtx.setAllowDistinctFunctions(false); - ExprNodeDesc exp = genExprNodeDesc((ASTNode) aggAst.getChild(0), inputRR, tcCtx); - udafRetType = exp.getTypeInfo(); - } - } - - // 4. Construct AggInfo - aInfo = new AggInfo(aggParameters, udafRetType, aggName, isDistinct); - - return aInfo; - } - - /** - * Generate GB plan. - * - * @param qb - * @param srcRel - * @return TODO: 1. Grouping Sets (roll up..) - * @throws SemanticException - */ - private RelNode genGBLogicalPlan(QB qb, RelNode srcRel) throws SemanticException { - RelNode gbRel = null; - QBParseInfo qbp = getQBParseInfo(qb); - - // 0. 
for GSets, Cube, Rollup, bail from Calcite path. - if (!qbp.getDestRollups().isEmpty() - || !qbp.getDestGroupingSets().isEmpty() - || !qbp.getDestCubes().isEmpty()) { - String gbyClause = null; - HashMap<String, ASTNode> gbysMap = qbp.getDestToGroupBy(); - if (gbysMap.size() == 1) { - ASTNode gbyAST = gbysMap.entrySet().iterator().next().getValue(); - gbyClause = SemanticAnalyzer.this.ctx.getTokenRewriteStream() - .toString(gbyAST.getTokenStartIndex(), - gbyAST.getTokenStopIndex()); - gbyClause = "in '" + gbyClause + "'."; - } else { - gbyClause = "."; - } - String msg = String.format("Encountered Grouping Set/Cube/Rollup%s" - + " Currently we don't support Grouping Set/Cube/Rollup" - + " clauses in CBO," + " turn off cbo for these queries.", - gbyClause); - LOG.debug(msg); - throw new CalciteSemanticException(msg); - } - - // 1. Gather GB Expressions (AST) (GB + Aggregations) - // NOTE: Multi Insert is not supported - String destClauseName = qbp.getClauseNames().iterator().next(); - List<ASTNode> grpByAstExprs = getGroupByForClause(qbp, destClauseName); - HashMap<String, ASTNode> aggregationTrees = qbp.getAggregationExprsForClause(destClauseName); - boolean hasGrpByAstExprs = grpByAstExprs != null && !grpByAstExprs.isEmpty(); - boolean hasAggregationTrees = aggregationTrees != null && !aggregationTrees.isEmpty(); - - if (hasGrpByAstExprs || hasAggregationTrees) { - ArrayList<ExprNodeDesc> gbExprNDescLst = new ArrayList<ExprNodeDesc>(); - ArrayList<String> outputColumnNames = new ArrayList<String>(); - - // 2. Input, Output Row Resolvers - RowResolver groupByInputRowResolver = this.relToHiveRR.get(srcRel); - RowResolver groupByOutputRowResolver = new RowResolver(); - groupByOutputRowResolver.setIsExprResolver(true); - - if (hasGrpByAstExprs) { - // 3. Construct GB Keys (ExprNode) - for (int i = 0; i < grpByAstExprs.size(); ++i) { - ASTNode grpbyExpr = grpByAstExprs.get(i); - Map<ASTNode, ExprNodeDesc> astToExprNDescMap = TypeCheckProcFactory.genExprNode( - grpbyExpr, new TypeCheckCtx(groupByInputRowResolver)); - ExprNodeDesc grpbyExprNDesc = astToExprNDescMap.get(grpbyExpr); - if (grpbyExprNDesc == null) - throw new CalciteSemanticException("Invalid Column Reference: " + grpbyExpr.dump()); - - addToGBExpr(groupByOutputRowResolver, groupByInputRowResolver, grpbyExpr, - grpbyExprNDesc, gbExprNDescLst, outputColumnNames); - } - } - - // 4.
Construct aggregation function Info - ArrayList aggregations = new ArrayList(); - if (hasAggregationTrees) { - assert (aggregationTrees != null); - for (ASTNode value : aggregationTrees.values()) { - // 4.1 Determine type of UDAF - // This is the GenericUDAF name - String aggName = unescapeIdentifier(value.getChild(0).getText()); - boolean isDistinct = value.getType() == HiveParser.TOK_FUNCTIONDI; - boolean isAllColumns = value.getType() == HiveParser.TOK_FUNCTIONSTAR; - - // 4.2 Convert UDAF Params to ExprNodeDesc - ArrayList aggParameters = new ArrayList(); - for (int i = 1; i < value.getChildCount(); i++) { - ASTNode paraExpr = (ASTNode) value.getChild(i); - ExprNodeDesc paraExprNode = genExprNodeDesc(paraExpr, groupByInputRowResolver); - aggParameters.add(paraExprNode); - } - - Mode amode = groupByDescModeToUDAFMode(GroupByDesc.Mode.COMPLETE, isDistinct); - GenericUDAFEvaluator genericUDAFEvaluator = getGenericUDAFEvaluator(aggName, - aggParameters, value, isDistinct, isAllColumns); - assert (genericUDAFEvaluator != null); - GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode, aggParameters); - AggInfo aInfo = new AggInfo(aggParameters, udaf.returnType, aggName, isDistinct); - aggregations.add(aInfo); - String field = getColumnInternalName(gbExprNDescLst.size() + aggregations.size() - 1); - outputColumnNames.add(field); - groupByOutputRowResolver.putExpression(value, new ColumnInfo(field, aInfo.m_returnType, - "", false)); - } - } - - gbRel = genGBRelNode(gbExprNDescLst, aggregations, srcRel); - relToHiveColNameCalcitePosMap.put(gbRel, - buildHiveToCalciteColumnMap(groupByOutputRowResolver, gbRel)); - this.relToHiveRR.put(gbRel, groupByOutputRowResolver); - } - - return gbRel; - } - - /** - * Generate OB RelNode and input Select RelNode that should be used to - * introduce top constraining Project. If Input select RelNode is not - * present then don't introduce top constraining select. - * - * @param qb - * @param srcRel - * @param outermostOB - * @return Pair Key- OB RelNode, Value - Input Select for - * top constraining Select - * @throws SemanticException - */ - private Pair genOBLogicalPlan(QB qb, RelNode srcRel, boolean outermostOB) - throws SemanticException { - RelNode sortRel = null; - RelNode originalOBChild = null; - - QBParseInfo qbp = getQBParseInfo(qb); - String dest = qbp.getClauseNames().iterator().next(); - ASTNode obAST = qbp.getOrderByForClause(dest); - - if (obAST != null) { - // 1. OB Expr sanity test - // in strict mode, in the presence of order by, limit must be specified - Integer limit = qb.getParseInfo().getDestLimit(dest); - if (conf.getVar(HiveConf.ConfVars.HIVEMAPREDMODE).equalsIgnoreCase("strict") - && limit == null) { - throw new SemanticException(generateErrorMessage(obAST, - ErrorMsg.NO_LIMIT_WITH_ORDERBY.getMsg())); - } - - // 2. 
Walk through OB exprs and extract field collations and additional - // virtual columns needed - final List newVCLst = new ArrayList(); - final List fieldCollations = Lists.newArrayList(); - int fieldIndex = 0; - - List obASTExprLst = obAST.getChildren(); - ASTNode obASTExpr; - List> vcASTTypePairs = new ArrayList>(); - RowResolver inputRR = relToHiveRR.get(srcRel); - RowResolver outputRR = new RowResolver(); - - RexNode rnd; - RexNodeConverter converter = new RexNodeConverter(cluster, srcRel.getRowType(), - relToHiveColNameCalcitePosMap.get(srcRel), 0, false); - int srcRelRecordSz = srcRel.getRowType().getFieldCount(); - - for (int i = 0; i < obASTExprLst.size(); i++) { - // 2.1 Convert AST Expr to ExprNode - obASTExpr = (ASTNode) obASTExprLst.get(i); - Map astToExprNDescMap = TypeCheckProcFactory.genExprNode( - obASTExpr, new TypeCheckCtx(inputRR)); - ExprNodeDesc obExprNDesc = astToExprNDescMap.get(obASTExpr.getChild(0)); - if (obExprNDesc == null) - throw new SemanticException("Invalid order by expression: " + obASTExpr.toString()); - - // 2.2 Convert ExprNode to RexNode - rnd = converter.convert(obExprNDesc); - - // 2.3 Determine the index of ob expr in child schema - // NOTE: Calcite can not take compound exprs in OB without it being - // present in the child (& hence we add a child Project Rel) - if (rnd instanceof RexInputRef) { - fieldIndex = ((RexInputRef) rnd).getIndex(); - } else { - fieldIndex = srcRelRecordSz + newVCLst.size(); - newVCLst.add(rnd); - vcASTTypePairs.add(new Pair((ASTNode) obASTExpr.getChild(0), - obExprNDesc.getTypeInfo())); - } - - // 2.4 Determine the Direction of order by - org.apache.calcite.rel.RelFieldCollation.Direction order = RelFieldCollation.Direction.DESCENDING; - if (obASTExpr.getType() == HiveParser.TOK_TABSORTCOLNAMEASC) { - order = RelFieldCollation.Direction.ASCENDING; - } - - // 2.5 Add to field collations - fieldCollations.add(new RelFieldCollation(fieldIndex, order)); - } - - // 3. Add Child Project Rel if needed, Generate Output RR, input Sel Rel - // for top constraining Sel - RelNode obInputRel = srcRel; - if (!newVCLst.isEmpty()) { - List originalInputRefs = Lists.transform(srcRel.getRowType().getFieldList(), - new Function() { - @Override - public RexNode apply(RelDataTypeField input) { - return new RexInputRef(input.getIndex(), input.getType()); - } - }); - RowResolver obSyntheticProjectRR = new RowResolver(); - if (!RowResolver.add(obSyntheticProjectRR, inputRR)) { - throw new CalciteSemanticException( - "Duplicates detected when adding columns to RR: see previous message"); - } - int vcolPos = inputRR.getRowSchema().getSignature().size(); - for (Pair astTypePair : vcASTTypePairs) { - obSyntheticProjectRR.putExpression(astTypePair.getKey(), new ColumnInfo( - getColumnInternalName(vcolPos), astTypePair.getValue(), null, false)); - vcolPos++; - } - obInputRel = genSelectRelNode(CompositeList.of(originalInputRefs, newVCLst), - obSyntheticProjectRR, srcRel); - - if (outermostOB) { - if (!RowResolver.add(outputRR, inputRR)) { - throw new CalciteSemanticException( - "Duplicates detected when adding columns to RR: see previous message"); - } - - } else { - if (!RowResolver.add(outputRR, obSyntheticProjectRR)) { - throw new CalciteSemanticException( - "Duplicates detected when adding columns to RR: see previous message"); - } - originalOBChild = srcRel; - } - } else { - if (!RowResolver.add(outputRR, inputRR)) { - throw new CalciteSemanticException( - "Duplicates detected when adding columns to RR: see previous message"); - } - } - - // 4. 
Construct SortRel - RelTraitSet traitSet = cluster.traitSetOf(HiveRelNode.CONVENTION); - RelCollation canonizedCollation = traitSet.canonize(RelCollationImpl.of(fieldCollations)); - sortRel = new HiveSort(cluster, traitSet, obInputRel, canonizedCollation, null, null); - - // 5. Update the maps - // NOTE: Output RR for SortRel is considered same as its input; we may - // end up not using VC that is present in sort rel. Also note that - // rowtype of sortrel is the type of its child; if child happens to be - // synthetic project that we introduced then that projectrel would - // contain the vc. - ImmutableMap<String, Integer> hiveColNameCalcitePosMap = buildHiveToCalciteColumnMap(outputRR, - sortRel); - relToHiveRR.put(sortRel, outputRR); - relToHiveColNameCalcitePosMap.put(sortRel, hiveColNameCalcitePosMap); - } - - return new Pair<RelNode, RelNode>(sortRel, originalOBChild); - } - - private RelNode genLimitLogicalPlan(QB qb, RelNode srcRel) throws SemanticException { - HiveRelNode sortRel = null; - QBParseInfo qbp = getQBParseInfo(qb); - Integer limit = qbp.getDestToLimit().get(qbp.getClauseNames().iterator().next()); - - if (limit != null) { - RexNode fetch = cluster.getRexBuilder().makeExactLiteral(BigDecimal.valueOf(limit)); - RelTraitSet traitSet = cluster.traitSetOf(HiveRelNode.CONVENTION); - RelCollation canonizedCollation = traitSet.canonize(RelCollationImpl.EMPTY); - sortRel = new HiveSort(cluster, traitSet, srcRel, canonizedCollation, null, fetch); - - RowResolver outputRR = new RowResolver(); - if (!RowResolver.add(outputRR, relToHiveRR.get(srcRel))) { - throw new CalciteSemanticException( - "Duplicates detected when adding columns to RR: see previous message"); - } - ImmutableMap<String, Integer> hiveColNameCalcitePosMap = buildHiveToCalciteColumnMap(outputRR, - sortRel); - relToHiveRR.put(sortRel, outputRR); - relToHiveColNameCalcitePosMap.put(sortRel, hiveColNameCalcitePosMap); - } - - return sortRel; - } - - List<RexNode> getPartitionKeys(PartitionSpec ps, RexNodeConverter converter, RowResolver inputRR) - throws SemanticException { - List<RexNode> pKeys = new ArrayList<RexNode>(); - if (ps != null) { - List<PartitionExpression> pExprs = ps.getExpressions(); - for (PartitionExpression pExpr : pExprs) { - TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR); - tcCtx.setAllowStatefulFunctions(true); - ExprNodeDesc exp = genExprNodeDesc(pExpr.getExpression(), inputRR, tcCtx); - pKeys.add(converter.convert(exp)); - } - } - - return pKeys; - } - - List<RexFieldCollation> getOrderKeys(OrderSpec os, RexNodeConverter converter, - RowResolver inputRR) throws SemanticException { - List<RexFieldCollation> oKeys = new ArrayList<RexFieldCollation>(); - if (os != null) { - List<OrderExpression> oExprs = os.getExpressions(); - for (OrderExpression oExpr : oExprs) { - TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR); - tcCtx.setAllowStatefulFunctions(true); - ExprNodeDesc exp = genExprNodeDesc(oExpr.getExpression(), inputRR, tcCtx); - RexNode ordExp = converter.convert(exp); - Set<SqlKind> flags = new HashSet<SqlKind>(); - if (oExpr.getOrder() == org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.Order.DESC) - flags.add(SqlKind.DESCENDING); - oKeys.add(new RexFieldCollation(ordExp, flags)); - } - } - - return oKeys; - } - - RexWindowBound getBound(BoundarySpec bs, RexNodeConverter converter) { - RexWindowBound rwb = null; - - if (bs != null) { - SqlParserPos pos = new SqlParserPos(1, 1); - SqlNode amt = bs.getAmt() == 0 ?
null : SqlLiteral.createExactNumeric( - String.valueOf(bs.getAmt()), new SqlParserPos(2, 2)); - RexNode amtLiteral = null; - SqlCall sc = null; - - if (amt != null) - amtLiteral = cluster.getRexBuilder().makeLiteral(Integer.valueOf(bs.getAmt()), - cluster.getTypeFactory().createSqlType(SqlTypeName.INTEGER), true); - - switch (bs.getDirection()) { - case PRECEDING: - if (amt == null) { - rwb = RexWindowBound.create(SqlWindow.createUnboundedPreceding(pos), null); - } else { - sc = (SqlCall) SqlWindow.createPreceding(amt, pos); - rwb = RexWindowBound.create(sc, - cluster.getRexBuilder().makeCall(sc.getOperator(), amtLiteral)); - } - break; - - case CURRENT: - rwb = RexWindowBound.create(SqlWindow.createCurrentRow(new SqlParserPos(1, 1)), null); - break; - - case FOLLOWING: - if (amt == null) { - rwb = RexWindowBound.create(SqlWindow.createUnboundedFollowing(new SqlParserPos(1, 1)), - null); - } else { - sc = (SqlCall) SqlWindow.createFollowing(amt, pos); - rwb = RexWindowBound.create(sc, - cluster.getRexBuilder().makeCall(sc.getOperator(), amtLiteral)); - } - break; - } - } - - return rwb; - } - - int getWindowSpecIndx(ASTNode wndAST) { - int wi = wndAST.getChildCount() - 1; - if (wi <= 0 || (wndAST.getChild(wi).getType() != HiveParser.TOK_WINDOWSPEC)) { - wi = -1; - } - - return wi; - } - - Pair<RexNode, TypeInfo> genWindowingProj(QB qb, WindowExpressionSpec wExpSpec, RelNode srcRel) - throws SemanticException { - RexNode w = null; - TypeInfo wHiveRetType = null; - - if (wExpSpec instanceof WindowFunctionSpec) { - WindowFunctionSpec wFnSpec = (WindowFunctionSpec) wExpSpec; - ASTNode windowProjAst = wFnSpec.getExpression(); - // TODO: do we need to get to child? - int wndSpecASTIndx = getWindowSpecIndx(windowProjAst); - // 2. Get Hive Aggregate Info - AggInfo hiveAggInfo = getHiveAggInfo(windowProjAst, wndSpecASTIndx - 1, - this.relToHiveRR.get(srcRel)); - - // 3. Get Calcite Return type for Agg Fn - wHiveRetType = hiveAggInfo.m_returnType; - RelDataType calciteAggFnRetType = TypeConverter.convert(hiveAggInfo.m_returnType, - this.cluster.getTypeFactory()); - - // 4. Convert Agg Fn args to Calcite - ImmutableMap<String, Integer> posMap = this.relToHiveColNameCalcitePosMap.get(srcRel); - RexNodeConverter converter = new RexNodeConverter(this.cluster, srcRel.getRowType(), - posMap, 0, false); - Builder<RexNode> calciteAggFnArgsBldr = ImmutableList.<RexNode> builder(); - Builder<RelDataType> calciteAggFnArgsTypeBldr = ImmutableList.<RelDataType> builder(); - for (int i = 0; i < hiveAggInfo.m_aggParams.size(); i++) { - calciteAggFnArgsBldr.add(converter.convert(hiveAggInfo.m_aggParams.get(i))); - calciteAggFnArgsTypeBldr.add(TypeConverter.convert(hiveAggInfo.m_aggParams.get(i) - .getTypeInfo(), this.cluster.getTypeFactory())); - } - ImmutableList<RexNode> calciteAggFnArgs = calciteAggFnArgsBldr.build(); - ImmutableList<RelDataType> calciteAggFnArgsType = calciteAggFnArgsTypeBldr.build(); - - // 5. Get Calcite Agg Fn - final SqlAggFunction calciteAggFn = SqlFunctionConverter.getCalciteAggFn(hiveAggInfo.m_udfName, - calciteAggFnArgsType, calciteAggFnRetType); - - // 6.
+    private RelNode genSelectForWindowing(
+        QB qb, RelNode srcRel, HashSet<ColumnInfo> newColumns) throws SemanticException {
+      QBParseInfo qbp = getQBParseInfo(qb);
+      WindowingSpec wSpec = (!qb.getAllWindowingSpecs().isEmpty()) ? qb.getAllWindowingSpecs()
+          .values().iterator().next() : null;
+      if (wSpec == null) {
+        return null;
+      }
+
+      // 1. Get valid Window Function Spec
+      wSpec.validateAndMakeEffective();
+      List<WindowExpressionSpec> windowExpressions = wSpec.getWindowExpressions();
+      if (windowExpressions == null || windowExpressions.isEmpty()) {
+        return null;
+      }
+
+      RowResolver inputRR = this.relToHiveRR.get(srcRel);
+      // 2. Get RexNodes for original Projections from below
+      List<RexNode> projsForWindowSelOp = new ArrayList<RexNode>(
+          HiveCalciteUtil.getProjsFromBelowAsInputRef(srcRel));
+
+      // 3. Construct new Row Resolver with everything from below.
+      RowResolver out_rwsch = new RowResolver();
+      if (!RowResolver.add(out_rwsch, inputRR)) {
+        LOG.warn("Duplicates detected when adding columns to RR: see previous message");
+      }
+
+      // 4. Walk through Window Expressions & construct RexNodes for those,
+      // updating out_rwsch
+      for (WindowExpressionSpec wExprSpec : windowExpressions) {
+        if (out_rwsch.getExpression(wExprSpec.getExpression()) == null) {
+          Pair<RexNode, TypeInfo> wtp = genWindowingProj(qb, wExprSpec, srcRel);
+          projsForWindowSelOp.add(wtp.getKey());
+
+          // 4.1 Update Output Row Schema
+          ColumnInfo oColInfo = new ColumnInfo(
+              getColumnInternalName(projsForWindowSelOp.size()), wtp.getValue(), null, false);
+          out_rwsch.putExpression(wExprSpec.getExpression(), oColInfo);
+          newColumns.add(oColInfo);
+        }
+      }
+
+      return genSelectRelNode(projsForWindowSelOp, out_rwsch, srcRel);
+    }
+
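+    /*
+     * Note on naming: an unaliased expression carries Hive's autogenerated
+     * alias (e.g. "_c0"), which is rewritten below to "_o__c0" so it is not
+     * mistaken for an internal name when converting back to Hive AST; any
+     * remaining clash is resolved by appending a numeric suffix.
+     */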
+    private RelNode genSelectRelNode(List<RexNode> calciteColLst, RowResolver out_rwsch,
+        RelNode srcRel) throws CalciteSemanticException {
+      // 1. Build Column Names
+      Set<String> colNamesSet = new HashSet<String>();
+      List<ColumnInfo> cInfoLst = out_rwsch.getRowSchema().getSignature();
+      ArrayList<String> columnNames = new ArrayList<String>();
+      String[] qualifiedColNames;
+      String tmpColAlias;
+      for (int i = 0; i < calciteColLst.size(); i++) {
+        ColumnInfo cInfo = cInfoLst.get(i);
+        qualifiedColNames = out_rwsch.reverseLookup(cInfo.getInternalName());
+        /*
+        if (qualifiedColNames[0] != null && !qualifiedColNames[0].isEmpty())
+          tmpColAlias = qualifiedColNames[0] + "." + qualifiedColNames[1];
+        else
+        */
+        tmpColAlias = qualifiedColNames[1];
+
+        // 2. Prepend column names with '_o_' if they start with '_c'
+        /*
+         * Hive treats names that start with '_c' as internalNames; so change
+         * the names so we don't run into this issue when converting back to
+         * Hive AST.
+         */
+        if (tmpColAlias.startsWith("_c")) {
+          tmpColAlias = "_o_" + tmpColAlias;
+        }
+        int suffix = 1;
+        while (colNamesSet.contains(tmpColAlias)) {
+          tmpColAlias = qualifiedColNames[1] + suffix;
+          suffix++;
+        }
+
+        colNamesSet.add(tmpColAlias);
+        columnNames.add(tmpColAlias);
+      }
+
+      // 3. Build Calcite Rel Node for project using converted projections & col
+      // names
+      HiveRelNode selRel = HiveProject.create(srcRel, calciteColLst, columnNames);
+
+      // 4. Keep track of colname-to-posmap && RR for new select
+      this.relToHiveColNameCalcitePosMap.put(selRel, buildHiveToCalciteColumnMap(out_rwsch, selRel));
+      this.relToHiveRR.put(selRel, out_rwsch);
+
+      return selRel;
+    }
+
+    private RelNode genSelectRelNode(List<RexNode> calciteColLst, RowResolver out_rwsch,
+        RelNode srcRel, boolean removethismethod) throws CalciteSemanticException {
+      // 1. Build Column Names
+      // TODO: Should these be external names?
+      ArrayList<String> columnNames = new ArrayList<String>();
+      for (int i = 0; i < calciteColLst.size(); i++) {
+        columnNames.add(getColumnInternalName(i));
+      }
+
+      // 2. Prepend column names with '_o_'
+      /*
+       * Hive treats names that start with '_c' as internalNames; so change the
+       * names so we don't run into this issue when converting back to Hive AST.
+       */
+      List<String> oFieldNames = Lists.transform(columnNames, new Function<String, String>() {
+        @Override
+        public String apply(String hName) {
+          return "_o_" + hName;
+        }
+      });
+
+      // 3. Build Calcite Rel Node for project using converted projections & col
+      // names
+      HiveRelNode selRel = HiveProject.create(srcRel, calciteColLst, oFieldNames);
+
+      // 4. Keep track of colname-to-posmap && RR for new select
+      this.relToHiveColNameCalcitePosMap.put(selRel, buildHiveToCalciteColumnMap(out_rwsch, selRel));
+      this.relToHiveRR.put(selRel, out_rwsch);
+
+      return selRel;
+    }
+
+    /**
+     * NOTE: there can only be one select clause since we don't handle
+     * multi-destination inserts.
+     *
+     * @throws SemanticException
+     */
+    private RelNode genSelectLogicalPlan(
+        QB qb, RelNode srcRel, RelNode starSrcRel) throws SemanticException {
+      // 0. Generate a Select Node for Windowing
+      // Exclude the newly-generated select columns from */etc. resolution.
+      HashSet<ColumnInfo> excludedColumns = new HashSet<ColumnInfo>();
+      RelNode selForWindow = genSelectForWindowing(qb, srcRel, excludedColumns);
+      srcRel = (selForWindow == null) ? srcRel : selForWindow;
+
+      ArrayList<ExprNodeDesc> col_list = new ArrayList<ExprNodeDesc>();
+
+      // 1. Get Select Expression List
+      QBParseInfo qbp = getQBParseInfo(qb);
+      String selClauseName = qbp.getClauseNames().iterator().next();
+      ASTNode selExprList = qbp.getSelForClause(selClauseName);
+
+      // 2. Row resolvers for input, output
+      RowResolver out_rwsch = new RowResolver();
+      Integer pos = Integer.valueOf(0);
+      // TODO: will this also fix windowing? try
+      RowResolver inputRR = this.relToHiveRR.get(srcRel), starRR = inputRR;
+      if (starSrcRel != null) {
+        starRR = this.relToHiveRR.get(starSrcRel);
+      }
+
+      // 3. Query Hints are not supported in CBO; bail out if one is present
+      boolean selectStar = false;
+      int posn = 0;
+      boolean hintPresent = (selExprList.getChild(0).getType() == HiveParser.TOK_HINTLIST);
+      if (hintPresent) {
+        String hint = SemanticAnalyzer.this.ctx.getTokenRewriteStream().toString(
+            selExprList.getChild(0).getTokenStartIndex(),
+            selExprList.getChild(0).getTokenStopIndex());
+        String msg = String.format("Hint specified for %s."
+            + " Currently we don't support hints in CBO, turn off cbo to use hints.", hint);
+        LOG.debug(msg);
+        throw new CalciteSemanticException(msg);
+      }
+
+      // 4. Bail out if the select involves TRANSFORM
+      boolean isInTransform = (selExprList.getChild(posn).getChild(0).getType() == HiveParser.TOK_TRANSFORM);
+      if (isInTransform) {
+        String msg = "SELECT TRANSFORM is currently not supported in CBO,"
+            + " turn off cbo to use TRANSFORM.";
+        LOG.debug(msg);
+        throw new CalciteSemanticException(msg);
+      }
+
+      // 5. Bail out if the select involves a UDTF
+      ASTNode expr = (ASTNode) selExprList.getChild(posn).getChild(0);
+      int exprType = expr.getType();
+      if (exprType == HiveParser.TOK_FUNCTION || exprType == HiveParser.TOK_FUNCTIONSTAR) {
+        String funcName = TypeCheckProcFactory.DefaultExprProcessor.getFunctionText(expr, true);
+        FunctionInfo fi = FunctionRegistry.getFunctionInfo(funcName);
+        if (fi != null && fi.getGenericUDTF() != null) {
+          String msg = "UDTF " + funcName + " is currently not supported in CBO,"
+              + " turn off cbo to use UDTF " + funcName;
+          LOG.debug(msg);
+          throw new CalciteSemanticException(msg);
+        }
+      }
+
+      // 6. Iterate over all expressions (after SELECT)
+      ASTNode exprList = selExprList;
+      int startPosn = posn;
+      List<String> tabAliasesForAllProjs = getTabAliases(starRR);
+
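+      // For instance, "SELECT x AS y FROM t" arrives as a TOK_SELEXPR with two
+      // children (the expression and the alias), while a bare "SELECT x" has a
+      // single child and getColAlias autogenerates the alias.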
+      for (int i = startPosn; i < exprList.getChildCount(); ++i) {
+
+        // 6.1 A child can be EXPR AS ALIAS, or EXPR.
+        ASTNode child = (ASTNode) exprList.getChild(i);
+        boolean hasAsClause = (!isInTransform) && (child.getChildCount() == 2);
+
+        // 6.2 EXPR AS (ALIAS,...) parses, but is only allowed for UDTFs.
+        // This check is not needed and invalid when there is a transform b/c
+        // the ASTs are slightly different.
+        if (child.getChildCount() > 2) {
+          throw new SemanticException(generateErrorMessage((ASTNode) child.getChild(2),
+              ErrorMsg.INVALID_AS.getMsg()));
+        }
+
+        String tabAlias;
+        String colAlias;
+
+        // 6.3 Get rid of TOK_SELEXPR
+        expr = (ASTNode) child.getChild(0);
+        String[] colRef = getColAlias(child, autogenColAliasPrfxLbl, inputRR,
+            autogenColAliasPrfxIncludeFuncName, i);
+        tabAlias = colRef[0];
+        colAlias = colRef[1];
+
+        // 6.4 Build ExprNode corresponding to columns
+        if (expr.getType() == HiveParser.TOK_ALLCOLREF) {
+          pos = genColListRegex(".*",
+              expr.getChildCount() == 0 ? null : getUnescapedName((ASTNode) expr.getChild(0))
+                  .toLowerCase(), expr, col_list, excludedColumns, inputRR, starRR, pos,
+              out_rwsch, tabAliasesForAllProjs, true);
+          selectStar = true;
+        } else if (expr.getType() == HiveParser.TOK_TABLE_OR_COL && !hasAsClause
+            && !inputRR.getIsExprResolver()
+            && isRegex(unescapeIdentifier(expr.getChild(0).getText()), conf)) {
+          // In case the expression is a regex COL.
+          // This can only happen without an AS clause.
+          // We don't allow this for ExprResolver - the Group By case
+          pos = genColListRegex(unescapeIdentifier(expr.getChild(0).getText()), null, expr,
+              col_list, excludedColumns, inputRR, starRR, pos, out_rwsch, tabAliasesForAllProjs,
+              true);
+        } else if (expr.getType() == HiveParser.DOT
+            && expr.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL
+            && inputRR.hasTableAlias(unescapeIdentifier(expr.getChild(0).getChild(0).getText()
+                .toLowerCase())) && !hasAsClause && !inputRR.getIsExprResolver()
+            && isRegex(unescapeIdentifier(expr.getChild(1).getText()), conf)) {
+          // In case the expression is TABLE.COL (col can be regex).
+          // This can only happen without an AS clause.
+          // We don't allow this for ExprResolver - the Group By case
+          pos = genColListRegex(unescapeIdentifier(expr.getChild(1).getText()),
+              unescapeIdentifier(expr.getChild(0).getChild(0).getText().toLowerCase()), expr,
+              col_list, excludedColumns, inputRR, starRR, pos, out_rwsch, tabAliasesForAllProjs,
+              true);
+        } else if (expr.toStringTree().contains("TOK_FUNCTIONDI")
+            && !(srcRel instanceof HiveAggregate)) {
+          // Likely a malformed query, e.g. select hash(distinct c1) from t1;
+          throw new CalciteSemanticException("Distinct without an aggregation.");
+        } else {
+          // Case when this is an expression
+          TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR);
+          // We allow stateful functions in the SELECT list (but nowhere else)
+          tcCtx.setAllowStatefulFunctions(true);
+          ExprNodeDesc exp = genExprNodeDesc(expr, inputRR, tcCtx);
+          String recommended = recommendName(exp, colAlias);
+          if (recommended != null && out_rwsch.get(null, recommended) == null) {
+            colAlias = recommended;
+          }
+          col_list.add(exp);
+
+          ColumnInfo colInfo = new ColumnInfo(getColumnInternalName(pos),
+              exp.getWritableObjectInspector(), tabAlias, false);
+          colInfo.setSkewedCol(exp instanceof ExprNodeColumnDesc
+              && ((ExprNodeColumnDesc) exp).isSkewedCol());
+          if (!out_rwsch.putWithCheck(tabAlias, colAlias, null, colInfo)) {
+            throw new CalciteSemanticException("Cannot add column to RR: " + tabAlias + "."
+                + colAlias + " => " + colInfo + " due to duplication, see previous warnings");
+          }
+
+          if (exp instanceof ExprNodeColumnDesc) {
+            ExprNodeColumnDesc colExp = (ExprNodeColumnDesc) exp;
+            String[] altMapping = inputRR.getAlternateMappings(colExp.getColumn());
+            if (altMapping != null) {
+              out_rwsch.put(altMapping[0], altMapping[1], colInfo);
+            }
+          }
+
+          pos = Integer.valueOf(pos.intValue() + 1);
+        }
+      }
+      selectStar = selectStar && exprList.getChildCount() == posn + 1;
+
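+      // For instance, a plain column reference "c1" in col_list is an
+      // ExprNodeColumnDesc that the converter below resolves through the
+      // name-to-position map into an input ref $n on srcRel's row type, while
+      // "c1 + 1" becomes the corresponding RexCall over that input ref.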
+      // 7. Convert Hive projections to Calcite
+      List<RexNode> calciteColLst = new ArrayList<RexNode>();
+      RexNodeConverter rexNodeConv = new RexNodeConverter(cluster, srcRel.getRowType(),
+          buildHiveColNameToInputPosMap(col_list, inputRR), 0, false);
+      for (ExprNodeDesc colExpr : col_list) {
+        calciteColLst.add(rexNodeConv.convert(colExpr));
+      }
+
+      // 8. Build Calcite Rel
+      RelNode selRel = genSelectRelNode(calciteColLst, out_rwsch, srcRel);
+
+      return selRel;
+    }
+
+    private RelNode genLogicalPlan(QBExpr qbexpr) throws SemanticException {
+      if (qbexpr.getOpcode() == QBExpr.Opcode.NULLOP) {
+        return genLogicalPlan(qbexpr.getQB(), false);
+      }
+      if (qbexpr.getOpcode() == QBExpr.Opcode.UNION) {
+        RelNode qbexpr1Ops = genLogicalPlan(qbexpr.getQBExpr1());
+        RelNode qbexpr2Ops = genLogicalPlan(qbexpr.getQBExpr2());
+
+        return genUnionLogicalPlan(qbexpr.getAlias(), qbexpr.getQBExpr1().getAlias(), qbexpr1Ops,
+            qbexpr.getQBExpr2().getAlias(), qbexpr2Ops);
+      }
+      return null;
+    }
+
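+    /*
+     * For example, "SELECT c1, count(*) FROM t WHERE c2 > 0 GROUP BY c1
+     * ORDER BY c1 LIMIT 10" is assembled bottom-up by the steps below as
+     * TableScan -> Filter -> Aggregate -> Project -> Sort (collation) ->
+     * Sort (fetch).
+     */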
+    private RelNode genLogicalPlan(QB qb, boolean outerMostQB) throws SemanticException {
+      RelNode srcRel = null;
+      RelNode filterRel = null;
+      RelNode gbRel = null;
+      RelNode gbHavingRel = null;
+      RelNode selectRel = null;
+      RelNode obRel = null;
+      RelNode limitRel = null;
+
+      // First generate all the opInfos for the elements in the from clause
+      Map<String, RelNode> aliasToRel = new HashMap<String, RelNode>();
+
+      // 0. Check if we can handle the SubQuery;
+      // canHandleQbForCbo returns null if the query can be handled.
+      String reason = canHandleQbForCbo(qb, false, LOG.isDebugEnabled());
+      if (reason != null) {
+        String msg = "CBO can not handle Sub Query";
+        if (LOG.isDebugEnabled()) {
+          LOG.debug(msg + " because it: " + reason);
+        }
+        throw new CalciteSemanticException(msg);
+      }
+
+      // 1. Build Rel For Src (SubQuery, TS, Join)
+      // 1.1. Recurse over the subqueries to fill the subquery part of the plan
+      for (String subqAlias : qb.getSubqAliases()) {
+        QBExpr qbexpr = qb.getSubqForAlias(subqAlias);
+        aliasToRel.put(subqAlias, genLogicalPlan(qbexpr));
+      }
+
+      // 1.2 Recurse over all the source tables
+      for (String tableAlias : qb.getTabAliases()) {
+        RelNode op = genTableLogicalPlan(tableAlias, qb);
+        aliasToRel.put(tableAlias, op);
+      }
+
+      if (aliasToRel.isEmpty()) {
+        // This may happen for queries like "select 1" (no source table).
+        // We could do what Hive does, which would let us generate a Calcite plan:
+        // qb.getMetaData().setSrcForAlias(DUMMY_TABLE, getDummyTable());
+        // RelNode op = genTableLogicalPlan(DUMMY_TABLE, qb);
+        // qb.addAlias(DUMMY_TABLE);
+        // qb.setTabAlias(DUMMY_TABLE, DUMMY_TABLE);
+        // aliasToRel.put(DUMMY_TABLE, op);
+        // However, Hive trips later while trying to get metadata for this dummy
+        // table, so for now let's just disable this. Anyway, there is nothing
+        // much to optimize in such cases.
+        throw new CalciteSemanticException("Unsupported");
+      }
+
+      // 1.3 Process join
+      if (qb.getParseInfo().getJoinExpr() != null) {
+        srcRel = genJoinLogicalPlan(qb.getParseInfo().getJoinExpr(), aliasToRel);
+      } else {
+        // If no join then there should only be either 1 TS or 1 SubQuery
+        srcRel = aliasToRel.values().iterator().next();
+      }
+
+      // 2. Build Rel for where Clause
+      filterRel = genFilterLogicalPlan(qb, srcRel, aliasToRel, false);
+      srcRel = (filterRel == null) ? srcRel : filterRel;
+      RelNode starSrcRel = srcRel;
+
+      // 3. Build Rel for GB Clause
+      gbRel = genGBLogicalPlan(qb, srcRel);
+      srcRel = (gbRel == null) ? srcRel : gbRel;
+
+      // 4. Build Rel for GB Having Clause
+      gbHavingRel = genGBHavingLogicalPlan(qb, srcRel, aliasToRel);
+      srcRel = (gbHavingRel == null) ? srcRel : gbHavingRel;
+
+      // 5. Build Rel for Select Clause
+      selectRel = genSelectLogicalPlan(qb, srcRel, starSrcRel);
+      srcRel = (selectRel == null) ? srcRel : selectRel;
+
+      // 6. Build Rel for OB Clause
+      Pair<RelNode, RelNode> obTopProjPair = genOBLogicalPlan(qb, srcRel, outerMostQB);
+      obRel = obTopProjPair.getKey();
+      RelNode topConstrainingProjArgsRel = obTopProjPair.getValue();
+      srcRel = (obRel == null) ? srcRel : obRel;
+
+      // 7. Build Rel for Limit Clause
+      limitRel = genLimitLogicalPlan(qb, srcRel);
+      srcRel = (limitRel == null) ? srcRel : limitRel;
+
+      // 8. Introduce top constraining select if needed.
+      // NOTES:
+      // 1. Calcite cannot take an expr in OB; hence it needs to be added as a
+      // VC in the input select. In such cases we need to introduce a select on
+      // top to ensure the VC is not visible beyond Limit/OB.
+      // 2. Hive cannot preserve order across select. In subqueries OB is used
+      // to get a deterministic set of tuples from the following limit. Hence we
+      // introduce the constraining select above Limit (if present) instead of
+      // OB.
+      // 3. The top level OB will not introduce a constraining select due to the
+      // Hive limitation (#2) stated above. The RR for OB will not include the
+      // VC, so the Result Schema will not include exprs used by the top OB.
+      // During AST conversion, PlanModifierForASTConv modifies the top level OB
+      // to migrate exprs from the input sel to the SortRel (note that Calcite
+      // doesn't support this; but since we are done with Calcite at this point
+      // it's OK).
+      if (topConstrainingProjArgsRel != null) {
+        List<RexNode> originalInputRefs = Lists.transform(topConstrainingProjArgsRel.getRowType()
+            .getFieldList(), new Function<RelDataTypeField, RexNode>() {
+          @Override
+          public RexNode apply(RelDataTypeField input) {
+            return new RexInputRef(input.getIndex(), input.getType());
+          }
+        });
+        RowResolver topConstrainingProjRR = new RowResolver();
+        if (!RowResolver.add(
+            topConstrainingProjRR, this.relToHiveRR.get(topConstrainingProjArgsRel))) {
+          LOG.warn("Duplicates detected when adding columns to RR: see previous message");
+        }
+        srcRel = genSelectRelNode(originalInputRefs, topConstrainingProjRR, srcRel);
+      }
+
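+      // For example, with "SELECT t.x FROM (SELECT c1 AS x FROM r) t" the
+      // subquery's columns are re-registered below under the alias "t" so that
+      // the outer query block can resolve t.x.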
+      // 9. In case this QB corresponds to a subquery, modify its RR to point to
+      // the subquery alias.
+      // TODO: cleanup this
+      if (qb.getParseInfo().getAlias() != null) {
+        RowResolver rr = this.relToHiveRR.get(srcRel);
+        RowResolver newRR = new RowResolver();
+        String alias = qb.getParseInfo().getAlias();
+        for (ColumnInfo colInfo : rr.getColumnInfos()) {
+          String name = colInfo.getInternalName();
+          String[] tmp = rr.reverseLookup(name);
+          if ("".equals(tmp[0]) || tmp[1] == null) {
+            // ast expression is not a valid column name for table
+            tmp[1] = colInfo.getInternalName();
+          }
+          ColumnInfo newCi = new ColumnInfo(colInfo);
+          newCi.setTabAlias(alias);
+          newRR.put(alias, tmp[1], newCi);
+        }
+        relToHiveRR.put(srcRel, newRR);
+        relToHiveColNameCalcitePosMap.put(srcRel, buildHiveToCalciteColumnMap(newRR, srcRel));
+      }
+
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Created Plan for Query Block " + qb.getId());
+      }
+
+      return srcRel;
+    }
+
+    private RelNode genGBHavingLogicalPlan(QB qb, RelNode srcRel, Map<String, RelNode> aliasToRel)
+        throws SemanticException {
+      RelNode gbFilter = null;
+      QBParseInfo qbp = getQBParseInfo(qb);
+      ASTNode havingClause = qbp.getHavingForClause(qbp.getClauseNames().iterator().next());
+
+      if (havingClause != null) {
+        if (!(srcRel instanceof HiveAggregate)) {
+          // ill-formed query like select * from t1 having c1 > 0;
+          throw new CalciteSemanticException("Having clause without any group-by.");
+        }
+        validateNoHavingReferenceToAlias(qb, (ASTNode) havingClause.getChild(0));
+        gbFilter = genFilterRelNode(qb, (ASTNode) havingClause.getChild(0), srcRel, aliasToRel,
+            true);
+      }
+
+      return gbFilter;
+    }
+
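+    /*
+     * For instance, plain Hive accepts "SELECT sum(c1) AS s FROM t GROUP BY c2
+     * HAVING s > 0" by resolving the select alias "s" inside the having
+     * clause; the check below rejects such queries when cbo is on.
+     */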
- + " Turn off cbo for these queries.", aliasToCheck, - havingClause); - LOG.debug(msg); - throw new CalciteSemanticException(msg); - } - } - - } - - private ImmutableMap buildHiveToCalciteColumnMap(RowResolver rr, RelNode rNode) { - ImmutableMap.Builder b = new ImmutableMap.Builder(); - int i = 0; - for (ColumnInfo ci : rr.getRowSchema().getSignature()) { - b.put(ci.getInternalName(), rr.getPosition(ci.getInternalName())); - } - return b.build(); - } - - private ImmutableMap buildHiveColNameToInputPosMap( - List col_list, RowResolver inputRR) { - // Build a map of Hive column Names (ExprNodeColumnDesc Name) - // to the positions of those projections in the input - Map hashCodeTocolumnDescMap = new HashMap(); - ExprNodeDescUtils.getExprNodeColumnDesc(col_list, hashCodeTocolumnDescMap); - ImmutableMap.Builder hiveColNameToInputPosMapBuilder = new ImmutableMap.Builder(); - String exprNodecolName; - for (ExprNodeDesc exprDesc : hashCodeTocolumnDescMap.values()) { - exprNodecolName = ((ExprNodeColumnDesc) exprDesc).getColumn(); - hiveColNameToInputPosMapBuilder.put(exprNodecolName, inputRR.getPosition(exprNodecolName)); - } - - return hiveColNameToInputPosMapBuilder.build(); - } - - private QBParseInfo getQBParseInfo(QB qb) throws CalciteSemanticException { - QBParseInfo qbp = qb.getParseInfo(); - if (qbp.getClauseNames().size() > 1) { - String msg = String.format("Multi Insert is currently not supported in CBO," - + " turn off cbo to use Multi Insert."); - LOG.debug(msg); - throw new CalciteSemanticException(msg); - } - return qbp; - } - - private List getTabAliases(RowResolver inputRR) { - List tabAliases = new ArrayList(); // TODO: this should be unique - for (ColumnInfo ci : inputRR.getColumnInfos()) { - tabAliases.add(ci.getTabAlias()); - } - - return tabAliases; - } - } - - private static class ExceptionHelper { - private static final Field CAUSE_FIELD = getField(Throwable.class, "cause"), - TARGET_FIELD = getField(InvocationTargetException.class, "target"), - MESSAGE_FIELD = getField(Throwable.class, "detailMessage"); - private static Field getField(Class clazz, String name) { - try { - Field f = clazz.getDeclaredField(name); - f.setAccessible(true); - return f; - } catch (Throwable t) { - return null; - } - } - public static boolean resetCause(Throwable target, Throwable newCause) { - try { - if (MESSAGE_FIELD == null) return false; - Field field = (target instanceof InvocationTargetException) ? TARGET_FIELD : CAUSE_FIELD; - if (field == null) return false; - - Throwable oldCause = target.getCause(); - String oldMsg = target.getMessage(); - field.set(target, newCause); - if (oldMsg != null && oldMsg.equals(oldCause.toString())) { - MESSAGE_FIELD.set(target, newCause == null ? null : newCause.toString()); - } - } catch (Throwable se) { - return false; - } - return true; - } - } }