Index: ql/src/test/org/apache/hadoop/hive/ql/exec/TestExpressionEvaluator.java =================================================================== --- ql/src/test/org/apache/hadoop/hive/ql/exec/TestExpressionEvaluator.java (revision 836131) +++ ql/src/test/org/apache/hadoop/hive/ql/exec/TestExpressionEvaluator.java (working copy) @@ -28,7 +28,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer; +import org.apache.hadoop.hive.ql.parse.DMLSemanticAnalyzer; import org.apache.hadoop.hive.ql.parse.TypeCheckProcFactory; import org.apache.hadoop.hive.ql.plan.exprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.exprNodeConstantDesc; Index: ql/src/test/org/apache/hadoop/hive/ql/exec/TestPlan.java =================================================================== --- ql/src/test/org/apache/hadoop/hive/ql/exec/TestPlan.java (revision 836131) +++ ql/src/test/org/apache/hadoop/hive/ql/exec/TestPlan.java (working copy) @@ -25,7 +25,7 @@ import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.TextInputFormat; -import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer; +import org.apache.hadoop.hive.ql.parse.DMLSemanticAnalyzer; import org.apache.hadoop.hive.ql.parse.TypeCheckProcFactory; import org.apache.hadoop.hive.ql.plan.*; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; Index: ql/src/test/org/apache/hadoop/hive/ql/QTestUtil.java =================================================================== --- ql/src/test/org/apache/hadoop/hive/ql/QTestUtil.java (revision 836131) +++ ql/src/test/org/apache/hadoop/hive/ql/QTestUtil.java (working copy) @@ -38,7 +38,6 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.cli.CliDriver; import org.apache.hadoop.hive.cli.CliSessionState; import org.apache.hadoop.hive.conf.HiveConf; @@ -53,7 +52,7 @@ import org.apache.hadoop.hive.ql.parse.ASTNode; import org.apache.hadoop.hive.ql.parse.ParseDriver; import org.apache.hadoop.hive.ql.parse.ParseException; -import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer; +import org.apache.hadoop.hive.ql.parse.DMLSemanticAnalyzer; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.serde.Constants; @@ -83,7 +82,7 @@ private Hive db; private HiveConf conf; private Driver drv; - private SemanticAnalyzer sem; + private DMLSemanticAnalyzer sem; private FileSystem fs; private boolean overWrite; private CliDriver cliDriver; @@ -382,7 +381,7 @@ fs = FileSystem.get(conf); drv = new Driver(conf); pd = new ParseDriver(); - sem = new SemanticAnalyzer(conf); + sem = new DMLSemanticAnalyzer(conf); } public void init(String tname) throws Exception { @@ -751,7 +750,7 @@ sem.analyze(ast, ctx); ctx.clear(); - return sem.getRootTasks(); + return new ArrayList>(sem.getPhysicalPlan().getRootTasks()); } Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRProcContext.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRProcContext.java (revision 836131) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRProcContext.java (working copy) @@ -21,22 +21,20 @@ import 
java.util.LinkedHashMap; import java.util.List; import java.util.ArrayList; -import java.util.Map; import java.util.HashMap; -import java.util.Set; import java.io.Serializable; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.Context; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.FileSinkOperator; -import org.apache.hadoop.hive.ql.exec.SelectOperator; import org.apache.hadoop.hive.ql.exec.UnionOperator; import org.apache.hadoop.hive.ql.exec.MapJoinOperator; import org.apache.hadoop.hive.ql.exec.Task; -import org.apache.hadoop.hive.ql.hooks.ReadEntity; -import org.apache.hadoop.hive.ql.hooks.WriteEntity; import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; -import org.apache.hadoop.hive.ql.parse.ParseContext; +import org.apache.hadoop.hive.ql.parse.LogicalPlan; +import org.apache.hadoop.hive.ql.parse.PhysicalPlan; +import org.apache.hadoop.hive.ql.parse.DMLSemanticAnalyzer; import org.apache.hadoop.hive.ql.plan.tableDesc; /** @@ -199,81 +197,73 @@ } } - private HiveConf conf; - private HashMap, Task> opTaskMap; - private HashMap unionTaskMap; - private HashMap mapJoinTaskMap; - private List> seenOps; - private List seenFileSinkOps; + private Context context; + + private LogicalPlan logicalPlan; + + private PhysicalPlan physicalPlan; + + private HashMap, Task> opTaskMap = + new HashMap, Task>(); + + private HashMap unionTaskMap = + new HashMap(); + + private HashMap mapJoinTaskMap = + new HashMap(); + + private List> seenOps = + new ArrayList>(); + + private List seenFileSinkOps = + new ArrayList(); - private ParseContext parseCtx; - private List> mvTask; - private List> rootTasks; - - private LinkedHashMap, GenMapRedCtx> mapCurrCtx; - private Task currTask; - private Operator currTopOp; - private UnionOperator currUnionOp; - private MapJoinOperator currMapJoinOp; - private String currAliasId; - private List> rootOps; + private LinkedHashMap, GenMapRedCtx> mapCurrCtx = + new LinkedHashMap, GenMapRedCtx>(); - /** - * Set of read entities. This list is generated by the walker and is - * passed to the hooks. - */ - private Set inputs; - /** - * Set of write entities. This list is generated by the walker and is - * passed to the hooks. 
- */ - private Set outputs; + private Task currTask = null; + private Operator currTopOp = null; + private UnionOperator currUnionOp = null; + private MapJoinOperator currMapJoinOp = null; + private String currAliasId = null; - public GenMRProcContext() { - } + private List> rootOps = + new ArrayList>(); + private List> mvTask; + /** - * @param conf hive configuration - * @param opTaskMap reducer to task mapping - * @param seenOps operator already visited - * @param parseCtx current parse context - * @param rootTasks root tasks for the plan - * @param mvTask the final move task - * @param mapCurrCtx operator to task mappings - * @param inputs the set of input tables/partitions generated by the walk - * @param outputs the set of destinations generated by the walk + * */ - public GenMRProcContext ( - HiveConf conf, - HashMap, Task> opTaskMap, - List> seenOps, - ParseContext parseCtx, - List> mvTask, - List> rootTasks, - LinkedHashMap, GenMapRedCtx> mapCurrCtx, - Set inputs, - Set outputs) + //TODO CWS add javadoc + public GenMRProcContext (Context context, LogicalPlan logicalPlan, PhysicalPlan physicalPlan, + List> mvTask) { - this.conf = conf; - this.opTaskMap = opTaskMap; - this.seenOps = seenOps; - this.mvTask = mvTask; - this.parseCtx = parseCtx; - this.rootTasks = rootTasks; - this.mapCurrCtx = mapCurrCtx; - this.inputs = inputs; - this.outputs = outputs; - currTask = null; - currTopOp = null; - currUnionOp = null; - currMapJoinOp = null; - currAliasId = null; - rootOps = new ArrayList>(); - rootOps.addAll(parseCtx.getTopOps().values()); - unionTaskMap = new HashMap(); - mapJoinTaskMap = new HashMap(); + this.context = context; + this.logicalPlan = logicalPlan; + this.physicalPlan = physicalPlan; + this.mvTask = mvTask; + + rootOps.addAll(logicalPlan.getTopOps()); } + public Context getContext() { + return context; + } + + public HiveConf getHiveConf() { + return context.getHiveConf(); + } + + + public LogicalPlan getLogicalPlan() { + return logicalPlan; + } + + public PhysicalPlan getPhysicalPlan() { + return physicalPlan; + } + /** * @return reducer to task mapping */ @@ -330,19 +320,7 @@ this.rootOps = rootOps; } - /** - * @return current parse context - */ - public ParseContext getParseCtx() { - return parseCtx; - } - /** - * @param parseCtx current parse context - */ - public void setParseCtx(ParseContext parseCtx) { - this.parseCtx = parseCtx; - } /** * @return the final move task @@ -359,32 +337,12 @@ } /** - * @return root tasks for the plan - */ - public List> getRootTasks() { - return rootTasks; - } - - /** - * @param rootTasks root tasks for the plan - */ - public void setRootTasks(List> rootTasks) { - this.rootTasks = rootTasks; - } - - /** * @return operator to task mappings */ public LinkedHashMap, GenMapRedCtx> getMapCurrCtx() { return mapCurrCtx; } - /** - * @param mapCurrCtx operator to task mappings - */ - public void setMapCurrCtx(LinkedHashMap, GenMapRedCtx> mapCurrCtx) { - this.mapCurrCtx = mapCurrCtx; - } /** * @return current task @@ -466,31 +424,4 @@ mapJoinTaskMap.put(op, mjCtx); } - /** - * Get the input set. - */ - public Set getInputs() { - return inputs; - } - - /** - * Get the output set. 
- */ - public Set getOutputs() { - return outputs; - } - - /** - * @return the conf - */ - public HiveConf getConf() { - return conf; - } - - /** - * @param conf the conf to set - */ - public void setConf(HiveConf conf) { - this.conf = conf; - } } Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRUnion1.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRUnion1.java (revision 836131) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRUnion1.java (working copy) @@ -22,7 +22,6 @@ import java.util.ArrayList; import java.util.Stack; import java.io.Serializable; -import java.io.File; import java.util.Map; import org.apache.hadoop.hive.ql.exec.Operator; @@ -32,13 +31,13 @@ import org.apache.hadoop.hive.ql.lib.Node; import org.apache.hadoop.hive.ql.lib.NodeProcessor; import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; +import org.apache.hadoop.hive.ql.parse.LogicalPlan; +import org.apache.hadoop.hive.ql.parse.PhysicalPlan; +import org.apache.hadoop.hive.ql.parse.DMLSemanticAnalyzer; import org.apache.hadoop.hive.ql.parse.SemanticException; -import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.Context; import org.apache.hadoop.hive.ql.plan.tableDesc; -import org.apache.hadoop.hive.ql.plan.partitionDesc; import org.apache.hadoop.hive.ql.plan.PlanUtils; -import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.ql.exec.TaskFactory; import org.apache.hadoop.hive.ql.plan.fileSinkDesc; import org.apache.hadoop.hive.conf.HiveConf; @@ -54,8 +53,6 @@ */ public class GenMRUnion1 implements NodeProcessor { - public GenMRUnion1() { - } /** * Union Operator encountered . @@ -70,8 +67,9 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx opProcCtx, Object... 
nodeOutputs) throws SemanticException { UnionOperator union = (UnionOperator)nd; GenMRProcContext ctx = (GenMRProcContext)opProcCtx; - ParseContext parseCtx = ctx.getParseCtx(); - UnionProcContext uCtx = parseCtx.getUCtx(); + LogicalPlan logicalPlan = ctx.getLogicalPlan(); + PhysicalPlan physicalPlan = ctx.getPhysicalPlan(); + UnionProcContext uCtx = logicalPlan.getUCtx(); // Map-only subqueries can be optimized in future to not write to a file in future Map, GenMapRedCtx> mapCurrCtx = ctx.getMapCurrCtx(); @@ -91,8 +89,9 @@ int pos = UnionProcFactory.getPositionParent(union, stack); // is the current task a root task - if (uPrsCtx.getRootTask(pos) && (!ctx.getRootTasks().contains(currTask))) - ctx.getRootTasks().add(currTask); + if (uPrsCtx.getRootTask(pos)) { + physicalPlan.addRootTask(currTask); + } GenMRUnionCtx uCtxTask = ctx.getUnionTask(union); Task uTask = null; @@ -104,7 +103,7 @@ if (uCtxTask == null) { uCtxTask = new GenMRUnionCtx(); uPlan = GenMapRedUtils.getMapRedWork(); - uTask = TaskFactory.get(uPlan, parseCtx.getConf()); + uTask = TaskFactory.get(uPlan, ctx.getHiveConf()); uCtxTask.setUTask(uTask); ctx.setUnionTask(union, uCtxTask); } @@ -117,7 +116,7 @@ PlanUtils.getFieldSchemasFromRowSchema(parent.getSchema(), "temporarycol")); // generate the temporary file - Context baseCtx = parseCtx.getContext(); + Context baseCtx = ctx.getContext(); String taskTmpDir = baseCtx.getMRTmpFileURI(); // Add the path to alias mapping @@ -131,7 +130,7 @@ Operator fs_op = OperatorFactory.get (new fileSinkDesc(taskTmpDir, tt_desc, - parseCtx.getConf().getBoolVar(HiveConf.ConfVars.COMPRESSINTERMEDIATE)), + ctx.getHiveConf().getBoolVar(HiveConf.ConfVars.COMPRESSINTERMEDIATE)), parent.getSchema()); assert parent.getChildOperators().size() == 1; Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java (revision 836131) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java (working copy) @@ -180,7 +180,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx ctx, Object... 
nodeOutputs) throws SemanticException {
     ReduceSinkOperator op = (ReduceSinkOperator)nd;
     ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx)ctx;
-    HashMap, OpParseContext> opToParseCtxMap =
+    Map, OpParseContext> opToParseCtxMap =
       cppCtx.getOpToParseCtxMap();
     RowResolver redSinkRR = opToParseCtxMap.get(op).getRR();
     reduceSinkDesc conf = op.getConf();
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java (revision 836131)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java (working copy)
@@ -22,6 +22,7 @@
 import java.util.Map;
 import java.util.Stack;
+import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.TableScanOperator;
 import org.apache.hadoop.hive.ql.exec.Task;
@@ -29,16 +30,14 @@
 import org.apache.hadoop.hive.ql.lib.Node;
 import org.apache.hadoop.hive.ql.lib.NodeProcessor;
 import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.parse.LogicalPlan;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
-import org.apache.hadoop.hive.ql.parse.ParseContext;
 import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx;

 /**
  * Processor for the rule - table scan
  */
 public class GenMRTableScan1 implements NodeProcessor {
-  public GenMRTableScan1() {
-  }

   /**
    * Table Sink encountered
@@ -48,17 +47,18 @@
   public Object process(Node nd, Stack stack, NodeProcessorCtx opProcCtx, Object... nodeOutputs) throws SemanticException {
     TableScanOperator op = (TableScanOperator)nd;
     GenMRProcContext ctx = (GenMRProcContext)opProcCtx;
-    ParseContext parseCtx = ctx.getParseCtx();
+    HiveConf hiveConf = ctx.getHiveConf();
+    LogicalPlan logicalPlan = ctx.getLogicalPlan();
     Map, GenMapRedCtx> mapCurrCtx = ctx.getMapCurrCtx();

     // create a dummy task
-    Task currTask = TaskFactory.get(GenMapRedUtils.getMapRedWork(), parseCtx.getConf());
+    Task currTask = TaskFactory.get(GenMapRedUtils.getMapRedWork(), hiveConf);
     Operator currTopOp = op;
     ctx.setCurrTask(currTask);
     ctx.setCurrTopOp(currTopOp);

-    for (String alias : parseCtx.getTopOps().keySet()) {
-      Operator currOp = parseCtx.getTopOps().get(alias);
+    for (String alias : logicalPlan.getTopOpAliases()) {
+      Operator currOp = logicalPlan.getTopOp(alias);
       if (currOp == op) {
         String currAliasId = alias;
         ctx.setCurrAliasId(currAliasId);
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/Transform.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/Transform.java (revision 836131)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/Transform.java (working copy)
@@ -18,7 +18,7 @@
 package org.apache.hadoop.hive.ql.optimizer;

-import org.apache.hadoop.hive.ql.parse.ParseContext;
+import org.apache.hadoop.hive.ql.parse.LogicalPlan;
 import org.apache.hadoop.hive.ql.parse.SemanticException;

 /**
@@ -32,5 +32,5 @@
-   * @return ParseContext
+   * @param logicalPlan the logical plan, transformed in place
    * @throws SemanticException
    */
-  public ParseContext transform(ParseContext pctx) throws SemanticException;
+  public void transform(LogicalPlan logicalPlan) throws SemanticException;
 }
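For reference, a transformation under the revised interface mutates the LogicalPlan it is handed and returns nothing, instead of threading a ParseContext through and returning it. A minimal sketch of an implementation (illustrative only, not part of the patch; the class name is made up, and the element type returned by LogicalPlan.getTopOps() is assumed from how the other transforms in this patch consume it):

    // Hypothetical example -- not included in this patch.
    package org.apache.hadoop.hive.ql.optimizer;

    import java.io.Serializable;

    import org.apache.hadoop.hive.ql.exec.Operator;
    import org.apache.hadoop.hive.ql.parse.LogicalPlan;
    import org.apache.hadoop.hive.ql.parse.SemanticException;

    public class NoopTransform implements Transform {
      // New contract: inspect and rewrite the plan in place; nothing is returned.
      public void transform(LogicalPlan logicalPlan) throws SemanticException {
        for (Operator<? extends Serializable> topOp : logicalPlan.getTopOps()) {
          // a real transform would match and rewrite operators starting from topOp
        }
      }
    }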
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink3.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink3.java (revision 836131)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink3.java (working copy)
@@ -24,8 +24,6 @@
 import java.io.Serializable;
 import org.apache.hadoop.hive.ql.exec.Operator;
-import org.apache.hadoop.hive.ql.exec.UnionOperator;
-import org.apache.hadoop.hive.ql.exec.JoinOperator;
 import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
 import org.apache.hadoop.hive.ql.exec.Task;
 import org.apache.hadoop.hive.ql.plan.mapredWork;
@@ -35,9 +33,6 @@
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx;
 import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext;
-import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext.UnionParseContext;
-import org.apache.hadoop.hive.ql.parse.ParseContext;
-import org.apache.hadoop.hive.ql.plan.reduceSinkDesc;

 /**
  * Processor for the rule - union followed by reduce sink
@@ -56,8 +51,7 @@
     ReduceSinkOperator op = (ReduceSinkOperator)nd;
     GenMRProcContext ctx = (GenMRProcContext)opProcCtx;
-    ParseContext parseCtx = ctx.getParseCtx();
-    UnionProcContext uCtx = parseCtx.getUCtx();
+    UnionProcContext uCtx = ctx.getLogicalPlan().getUCtx();

     // union was map only - no special processing needed
     if (uCtx.isMapOnlySubq())
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink4.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink4.java (revision 836131)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink4.java (working copy)
@@ -32,7 +32,6 @@
 import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx;
-import org.apache.hadoop.hive.ql.parse.ParseContext;

 /**
  * Processor for the rule - map join followed by reduce sink
@@ -51,8 +50,6 @@
     ReduceSinkOperator op = (ReduceSinkOperator)nd;
     GenMRProcContext ctx = (GenMRProcContext)opProcCtx;
-    ParseContext parseCtx = ctx.getParseCtx();
-
     // map-join consisted on a bunch of map-only jobs, and it has been split after the mapjoin
     Operator reducer = op.getChildOperators().get(0);
     Map, GenMapRedCtx> mapCurrCtx = ctx.getMapCurrCtx();
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java (revision 836131)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java (working copy)
@@ -42,8 +42,7 @@
 import org.apache.hadoop.hive.ql.lib.Node;
 import org.apache.hadoop.hive.ql.lib.NodeProcessor;
 import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
-import org.apache.hadoop.hive.ql.parse.ParseContext;
-import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer;
+import org.apache.hadoop.hive.ql.parse.LogicalPlan;
 import org.apache.hadoop.hive.ql.parse.RowResolver;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.parse.TypeCheckProcFactory;
@@ -62,7 +61,6 @@
 import org.apache.hadoop.hive.ql.plan.tableScanDesc;
 import org.apache.hadoop.hive.ql.plan.partitionDesc;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-import org.apache.hadoop.hive.ql.Context;
 import org.apache.hadoop.hive.conf.HiveConf;

 /**
@@ -70,9 +68,6 @@
  */
 public class GenMRFileSink1 implements NodeProcessor {
-  public GenMRFileSink1() {
-  }
-
   /**
    * File Sink Operator encountered
    * @param nd the file sink operator 
encountered @@ -80,7 +75,7 @@ */ public Object process(Node nd, Stack stack, NodeProcessorCtx opProcCtx, Object... nodeOutputs) throws SemanticException { GenMRProcContext ctx = (GenMRProcContext)opProcCtx; - ParseContext parseCtx = ctx.getParseCtx(); + HiveConf hiveConf = ctx.getHiveConf(); boolean chDir = false; Task currTask = ctx.getCurrTask(); @@ -100,9 +95,9 @@ { // There are separate configuration parameters to control whether to merge for a map-only job // or for a map-reduce job - if ((parseCtx.getConf().getBoolVar(HiveConf.ConfVars.HIVEMERGEMAPFILES) && + if ((hiveConf.getBoolVar(HiveConf.ConfVars.HIVEMERGEMAPFILES) && (((mapredWork)currTask.getWork()).getReducer() == null)) || - parseCtx.getConf().getBoolVar(HiveConf.ConfVars.HIVEMERGEMAPREDFILES)) + hiveConf.getBoolVar(HiveConf.ConfVars.HIVEMERGEMAPREDFILES)) chDir = true; } } @@ -119,6 +114,8 @@ } private void createMergeJob(FileSinkOperator fsOp, GenMRProcContext ctx, String finalName) { + HiveConf hiveConf = ctx.getHiveConf(); + LogicalPlan logicalPlan = ctx.getLogicalPlan(); Task currTask = ctx.getCurrTask(); RowSchema fsRS = fsOp.getSchema(); @@ -138,20 +135,23 @@ ArrayList outputColumns = new ArrayList(); for (int i = 0; i < valueCols.size(); i++) - outputColumns.add(SemanticAnalyzer.getColumnInternalName(i)); + outputColumns.add(HiveConf.getColumnInternalName(i)); reduceSinkDesc rsDesc = PlanUtils.getReduceSinkDesc(new ArrayList(), valueCols, - outputColumns, false, -1, -1, -1); + outputColumns, false, -1, -1, -1); + + // FIXME CWS NOOP? ReduceSinkOperator rsOp = (ReduceSinkOperator)OperatorFactory.getAndMakeChild(rsDesc, fsRS, ts_op); + + mapredWork cplan = GenMapRedUtils.getMapRedWork(); - ParseContext parseCtx = ctx.getParseCtx(); - Task mergeTask = TaskFactory.get(cplan, parseCtx.getConf()); + Task mergeTask = TaskFactory.get(cplan, hiveConf); fileSinkDesc fsConf = fsOp.getConf(); // Add the extract operator to get the value fields RowResolver out_rwsch = new RowResolver(); - RowResolver interim_rwsch = ctx.getParseCtx().getOpParseCtx().get(fsOp).getRR(); + RowResolver interim_rwsch = logicalPlan.getRowResolver(fsOp); Integer pos = Integer.valueOf(0); for(ColumnInfo colInfo: interim_rwsch.getColumnInfos()) { String [] info = interim_rwsch.reverseLookup(colInfo.getInternalName()); @@ -161,7 +161,7 @@ pos = Integer.valueOf(pos.intValue() + 1); } - Operator extract = + Operator extract = OperatorFactory.getAndMakeChild( new extractDesc(new exprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, Utilities.ReduceField.VALUE.toString(), "", false)), @@ -171,8 +171,7 @@ fsConf.getTableInfo().getProperties().remove(org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_PARTITION_COLUMNS); FileSinkOperator newOutput = (FileSinkOperator)OperatorFactory.getAndMakeChild( - new fileSinkDesc(finalName, ts, - parseCtx.getConf().getBoolVar(HiveConf.ConfVars.COMPRESSRESULT)), + new fileSinkDesc(finalName, ts, hiveConf.getBoolVar(HiveConf.ConfVars.COMPRESSRESULT)), fsRS, extract); cplan.setReducer(extract); @@ -183,14 +182,16 @@ cplan.getPathToPartitionInfo().put(fsConf.getDirName(), new partitionDesc(fsConf.getTableInfo(), null)); cplan.setNumReduceTasks(-1); - moveWork dummyMv = new moveWork(null, null, null, new loadFileDesc(fsOp.getConf().getDirName(), finalName, true, null, null), false); - Task dummyMergeTask = TaskFactory.get(dummyMv, ctx.getConf()); + moveWork dummyMv = new moveWork(ctx.getPhysicalPlan(), null, + new loadFileDesc(fsOp.getConf().getDirName(), finalName, true, null, null), false); + + Task dummyMergeTask = 
TaskFactory.get(dummyMv, hiveConf); List listWorks = new ArrayList(); listWorks.add(dummyMv); listWorks.add(mergeTask.getWork()); ConditionalWork cndWork = new ConditionalWork(listWorks); - ConditionalTask cndTsk = (ConditionalTask)TaskFactory.get(cndWork, ctx.getConf()); + ConditionalTask cndTsk = (ConditionalTask)TaskFactory.get(cndWork, hiveConf); List> listTasks = new ArrayList>(); listTasks.add(dummyMergeTask); listTasks.add(mergeTask); @@ -250,10 +251,7 @@ dest = fsOp.getConf().getDirName(); // generate the temporary file - ParseContext parseCtx = ctx.getParseCtx(); - Context baseCtx = parseCtx.getContext(); - String tmpDir = baseCtx.getMRTmpFileURI(); - + String tmpDir = ctx.getContext().getMRTmpFileURI(); fsOp.getConf().setDirName(tmpDir); } @@ -267,7 +265,6 @@ String currAliasId = ctx.getCurrAliasId(); HashMap, Task> opTaskMap = ctx.getOpTaskMap(); List> seenOps = ctx.getSeenOps(); - List> rootTasks = ctx.getRootTasks(); // Set the move task to be dependent on the current task if (mvTask != null) @@ -283,7 +280,7 @@ seenOps.add(currTopOp); GenMapRedUtils.setTaskPlan(currAliasId, currTopOp, (mapredWork) currTask.getWork(), false, ctx); opTaskMap.put(null, currTask); - rootTasks.add(currTask); + ctx.getPhysicalPlan().addRootTask(currTask); } else { if (!seenOps.contains(currTopOp)) { Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/OpWalkerCtx.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/OpWalkerCtx.java (revision 836131) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/OpWalkerCtx.java (working copy) @@ -18,7 +18,7 @@ package org.apache.hadoop.hive.ql.optimizer.ppr; -import java.util.HashMap; +import java.util.Map; import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; @@ -35,17 +35,17 @@ * Map from tablescan operator to partition pruning predicate * that is initialized from the ParseContext */ - private HashMap opToPartPruner; + private Map opToPartPruner; /** * Constructor */ - public OpWalkerCtx(HashMap opToPartPruner) { + public OpWalkerCtx(Map opToPartPruner) { this.opToPartPruner = opToPartPruner; this.hasNonPartCols = false; } - public HashMap getOpToPartPruner() { + public Map getOpToPartPruner() { return this.opToPartPruner; } Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java (revision 836131) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java (working copy) @@ -44,7 +44,7 @@ import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.optimizer.Transform; import org.apache.hadoop.hive.ql.parse.ErrorMsg; -import org.apache.hadoop.hive.ql.parse.ParseContext; +import org.apache.hadoop.hive.ql.parse.LogicalPlan; import org.apache.hadoop.hive.ql.parse.PrunedPartitionList; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.exprNodeColumnDesc; @@ -70,10 +70,10 @@ * @see org.apache.hadoop.hive.ql.optimizer.Transform#transform(org.apache.hadoop.hive.ql.parse.ParseContext) */ @Override - public ParseContext transform(ParseContext pctx) throws SemanticException { + public void transform(LogicalPlan logicalPlan) throws SemanticException { // create a the context for walking operators - OpWalkerCtx opWalkerCtx = new OpWalkerCtx(pctx.getOpToPartPruner()); + OpWalkerCtx 
opWalkerCtx = new OpWalkerCtx(logicalPlan.getTsOpToPartPrunerMap());

     Map opRules = new LinkedHashMap();
     opRules.put(new RuleRegExp("R1", "(TS%FIL%)|(TS%FIL%FIL%)"),
@@ -85,11 +85,8 @@
     // Create a list of topop nodes
     ArrayList topNodes = new ArrayList();
-    topNodes.addAll(pctx.getTopOps().values());
+    topNodes.addAll(logicalPlan.getTopOps());
     ogw.startWalking(topNodes, null);
-    pctx.setHasNonPartCols(opWalkerCtx.getHasNonPartCols());
-
-    return pctx;
   }

   /**
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java (revision 836131)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java (working copy)
@@ -22,7 +22,7 @@
 import java.util.List;

 import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.ql.parse.ParseContext;
+import org.apache.hadoop.hive.ql.parse.LogicalPlan;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.ppd.PredicatePushDown;
 import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner;
@@ -32,15 +32,20 @@
  * Implementation of the optimizer
  */
 public class Optimizer {
-  private ParseContext pctx;
-  private List transformations;
-  /**
-   * create the list of transformations
-   * @param hiveConf
-   */
-  public void initialize(HiveConf hiveConf) {
-    transformations = new ArrayList();
+  /**
+   * invoke all the transformations one-by-one, and alter the query plan
+   * @param logicalPlan the logical plan, altered in place
+   * @throws SemanticException
+   */
+  public static void optimize(LogicalPlan logicalPlan) throws SemanticException {
+    for (Transform t : getTransformations(logicalPlan.getHiveConf())) {
+      t.transform(logicalPlan);
+    }
+  }
+
+  private static List getTransformations(HiveConf hiveConf) {
+    List transformations = new ArrayList();
     if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTCP)) {
       transformations.add(new ColumnPruner());
     }
@@ -51,31 +56,7 @@
     transformations.add(new UnionProcessor());
     transformations.add(new MapJoinProcessor());
     transformations.add(new JoinReorder());
+
+    return transformations;
   }
-
-  /**
-   * invoke all the transformations one-by-one, and alter the query plan
-   * @return ParseContext
-   * @throws SemanticException
-   */
-  public ParseContext optimize() throws SemanticException {
-    for (Transform t : transformations)
-      pctx = t.transform(pctx);
-    return pctx;
-  }
-
-  /**
-   * @return the pctx
-   */
-  public ParseContext getPctx() {
-    return pctx;
-  }
-
-  /**
-   * @param pctx the pctx to set
-   */
-  public void setPctx(ParseContext pctx) {
-    this.pctx = pctx;
-  }
-
 }
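With the Optimizer reduced to a single static entry point, callers no longer instantiate it, seed it with a ParseContext, and read the context back out; they hand the LogicalPlan over once and each Transform mutates it in place. A sketch of the calling side (illustrative only; the real call site would live in the analyzer, which is outside this excerpt):

    // Hypothetical caller -- not part of the patch.
    import org.apache.hadoop.hive.ql.optimizer.Optimizer;
    import org.apache.hadoop.hive.ql.parse.LogicalPlan;
    import org.apache.hadoop.hive.ql.parse.SemanticException;

    class OptimizerCaller {
      static void runOptimizations(LogicalPlan logicalPlan) throws SemanticException {
        // Before: new Optimizer(), setPctx(pctx), initialize(conf), pctx = optimize().
        // After: one static call; the transform list is built from
        // logicalPlan.getHiveConf() and every transform alters the plan in place.
        Optimizer.optimize(logicalPlan);
      }
    }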
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/JoinReorder.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/JoinReorder.java (revision 836131)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/JoinReorder.java (working copy)
@@ -19,19 +19,14 @@
 package org.apache.hadoop.hive.ql.optimizer;

 import java.io.Serializable;
-import java.util.ArrayList;
 import java.util.HashSet;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
 import java.util.Set;

 import org.apache.hadoop.hive.ql.exec.JoinOperator;
-import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
 import org.apache.hadoop.hive.ql.exec.TableScanOperator;
-import org.apache.hadoop.hive.ql.parse.ParseContext;
+import org.apache.hadoop.hive.ql.parse.LogicalPlan;
 import org.apache.hadoop.hive.ql.parse.QBJoinTree;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
@@ -55,7 +50,7 @@
    * @return The estimated size - 0 (no streamed tables), 1 (streamed tables in
    *         subtree) or 2 (a streamed table)
    */
-  private int getOutputSize(Operator operator,
+  private static int getOutputSize(Operator operator,
       Set bigTables) {
     // If a join operator contains a big subtree, there is a chance that its
     // output is also big, so the output size is 1 (medium)
@@ -96,10 +91,10 @@
    * @param joinCtx The join context
    * @return Set of all big tables
    */
-  private Set getBigTables(ParseContext joinCtx) {
+  private static Set getBigTables(LogicalPlan logicalPlan) {
     Set bigTables = new HashSet();

-    for (QBJoinTree qbJoin: joinCtx.getJoinContext().values()) {
+    for (QBJoinTree qbJoin: logicalPlan.getJoinTrees()) {
       if (qbJoin.getStreamAliases() != null) {
         bigTables.addAll(qbJoin.getStreamAliases());
       }
@@ -115,7 +110,7 @@
    * @param joinOp The join operator to be processed
    * @param bigTables Set of all big tables
    */
-  private void reorder(JoinOperator joinOp, Set bigTables) {
+  private static void reorder(JoinOperator joinOp, Set bigTables) {
     int count = joinOp.getParentOperators().size();

     // Find the biggest reduce sink
@@ -153,13 +148,11 @@
    *
    * @param pactx current parse context
    */
-  public ParseContext transform(ParseContext pactx) throws SemanticException {
-    Set bigTables = getBigTables(pactx);
+  public void transform(LogicalPlan logicalPlan) throws SemanticException {
+    Set bigTables = getBigTables(logicalPlan);

-    for (JoinOperator joinOp: pactx.getJoinContext().keySet()) {
+    for (JoinOperator joinOp: logicalPlan.getJoinOps()) {
       reorder(joinOp, bigTables);
     }
-
-    return pactx;
   }
 }
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java (revision 836131)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java (working copy)
@@ -34,6 +34,7 @@
 import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
 import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
 import org.apache.hadoop.hive.ql.exec.JoinOperator;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
 import org.apache.hadoop.hive.ql.exec.UnionOperator;
 import org.apache.hadoop.hive.ql.exec.Task;
 import org.apache.hadoop.hive.ql.exec.TaskFactory;
@@ -90,9 +91,7 @@
     plan.setNumReduceTasks(desc.getNumReducers());
-    List> rootTasks = opProcCtx.getRootTasks();
-
-    rootTasks.add(currTask);
+    opProcCtx.getPhysicalPlan().addRootTask(currTask);
     if (reducer.getClass() == JoinOperator.class)
       plan.setNeedsTagging(true);
@@ -179,8 +178,7 @@
     // The map is overloaded to keep track of mapjoins also
     opTaskMap.put(op, currTask);
-    List> rootTasks = opProcCtx.getRootTasks();
-    rootTasks.add(currTask);
+    opProcCtx.getPhysicalPlan().addRootTask(currTask);
     assert currTopOp != null;
     List> seenOps = opProcCtx.getSeenOps();
@@ -339,8 +337,7 @@
       parTask.addDependentTask(currTask);
     }
-    if (opProcCtx.getRootTasks().contains(currTask))
-      opProcCtx.getRootTasks().remove(currTask);
+    opProcCtx.getPhysicalPlan().removeRootTask(currTask);
   }

   opProcCtx.setCurrTask(currTask);
@@ -355,8 +352,7 @@
     throws SemanticException {
     // Generate a new task
     mapredWork cplan = getMapRedWork();
-    ParseContext parseCtx = opProcCtx.getParseCtx();
-    Task redTask = TaskFactory.get(cplan, parseCtx.getConf());
+    Task redTask = TaskFactory.get(cplan, 
opProcCtx.getHiveConf()); Operator reducer = op.getChildOperators().get(0); // Add the reducer @@ -384,8 +380,8 @@ public static void setTaskPlan(String alias_id, Operator topOp, mapredWork plan, boolean local, GenMRProcContext opProcCtx) throws SemanticException { - ParseContext parseCtx = opProcCtx.getParseCtx(); - Set inputs = opProcCtx.getInputs(); + LogicalPlan logicalPlan = opProcCtx.getLogicalPlan(); + PhysicalPlan physicalPlan = opProcCtx.getPhysicalPlan(); ArrayList partDir = new ArrayList(); ArrayList partDesc = new ArrayList(); @@ -396,9 +392,10 @@ PrunedPartitionList partsList = null; try { - partsList = PartitionPruner.prune(parseCtx.getTopToTable().get(topOp), - parseCtx.getOpToPartPruner().get(topOp), - opProcCtx.getConf(), alias_id); + partsList = PartitionPruner.prune(logicalPlan.getTable((TableScanOperator)topOp), + logicalPlan.getPartPruner((TableScanOperator)topOp), + opProcCtx.getHiveConf(), + alias_id); } catch (SemanticException e) { throw e; } catch (HiveException e) { @@ -427,13 +424,14 @@ } plan.getAliasToPartnInfo().put(alias_id, aliasPartnDesc); - SamplePruner samplePruner = parseCtx.getAliasToSamplePruner().get(alias_id); + SamplePruner samplePruner = logicalPlan.getSamplePruner(alias_id); for (Partition part : parts) { - if (part.getTable().isPartitioned()) - inputs.add(new ReadEntity(part)); - else - inputs.add(new ReadEntity(part.getTable())); + if (part.getTable().isPartitioned()) { + physicalPlan.addInput(new ReadEntity(part)); + } else { + physicalPlan.addInput(new ReadEntity(part.getTable())); + } // Later the properties have to come from the partition as opposed // to from the table in order to support versioning. @@ -586,19 +584,6 @@ return work; } - /** - * insert in the map for the operator to row resolver - * @param op operator created - * @param rr row resolver - * @param parseCtx parse context - */ - @SuppressWarnings("nls") - private static Operator putOpInsertMap(Operator op, RowResolver rr, ParseContext parseCtx) - { - OpParseContext ctx = new OpParseContext(rr); - parseCtx.getOpParseCtx().put(op, ctx); - return op; - } @SuppressWarnings("nls") /** @@ -615,19 +600,18 @@ Task childTask, GenMRProcContext opProcCtx, boolean setReducer, boolean local, int posn) throws SemanticException { - mapredWork plan = (mapredWork) childTask.getWork(); Operator currTopOp = opProcCtx.getCurrTopOp(); - - ParseContext parseCtx = opProcCtx.getParseCtx(); + HiveConf hiveConf = opProcCtx.getHiveConf(); + LogicalPlan logicalPlan = opProcCtx.getLogicalPlan(); + PhysicalPlan physicalPlan = opProcCtx.getPhysicalPlan(); + parentTask.addDependentTask(childTask); // Root Task cannot depend on any other task, therefore childTask cannot be a root Task - List> rootTasks = opProcCtx.getRootTasks(); - if (rootTasks.contains(childTask)) - rootTasks.remove(childTask); + physicalPlan.removeRootTask(childTask); // generate the temporary file - Context baseCtx = parseCtx.getContext(); + Context baseCtx = opProcCtx.getContext(); String taskTmpDir = baseCtx.getMRTmpFileURI(); Operator parent = op.getParentOperators().get(posn); @@ -635,13 +619,15 @@ PlanUtils.getIntermediateFileTableDesc(PlanUtils.getFieldSchemasFromRowSchema(parent.getSchema(), "temporarycol")); // Create a file sink operator for this file name - boolean compressIntermediate = parseCtx.getConf().getBoolVar(HiveConf.ConfVars.COMPRESSINTERMEDIATE); + boolean compressIntermediate = hiveConf.getBoolVar(HiveConf.ConfVars.COMPRESSINTERMEDIATE); fileSinkDesc desc = new fileSinkDesc(taskTmpDir, tt_desc, 
compressIntermediate); if (compressIntermediate) { - desc.setCompressCodec(parseCtx.getConf().getVar(HiveConf.ConfVars.COMPRESSINTERMEDIATECODEC)); - desc.setCompressType(parseCtx.getConf().getVar(HiveConf.ConfVars.COMPRESSINTERMEDIATETYPE)); + desc.setCompressCodec(hiveConf.getVar(HiveConf.ConfVars.COMPRESSINTERMEDIATECODEC)); + desc.setCompressType(hiveConf.getVar(HiveConf.ConfVars.COMPRESSINTERMEDIATETYPE)); } - Operator fs_op = putOpInsertMap(OperatorFactory.get(desc, parent.getSchema()), null, parseCtx); + + Operator fs_op = OperatorFactory.get(desc, parent.getSchema()); + logicalPlan.setRowResolver(fs_op, null); // replace the reduce child with this operator List> childOpList = parent.getChildOperators(); @@ -657,8 +643,8 @@ fs_op.setParentOperators(parentOpList); // create a dummy tableScan operator on top of op - Operator ts_op = - putOpInsertMap(OperatorFactory.get(tableScanDesc.class, parent.getSchema()), null, parseCtx); + Operator ts_op = OperatorFactory.get(tableScanDesc.class, parent.getSchema()); + logicalPlan.setRowResolver(ts_op, null); childOpList = new ArrayList>(); childOpList.add(op); Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinFactory.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinFactory.java (revision 836131) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinFactory.java (working copy) @@ -35,10 +35,9 @@ import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.OperatorFactory; import org.apache.hadoop.hive.ql.exec.UnionOperator; +import org.apache.hadoop.hive.ql.parse.LogicalPlan; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.parse.ErrorMsg; -import org.apache.hadoop.hive.ql.parse.ParseContext; -import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMRUnionCtx; import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx; import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMRMapJoinCtx; import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext; @@ -122,8 +121,7 @@ GenMRProcContext opProcCtx = (GenMRProcContext)procCtx; mapredWork cplan = GenMapRedUtils.getMapRedWork(); - ParseContext parseCtx = opProcCtx.getParseCtx(); - Task redTask = TaskFactory.get(cplan, parseCtx.getConf()); + Task redTask = TaskFactory.get(cplan, opProcCtx.getHiveConf()); Task currTask = opProcCtx.getCurrTask(); // find the branch on which this processor was invoked @@ -168,15 +166,17 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Object... 
nodeOutputs) throws SemanticException { + GenMRProcContext ctx = (GenMRProcContext)procCtx; + HiveConf hiveConf = ctx.getHiveConf(); + LogicalPlan logicalPlan = ctx.getLogicalPlan(); + SelectOperator sel = (SelectOperator)nd; MapJoinOperator mapJoin = (MapJoinOperator)sel.getParentOperators().get(0); assert sel.getParentOperators().size() == 1; + - GenMRProcContext ctx = (GenMRProcContext)procCtx; - ParseContext parseCtx = ctx.getParseCtx(); - // is the mapjoin followed by a reducer - List listMapJoinOps = parseCtx.getListMapJoinOpsNoReducer(); + List listMapJoinOps = logicalPlan.getMapJoinOpsNoReducer(); if (listMapJoinOps.contains(mapJoin)) { ctx.setCurrAliasId(null); @@ -196,14 +196,14 @@ } mapredWork mjPlan = GenMapRedUtils.getMapRedWork(); - Task mjTask = TaskFactory.get(mjPlan, parseCtx.getConf()); + Task mjTask = TaskFactory.get(mjPlan, hiveConf); tableDesc tt_desc = PlanUtils.getIntermediateFileTableDesc( PlanUtils.getFieldSchemasFromRowSchema(mapJoin.getSchema(), "temporarycol")); // generate the temporary file - Context baseCtx = parseCtx.getContext(); + Context baseCtx = ctx.getContext(); String taskTmpDir = baseCtx.getMRTmpFileURI(); // Add the path to alias mapping @@ -217,7 +217,7 @@ Operator fs_op = OperatorFactory.get (new fileSinkDesc(taskTmpDir, tt_desc, - parseCtx.getConf().getBoolVar(HiveConf.ConfVars.COMPRESSINTERMEDIATE)), + hiveConf.getBoolVar(HiveConf.ConfVars.COMPRESSINTERMEDIATE)), mapJoin.getSchema()); assert mapJoin.getChildOperators().size() == 1; @@ -251,7 +251,6 @@ MapJoinOperator mapJoin = (MapJoinOperator)nd; GenMRProcContext ctx = (GenMRProcContext)procCtx; - ParseContext parseCtx = ctx.getParseCtx(); MapJoinOperator oldMapJoin = ctx.getCurrMapJoinOp(); assert oldMapJoin != null; GenMRMapJoinCtx mjCtx = ctx.getMapJoinCtx(mapJoin); @@ -268,7 +267,6 @@ GenMapRedCtx mapredCtx = mapCurrCtx.get(mapJoin.getParentOperators().get(pos)); Task currTask = mapredCtx.getCurrTask(); mapredWork currPlan = (mapredWork) currTask.getWork(); - String currAliasId = mapredCtx.getCurrAliasId(); Operator reducer = mapJoin; HashMap, Task> opTaskMap = ctx.getOpTaskMap(); Task opMapTask = opTaskMap.get(reducer); @@ -301,17 +299,15 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Object... 
nodeOutputs) throws SemanticException { GenMRProcContext ctx = (GenMRProcContext)procCtx; + LogicalPlan logicalPlan = ctx.getLogicalPlan(); + UnionProcContext uCtx = logicalPlan.getUCtx(); - ParseContext parseCtx = ctx.getParseCtx(); - UnionProcContext uCtx = parseCtx.getUCtx(); - // union was map only - no special processing needed if (uCtx.isMapOnlySubq()) return (new TableScanMapJoin()).process(nd, stack, procCtx, nodeOutputs); UnionOperator currUnion = ctx.getCurrUnionOp(); assert currUnion != null; - GenMRUnionCtx unionCtx = ctx.getUnionTask(currUnion); MapJoinOperator mapJoin = (MapJoinOperator)nd; // find the branch on which this processor was invoked Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPruner.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPruner.java (revision 836131) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPruner.java (working copy) @@ -20,7 +20,6 @@ import java.io.Serializable; import java.util.ArrayList; -import java.util.HashMap; import java.util.LinkedHashMap; import java.util.Map; import org.apache.hadoop.hive.ql.exec.FileSinkOperator; @@ -35,9 +34,8 @@ import org.apache.hadoop.hive.ql.lib.NodeProcessor; import org.apache.hadoop.hive.ql.lib.Rule; import org.apache.hadoop.hive.ql.lib.RuleRegExp; +import org.apache.hadoop.hive.ql.parse.LogicalPlan; import org.apache.hadoop.hive.ql.parse.OpParseContext; -import org.apache.hadoop.hive.ql.parse.ParseContext; -import org.apache.hadoop.hive.ql.parse.RowResolver; import org.apache.hadoop.hive.ql.parse.SemanticException; /** @@ -48,38 +46,15 @@ * changes the row resolver, the tree is built again. This can be optimized later to patch the tree. */ public class ColumnPruner implements Transform { - protected ParseContext pGraphContext; - private HashMap, OpParseContext> opToParseCtxMap; - - - /** - * empty constructor - */ - public ColumnPruner() { - pGraphContext = null; - } - - /** - * update the map between operator and row resolver - * @param op operator being inserted - * @param rr row resolver of the operator - * @return - */ - @SuppressWarnings("nls") - private Operator putOpInsertMap(Operator op, RowResolver rr) { - OpParseContext ctx = new OpParseContext(rr); - pGraphContext.getOpParseCtx().put(op, ctx); - return op; - } /** * Transform the query tree. For each table under consideration, check if all columns are needed. 
If not, * only select the operators needed at the beginning and proceed * @param pactx the current parse context */ - public ParseContext transform(ParseContext pactx) throws SemanticException { - this.pGraphContext = pactx; - this.opToParseCtxMap = pGraphContext.getOpParseCtx(); + public void transform(LogicalPlan logicalPlan) throws SemanticException { + Map, OpParseContext> opToParseCtxMap = + logicalPlan.getOpToParseContextMap(); // generate pruned column list for all relevant operators ColumnPrunerProcCtx cppCtx = new ColumnPrunerProcCtx(opToParseCtxMap); @@ -101,9 +76,8 @@ // Create a list of topop nodes ArrayList topNodes = new ArrayList(); - topNodes.addAll(pGraphContext.getTopOps().values()); + topNodes.addAll(logicalPlan.getTopOps()); ogw.startWalking(topNodes, null); - return pGraphContext; } /** Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcessor.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcessor.java (revision 836131) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcessor.java (working copy) @@ -29,7 +29,7 @@ import org.apache.hadoop.hive.ql.lib.NodeProcessor; import org.apache.hadoop.hive.ql.lib.Rule; import org.apache.hadoop.hive.ql.lib.RuleRegExp; -import org.apache.hadoop.hive.ql.parse.ParseContext; +import org.apache.hadoop.hive.ql.parse.LogicalPlan; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.optimizer.Transform; @@ -46,16 +46,11 @@ public class UnionProcessor implements Transform { /** - * empty constructor - */ - public UnionProcessor() { } - - /** * Transform the query tree. For each union, store the fact whether both the * sub-queries are map-only * @param pCtx the current parse context */ - public ParseContext transform(ParseContext pCtx) throws SemanticException { + public void transform(LogicalPlan logicalPlan) throws SemanticException { // create a walker which walks the tree in a DFS manner while maintaining the operator stack. 
Map opRules = new LinkedHashMap(); opRules.put(new RuleRegExp(new String("R1"), "RS%.*UNION%"), UnionProcFactory.getMapRedUnion()); @@ -69,10 +64,8 @@ // Create a list of topop nodes ArrayList topNodes = new ArrayList(); - topNodes.addAll(pCtx.getTopOps().values()); + topNodes.addAll(logicalPlan.getTopOps()); ogw.startWalking(topNodes, null); - pCtx.setUCtx(uCtx); - - return pCtx; + logicalPlan.setUCtx(uCtx); } } Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcCtx.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcCtx.java (revision 836131) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcCtx.java (working copy) @@ -25,8 +25,6 @@ import java.util.Map; import org.apache.hadoop.hive.ql.exec.CommonJoinOperator; -import org.apache.hadoop.hive.ql.exec.JoinOperator; -import org.apache.hadoop.hive.ql.exec.MapJoinOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.SelectOperator; import org.apache.hadoop.hive.ql.exec.Utilities; @@ -43,12 +41,12 @@ private Map,List> prunedColLists; - private HashMap, OpParseContext> opToParseCtxMap; + private Map, OpParseContext> opToParseCtxMap; private Map>> joinPrunedColLists; - public ColumnPrunerProcCtx(HashMap, OpParseContext> opToParseContextMap) { + public ColumnPrunerProcCtx(Map, OpParseContext> opToParseContextMap) { prunedColLists = new HashMap, List>(); this.opToParseCtxMap = opToParseContextMap; joinPrunedColLists = new HashMap>>(); @@ -65,7 +63,7 @@ return prunedColLists.get(op); } - public HashMap, OpParseContext> getOpToParseCtxMap() { + public Map, OpParseContext> getOpToParseCtxMap() { return opToParseCtxMap; } Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java (revision 836131) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java (working copy) @@ -25,7 +25,6 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Map; -import java.util.Set; import java.util.Stack; import org.apache.hadoop.hive.ql.exec.ColumnInfo; @@ -46,8 +45,7 @@ import org.apache.hadoop.hive.ql.lib.RuleRegExp; import org.apache.hadoop.hive.ql.parse.ErrorMsg; import org.apache.hadoop.hive.ql.parse.GenMapRedWalker; -import org.apache.hadoop.hive.ql.parse.OpParseContext; -import org.apache.hadoop.hive.ql.parse.ParseContext; +import org.apache.hadoop.hive.ql.parse.LogicalPlan; import org.apache.hadoop.hive.ql.parse.QBJoinTree; import org.apache.hadoop.hive.ql.parse.RowResolver; import org.apache.hadoop.hive.ql.parse.SemanticException; @@ -69,30 +67,15 @@ * In future, once statistics are implemented, this transformation can also be done based on costs. */ public class MapJoinProcessor implements Transform { - private ParseContext pGraphContext; - - /** - * empty constructor - */ - public MapJoinProcessor() { - pGraphContext = null; - } - - @SuppressWarnings("nls") - private Operator putOpInsertMap(Operator op, RowResolver rr) { - OpParseContext ctx = new OpParseContext(rr); - pGraphContext.getOpParseCtx().put(op, ctx); - return op; - } - /** * convert a regular join to a a map-side join. * @param op join operator * @param qbJoin qb join tree * @param mapJoinPos position of the source to be read as part of map-reduce framework. 
All other sources are cached in memory */ - private MapJoinOperator convertMapJoin(ParseContext pctx, JoinOperator op, QBJoinTree joinTree, int mapJoinPos) throws SemanticException { + private static MapJoinOperator convertMapJoin(LogicalPlan logicalPlan, JoinOperator op, QBJoinTree joinTree, int mapJoinPos) + throws SemanticException { // outer join cannot be performed on a table which is being cached joinDesc desc = op.getConf(); org.apache.hadoop.hive.ql.plan.joinCond[] condns = desc.getConds(); @@ -105,7 +88,7 @@ throw new SemanticException(ErrorMsg.NO_OUTER_MAPJOIN.getMsg()); } - RowResolver oldOutputRS = pctx.getOpParseCtx().get(op).getRR(); + RowResolver oldOutputRS = logicalPlan.getRowResolver(op); RowResolver outputRS = new RowResolver(); ArrayList outputColumnNames = new ArrayList(); Map> keyExprMap = new HashMap>(); @@ -158,7 +141,7 @@ // create the map-join operator for (pos = 0; pos < newParentOps.size(); pos++) { - RowResolver inputRS = pGraphContext.getOpParseCtx().get(newParentOps.get(pos)).getRR(); + RowResolver inputRS = logicalPlan.getRowResolver(newParentOps.get(pos)); List values = new ArrayList(); @@ -242,10 +225,13 @@ valueTableDescs.add(valueTableDesc); } - - MapJoinOperator mapJoinOp = (MapJoinOperator)putOpInsertMap(OperatorFactory.getAndMakeChild( - new mapJoinDesc(keyExprMap, keyTableDesc, valueExprMap, valueTableDescs, outputColumnNames, mapJoinPos, joinCondns), - new RowSchema(outputRS.getColumnInfos()), newPar), outputRS); + + mapJoinDesc mjdesc = new mapJoinDesc(keyExprMap, keyTableDesc, valueExprMap, valueTableDescs, + outputColumnNames, mapJoinPos, joinCondns); + MapJoinOperator mapJoinOp = (MapJoinOperator) OperatorFactory + .getAndMakeChild(mjdesc, new RowSchema(outputRS.getColumnInfos()), newPar); + logicalPlan.setRowResolver(mapJoinOp, outputRS); + mapJoinOp.getConf().setReversedExprs(op.getConf().getReversedExprs()); mapJoinOp.setColumnExprMap(colExprMap); @@ -261,20 +247,21 @@ op.setParentOperators(null); // create a dummy select to select all columns - genSelectPlan(pctx, mapJoinOp); + genSelectPlan(logicalPlan, mapJoinOp); return mapJoinOp; } - private void genSelectPlan(ParseContext pctx, MapJoinOperator input) throws SemanticException { - List> childOps = input.getChildOperators(); - input.setChildOperators(null); + private static void genSelectPlan(LogicalPlan logicalPlan, MapJoinOperator mapJoinOp) + throws SemanticException { + List> childOps = mapJoinOp.getChildOperators(); + mapJoinOp.setChildOperators(null); // create a dummy select - This select is needed by the walker to split the mapJoin later on - RowResolver inputRR = pctx.getOpParseCtx().get(input).getRR(); + RowResolver inputRR = logicalPlan.getRowResolver(mapJoinOp); ArrayList exprs = new ArrayList(); ArrayList outputs = new ArrayList(); - List outputCols = input.getConf().getOutputColumnNames(); + List outputCols = mapJoinOp.getConf().getOutputColumnNames(); RowResolver outputRS = new RowResolver(); Map colExprMap = new HashMap(); @@ -292,18 +279,18 @@ colExprMap.put(internalName, colDesc); } - selectDesc select = new selectDesc(exprs, outputs, false); + selectDesc seldesc = new selectDesc(exprs, outputs, false); + SelectOperator selOp = (SelectOperator) OperatorFactory + .getAndMakeChild(seldesc, new RowSchema(inputRR.getColumnInfos()), mapJoinOp); - SelectOperator sel = - (SelectOperator)putOpInsertMap(OperatorFactory.getAndMakeChild( - select, new RowSchema(inputRR.getColumnInfos()), input), inputRR); + logicalPlan.setRowResolver(selOp, inputRR); + + 
selOp.setColumnExprMap(colExprMap); - sel.setColumnExprMap(colExprMap); - // Insert the select operator in between. - sel.setChildOperators(childOps); + selOp.setChildOperators(childOps); for (Operator ch: childOps) { - ch.replaceParent(input, sel); + ch.replaceParent(mapJoinOp, selOp); } } @@ -313,7 +300,8 @@ * @param qbJoin qb join tree * @return -1 if it cannot be converted to a map-side join, position of the map join node otherwise */ - private int mapSideJoin(JoinOperator op, QBJoinTree joinTree) throws SemanticException { + private static int mapSideJoin(JoinOperator op, QBJoinTree joinTree) + throws SemanticException { int mapJoinPos = -1; if (joinTree.isMapSideJoin()) { int pos = 0; @@ -334,8 +322,11 @@ // All tables are to be cached - this is not possible. In future, we can support this by randomly // leaving some table from the list of tables to be cached - if (mapJoinPos == -1) - throw new SemanticException(ErrorMsg.INVALID_MAPJOIN_HINT.getMsg(pGraphContext.getQB().getParseInfo().getHints())); + if (mapJoinPos == -1) { + // TODO CWS Optimizer refers to QB... + //throw new SemanticException(ErrorMsg.INVALID_MAPJOIN_HINT.getMsg(sem.getQB().getParseInfo().getHints())); + throw new SemanticException(ErrorMsg.INVALID_MAPJOIN_HINT.getMsg("Hint from the QB")); + } } return mapJoinPos; @@ -346,31 +337,17 @@ * convert it to a map-side join. * @param pactx current parse context */ - public ParseContext transform(ParseContext pactx) throws SemanticException { - this.pGraphContext = pactx; + public void transform(LogicalPlan logicalPlan) throws SemanticException { List listMapJoinOps = new ArrayList(); // traverse all the joins and convert them if necessary - if (pGraphContext.getJoinContext() != null) { - Map joinMap = new HashMap(); - - Set> joinCtx = pGraphContext.getJoinContext().entrySet(); - Iterator> joinCtxIter = joinCtx.iterator(); - while (joinCtxIter.hasNext()) { - Map.Entry joinEntry = joinCtxIter.next(); - JoinOperator joinOp = joinEntry.getKey(); - QBJoinTree qbJoin = joinEntry.getValue(); - int mapJoinPos = mapSideJoin(joinOp, qbJoin); - if (mapJoinPos >= 0) { - listMapJoinOps.add(convertMapJoin(pactx, joinOp, qbJoin, mapJoinPos)); - } - else { - joinMap.put(joinOp, qbJoin); - } + for (JoinOperator joinOp : logicalPlan.getJoinOps()) { + QBJoinTree qbJoin = logicalPlan.getJoinTree(joinOp); + int mapJoinPos = mapSideJoin(joinOp, qbJoin); + if (mapJoinPos >= 0) { + logicalPlan.removeJoinTree(joinOp); + listMapJoinOps.add(convertMapJoin(logicalPlan, joinOp, qbJoin, mapJoinPos)); } - - // store the new joinContext - pGraphContext.setJoinContext(joinMap); } // Go over the list and find if a reducer is not needed @@ -393,8 +370,7 @@ topNodes.addAll(listMapJoinOps); ogw.startWalking(topNodes, null); - pGraphContext.setListMapJoinOpsNoReducer(listMapJoinOpsNoRed); - return pGraphContext; + logicalPlan.setListMapJoinOpsNoReducer(listMapJoinOpsNoRed); } public static class CurrentMapJoin implements NodeProcessor { Index: ql/src/java/org/apache/hadoop/hive/ql/ppd/OpWalkerInfo.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/ppd/OpWalkerInfo.java (revision 836131) +++ ql/src/java/org/apache/hadoop/hive/ql/ppd/OpWalkerInfo.java (working copy) @@ -39,8 +39,7 @@ private Map, OpParseContext> opToParseCtxMap; - public OpWalkerInfo( - HashMap, OpParseContext> opToParseCtxMap) { + public OpWalkerInfo(Map, OpParseContext> opToParseCtxMap) { this.opToParseCtxMap = opToParseCtxMap; this.opToPushdownPredMap = new HashMap, 
ExprWalkerInfo>(); } Index: ql/src/java/org/apache/hadoop/hive/ql/ppd/PredicatePushDown.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/ppd/PredicatePushDown.java (revision 836131) +++ ql/src/java/org/apache/hadoop/hive/ql/ppd/PredicatePushDown.java (working copy) @@ -17,13 +17,10 @@ */ package org.apache.hadoop.hive.ql.ppd; -import java.io.Serializable; import java.util.ArrayList; -import java.util.HashMap; import java.util.LinkedHashMap; import java.util.Map; -import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker; import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; import org.apache.hadoop.hive.ql.lib.Dispatcher; @@ -33,8 +30,7 @@ import org.apache.hadoop.hive.ql.lib.Rule; import org.apache.hadoop.hive.ql.lib.RuleRegExp; import org.apache.hadoop.hive.ql.optimizer.Transform; -import org.apache.hadoop.hive.ql.parse.OpParseContext; -import org.apache.hadoop.hive.ql.parse.ParseContext; +import org.apache.hadoop.hive.ql.parse.LogicalPlan; import org.apache.hadoop.hive.ql.parse.SemanticException; /** @@ -73,16 +69,11 @@ */ public class PredicatePushDown implements Transform { - private ParseContext pGraphContext; - private HashMap, OpParseContext> opToParseCtxMap; - @Override - public ParseContext transform(ParseContext pctx) throws SemanticException { - this.pGraphContext = pctx; - this.opToParseCtxMap = pGraphContext.getOpParseCtx(); + public void transform(LogicalPlan logicalPlan) throws SemanticException { // create a the context for walking operators - OpWalkerInfo opWalkerInfo = new OpWalkerInfo(opToParseCtxMap); + OpWalkerInfo opWalkerInfo = new OpWalkerInfo(logicalPlan.getOpToParseContextMap()); Map opRules = new LinkedHashMap(); opRules.put(new RuleRegExp("R1", "FIL%"), OpProcFactory.getFilterProc()); @@ -98,10 +89,8 @@ // Create a list of topop nodes ArrayList topNodes = new ArrayList(); - topNodes.addAll(pGraphContext.getTopOps().values()); + topNodes.addAll(logicalPlan.getTopOps()); ogw.startWalking(topNodes, null); - - return pGraphContext; } } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/Task.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/Task.java (revision 836131) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/Task.java (working copy) @@ -36,7 +36,7 @@ * Task implementation **/ -public abstract class Task implements Serializable { +public abstract class Task implements Serializable { private static final long serialVersionUID = 1L; transient protected boolean started; Index: ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnInfo.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnInfo.java (revision 836131) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnInfo.java (working copy) @@ -69,6 +69,11 @@ this.isPartitionCol = isPartitionCol; } + public ColumnInfo(ColumnInfo columnInfo) { + this(columnInfo.getInternalName(), columnInfo.getType(), + columnInfo.getTabAlias(), columnInfo.getIsPartitionCol()); + } + public TypeInfo getType() { return type; } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java (revision 836131) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java (working copy) @@ -24,6 +24,7 @@ import java.lang.annotation.Annotation; import 
java.lang.reflect.Method; import java.util.Arrays; +import java.util.Collection; import java.util.Comparator; import java.util.HashSet; import java.util.List; @@ -99,7 +100,7 @@ if (isPrintable(ent.getValue())) { out.print(ent.getValue()); out.println(); - } else if (ent.getValue() instanceof List) { + } else if (ent.getValue() instanceof Collection) { out.print(ent.getValue().toString()); out.println(); } else if (ent.getValue() instanceof Serializable) { @@ -320,7 +321,7 @@ } public void outputDependencies(PrintStream out, - List> rootTasks, + Collection> rootTasks, int indent) throws Exception { out.print(indentString(indent)); @@ -331,7 +332,7 @@ } public void outputStagePlans(PrintStream out, - List> rootTasks, + Collection> rootTasks, int indent) throws Exception { out.print(indentString(indent)); Index: ql/src/java/org/apache/hadoop/hive/ql/exec/RowSchema.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/RowSchema.java (revision 836131) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/RowSchema.java (working copy) @@ -35,6 +35,13 @@ public RowSchema(Vector signature) { this.signature = signature; } + + public RowSchema(RowSchema rowSchema) { + signature = new Vector(); + for (ColumnInfo columnInfo : rowSchema.getSignature()) { + signature.add(new ColumnInfo(columnInfo)); + } + } public void setSignature(Vector signature) { this.signature = signature; Index: ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java (revision 836131) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java (working copy) @@ -29,7 +29,6 @@ import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; -import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Set; @@ -61,6 +60,7 @@ import org.apache.hadoop.hive.ql.metadata.InvalidTableException; import org.apache.hadoop.hive.ql.metadata.Partition; import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.parse.PhysicalPlan; import org.apache.hadoop.hive.ql.plan.AddPartitionDesc; import org.apache.hadoop.hive.ql.plan.DDLWork; import org.apache.hadoop.hive.ql.plan.MsckDesc; @@ -214,7 +214,7 @@ } Partition part = db.getPartition(tbl, addPartitionDesc.getPartSpec(), false); - work.getOutputs().add(new WriteEntity(part)); + work.getPhysicalPlan().addOutput(new WriteEntity(part)); return 0; } @@ -990,8 +990,9 @@ // This is kind of hacky - the read entity contains the old table, whereas the write entity // contains the new table. 
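// Editor's note: an illustrative sketch, not part of the patch. The new ColumnInfo and RowSchema copy
// constructors above give callers a defensive deep copy of an operator's schema: the copy owns fresh
// ColumnInfo objects, so edits to it cannot leak back into the operator the schema came from.
// 'someOperator' below is a hypothetical Operator reference, shown only to make the intent concrete.
RowSchema original = someOperator.getSchema();
RowSchema scratch = new RowSchema(original);      // deep copy: one new ColumnInfo per column
scratch.getSignature().remove(0);                 // prune a column in the copy...
assert original.getSignature().size()
    == scratch.getSignature().size() + 1;         // ...while the original schema keeps all of its columns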
This is needed for rename - both the old and the new table names are // passed - work.getInputs().add(new ReadEntity(oldTbl)); - work.getOutputs().add(new WriteEntity(tbl)); + PhysicalPlan physicalPlan = work.getPhysicalPlan(); + physicalPlan.addInput(new ReadEntity(oldTbl)); + physicalPlan.addOutput(new WriteEntity(tbl)); return 0; } @@ -1017,7 +1018,7 @@ // drop the table db.dropTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, dropTbl.getTableName()); if (tbl != null) - work.getOutputs().add(new WriteEntity(tbl)); + work.getPhysicalPlan().addOutput(new WriteEntity(tbl)); } else { // get all partitions of the table List partitionNames = db.getPartitionNames(MetaStoreUtils.DEFAULT_DATABASE_NAME, dropTbl.getTableName(), (short)-1); @@ -1056,7 +1057,7 @@ db.dropPartition(MetaStoreUtils.DEFAULT_DATABASE_NAME, dropTbl .getTableName(), partition.getValues(), true); // drop data for the // partition - work.getOutputs().add(new WriteEntity(partition)); + work.getPhysicalPlan().addOutput(new WriteEntity(partition)); } } @@ -1202,7 +1203,7 @@ // create the table db.createTable(tbl, crtTbl.getIfNotExists()); - work.getOutputs().add(new WriteEntity(tbl)); + work.getPhysicalPlan().addOutput(new WriteEntity(tbl)); return 0; } @@ -1237,7 +1238,7 @@ // create the table db.createTable(tbl, crtTbl.getIfNotExists()); - work.getOutputs().add(new WriteEntity(tbl)); + work.getPhysicalPlan().addOutput(new WriteEntity(tbl)); return 0; } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java (revision 836131) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java (working copy) @@ -47,10 +47,6 @@ private static final long serialVersionUID = 1L; - public MoveTask() { - super(); - } - public int execute() { try { @@ -137,15 +133,24 @@ if(tbd.getPartitionSpec().size() == 0) { db.loadTable(new Path(tbd.getSourceDir()), tbd.getTable().getTableName(), tbd.getReplace(), new Path(tbd.getTmpDir())); + // TODO CWS possible bug + /* if (work.getOutputs() != null) work.getOutputs().add(new WriteEntity(table)); + */ + work.getPhysicalPlan().addOutput(new WriteEntity(table)); } else { LOG.info("Partition is: " + tbd.getPartitionSpec().toString()); db.loadPartition(new Path(tbd.getSourceDir()), tbd.getTable().getTableName(), tbd.getPartitionSpec(), tbd.getReplace(), new Path(tbd.getTmpDir())); Partition partn = db.getPartition(table, tbd.getPartitionSpec(), false); + + // TODO CWS possible bug + /* if (work.getOutputs() != null) work.getOutputs().add(new WriteEntity(partn)); + */ + work.getPhysicalPlan().addOutput(new WriteEntity(partn)); } } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/DDLWork.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/DDLWork.java (revision 836131) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/DDLWork.java (working copy) @@ -18,11 +18,9 @@ package org.apache.hadoop.hive.ql.plan; -import org.apache.hadoop.hive.ql.hooks.ReadEntity; -import org.apache.hadoop.hive.ql.hooks.WriteEntity; +import org.apache.hadoop.hive.ql.parse.PhysicalPlan; import java.io.Serializable; -import java.util.Set; public class DDLWork implements Serializable { private static final long serialVersionUID = 1L; @@ -39,36 +37,26 @@ private MsckDesc msckDesc; private showTableStatusDesc showTblStatusDesc; - /** - * ReadEntitites that are passed to the hooks. 
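// Editor's note: an illustrative sketch, not part of the patch. The PhysicalPlan source is not shown in
// these hunks, so everything below is an assumption inferred from the call sites: DDLTask and MoveTask
// above now report their ReadEntity/WriteEntity effects through work.getPhysicalPlan().addInput()/
// addOutput(), and the analyzers register root tasks on the same object. A minimal shape that would
// satisfy those call sites:
public class PhysicalPlan implements Serializable {
  private static final long serialVersionUID = 1L;
  // insertion order is kept so pre/post execution hooks see entities in a stable order
  private final Set<ReadEntity> inputs = new LinkedHashSet<ReadEntity>();
  private final Set<WriteEntity> outputs = new LinkedHashSet<WriteEntity>();
  private final List<Task<? extends Serializable>> rootTasks =
      new ArrayList<Task<? extends Serializable>>();

  public void addInput(ReadEntity input) { inputs.add(input); }
  public void addOutput(WriteEntity output) { outputs.add(output); }
  public void addRootTask(Task<? extends Serializable> rootTask) { rootTasks.add(rootTask); }

  public Set<ReadEntity> getInputs() { return inputs; }
  public Set<WriteEntity> getOutputs() { return outputs; }
  public List<Task<? extends Serializable>> getRootTasks() { return rootTasks; }
}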
- */ - protected Set inputs; - /** - * List of WriteEntities that are passed to the hooks. - */ - protected Set outputs; - public DDLWork() { + private PhysicalPlan physicalPlan; + + public DDLWork(PhysicalPlan physicalPlan) { + this.physicalPlan = physicalPlan; } - public DDLWork(Set inputs, Set outputs) { - this.inputs = inputs; - this.outputs = outputs; - } - /** * @param alterTblDesc alter table descriptor */ - public DDLWork(Set inputs, Set outputs, alterTableDesc alterTblDesc) { - this(inputs, outputs); + public DDLWork(PhysicalPlan physicalPlan, alterTableDesc alterTblDesc) { + this(physicalPlan); this.alterTblDesc = alterTblDesc; } /** * @param createTblDesc create table descriptor */ - public DDLWork(Set inputs, Set outputs, createTableDesc createTblDesc) { - this(inputs, outputs); + public DDLWork(PhysicalPlan physicalPlan, createTableDesc createTblDesc) { + this(physicalPlan); this.createTblDesc = createTblDesc; } @@ -76,8 +64,8 @@ /** * @param createTblLikeDesc create table dlike escriptor */ - public DDLWork(Set inputs, Set outputs, createTableLikeDesc createTblLikeDesc) { - this(inputs, outputs); + public DDLWork(PhysicalPlan physicalPlan, createTableLikeDesc createTblLikeDesc) { + this(physicalPlan); this.createTblLikeDesc = createTblLikeDesc; } @@ -85,8 +73,8 @@ /** * @param dropTblDesc drop table descriptor */ - public DDLWork(Set inputs, Set outputs, dropTableDesc dropTblDesc) { - this(inputs, outputs); + public DDLWork(PhysicalPlan physicalPlan, dropTableDesc dropTblDesc) { + this(physicalPlan); this.dropTblDesc = dropTblDesc; } @@ -94,8 +82,8 @@ /** * @param descTblDesc */ - public DDLWork(Set inputs, Set outputs, descTableDesc descTblDesc) { - this(inputs, outputs); + public DDLWork(PhysicalPlan physicalPlan, descTableDesc descTblDesc) { + this(physicalPlan); this.descTblDesc = descTblDesc; } @@ -103,8 +91,8 @@ /** * @param showTblsDesc */ - public DDLWork(Set inputs, Set outputs, showTablesDesc showTblsDesc) { - this(inputs, outputs); + public DDLWork(PhysicalPlan physicalPlan, showTablesDesc showTblsDesc) { + this(physicalPlan); this.showTblsDesc = showTblsDesc; } @@ -112,8 +100,8 @@ /** * @param showFuncsDesc */ - public DDLWork(Set inputs, Set outputs, showFunctionsDesc showFuncsDesc) { - this(inputs, outputs); + public DDLWork(PhysicalPlan physicalPlan, showFunctionsDesc showFuncsDesc) { + this(physicalPlan); this.showFuncsDesc = showFuncsDesc; } @@ -121,8 +109,8 @@ /** * @param descFuncDesc */ - public DDLWork(Set inputs, Set outputs, descFunctionDesc descFuncDesc) { - this(inputs, outputs); + public DDLWork(PhysicalPlan physicalPlan, descFunctionDesc descFuncDesc) { + this(physicalPlan); this.descFunctionDesc = descFuncDesc; } @@ -130,8 +118,8 @@ /** * @param showPartsDesc */ - public DDLWork(Set inputs, Set outputs, showPartitionsDesc showPartsDesc) { - this(inputs, outputs); + public DDLWork(PhysicalPlan physicalPlan, showPartitionsDesc showPartsDesc) { + this(physicalPlan); this.showPartsDesc = showPartsDesc; } @@ -140,14 +128,14 @@ * @param addPartitionDesc information about the partitions * we want to add. 
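// Editor's note: an illustrative sketch, not part of the patch. With the constructor changes in this
// hunk, a semantic analyzer hands the shared PhysicalPlan to DDLWork instead of separate input/output
// entity sets. The method below is hypothetical; it assumes a BaseSemanticAnalyzer subclass with the
// getPhysicalPlan()/getHiveConf() accessors this patch relies on, and mirrors the addRootTask call
// sites that appear later in the patch.
private void addDdlRootTask(dropTableDesc dropTblDesc) {
  // the work reaches entity tracking through the plan it was constructed with, e.g.
  // work.getPhysicalPlan().addOutput(new WriteEntity(...)) at execution time
  DDLWork ddlWork = new DDLWork(getPhysicalPlan(), dropTblDesc);
  getPhysicalPlan().addRootTask(TaskFactory.get(ddlWork, getHiveConf()));
}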
*/ - public DDLWork(Set inputs, Set outputs, AddPartitionDesc addPartitionDesc) { - this(inputs, outputs); + public DDLWork(PhysicalPlan physicalPlan, AddPartitionDesc addPartitionDesc) { + this(physicalPlan); this.addPartitionDesc = addPartitionDesc; } - public DDLWork(Set inputs, Set outputs, MsckDesc checkDesc) { - this(inputs, outputs); + public DDLWork(PhysicalPlan physicalPlan, MsckDesc checkDesc) { + this(physicalPlan); this.msckDesc = checkDesc; } @@ -155,12 +143,16 @@ /** * @param showTblStatusDesc show table status descriptor */ - public DDLWork(Set inputs, Set outputs, showTableStatusDesc showTblStatusDesc) { - this(inputs, outputs); + public DDLWork(PhysicalPlan physicalPlan, showTableStatusDesc showTblStatusDesc) { + this(physicalPlan); this.showTblStatusDesc = showTblStatusDesc; } + public PhysicalPlan getPhysicalPlan() { + return physicalPlan; + } + /** * @return the createTblDesc */ @@ -340,20 +332,5 @@ this.showTblStatusDesc = showTblStatusDesc; } - public Set getInputs() { - return inputs; - } - public Set getOutputs() { - return outputs; - } - - public void setInputs(Set inputs) { - this.inputs = inputs; - } - - public void setOutputs(Set outputs) { - this.outputs = outputs; - } - } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/explainWork.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/explainWork.java (revision 836131) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/explainWork.java (working copy) @@ -20,6 +20,7 @@ import java.io.Serializable; import java.util.List; +import java.util.Set; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.ql.exec.Task; @@ -55,11 +56,8 @@ public List> getRootTasks() { return rootTasks; } + - public void setRootTasks(List> rootTasks) { - this.rootTasks = rootTasks; - } - public String getAstStringTree() { return astStringTree; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/moveWork.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/moveWork.java (revision 836131) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/moveWork.java (working copy) @@ -20,9 +20,7 @@ import java.io.*; -import org.apache.hadoop.hive.ql.hooks.ReadEntity; -import org.apache.hadoop.hive.ql.hooks.WriteEntity; -import java.util.Set; +import org.apache.hadoop.hive.ql.parse.PhysicalPlan; @explain(displayName="Move Operator") @@ -32,31 +30,19 @@ private loadFileDesc loadFileWork; private boolean checkFileFormat; + + private PhysicalPlan physicalPlan; - /** - * ReadEntitites that are passed to the hooks. - */ - protected Set inputs; - /** - * List of WriteEntities that are passed to the hooks. 
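// Editor's note: an illustrative sketch, not part of the patch. The QueryPlan.java hunk a little
// further down seeds its traversals from sem.getPhysicalPlan().getRootTasks() instead of the analyzer's
// old rootTasks list; the traversal itself is the usual breadth-first sweep over the task DAG, shown
// here as a standalone helper so the pattern is easy to see in isolation.
import java.io.Serializable;
import java.util.Collection;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.Queue;
import java.util.Set;
import org.apache.hadoop.hive.ql.exec.Task;

public final class TaskGraphUtils {
  // collect every task reachable from the given roots, visiting each task exactly once
  public static Set<Task<? extends Serializable>> reachableTasks(
      Collection<Task<? extends Serializable>> rootTasks) {
    Queue<Task<? extends Serializable>> toVisit =
        new LinkedList<Task<? extends Serializable>>(rootTasks);
    Set<Task<? extends Serializable>> visited = new HashSet<Task<? extends Serializable>>();
    while (!toVisit.isEmpty()) {
      Task<? extends Serializable> task = toVisit.remove();
      if (!visited.add(task)) {
        continue;                                 // already expanded via another parent
      }
      if (task.getChildTasks() != null) {
        toVisit.addAll(task.getChildTasks());
      }
    }
    return visited;
  }

  private TaskGraphUtils() { }
}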
- */ - protected Set outputs; - - public moveWork() { + public moveWork(PhysicalPlan physicalPlan) { + this.physicalPlan = physicalPlan; } - public moveWork(Set inputs, Set outputs) { - this.inputs = inputs; - this.outputs = outputs; - } - public moveWork( - Set inputs, - Set outputs, - final loadTableDesc loadTableWork, - final loadFileDesc loadFileWork, - boolean checkFileFormat) { - this(inputs, outputs); + PhysicalPlan physicalPlan, + loadTableDesc loadTableWork, + loadFileDesc loadFileWork, + boolean checkFileFormat) { + this(physicalPlan); this.loadTableWork = loadTableWork; this.loadFileWork = loadFileWork; this.checkFileFormat = checkFileFormat; @@ -84,20 +70,7 @@ this.checkFileFormat = checkFileFormat; } - public Set getInputs() { - return inputs; + public PhysicalPlan getPhysicalPlan() { + return physicalPlan; } - - public Set getOutputs() { - return outputs; - } - - public void setInputs(Set inputs) { - this.inputs = inputs; - } - - public void setOutputs(Set outputs) { - this.outputs = outputs; - } - } Index: ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java (revision 836131) +++ ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java (working copy) @@ -51,22 +51,19 @@ public class QueryPlan implements Serializable { private static final long serialVersionUID = 1L; - static final private Log LOG = LogFactory.getLog(QueryPlan.class.getName()); private String queryString; - private BaseSemanticAnalyzer plan; + private BaseSemanticAnalyzer sem; private String queryId; private org.apache.hadoop.hive.ql.plan.api.Query query; private Map> counters; private Set done; private Set started; - private boolean add; - - public QueryPlan(String queryString, BaseSemanticAnalyzer plan) { + public QueryPlan(String queryString, BaseSemanticAnalyzer sem) { this.queryString = queryString; - this.plan = plan; + this.sem = sem; this.queryId = makeQueryId(); query = new org.apache.hadoop.hive.ql.plan.api.Query(); query.setQueryId(this.queryId); @@ -81,7 +78,7 @@ } public BaseSemanticAnalyzer getPlan() { - return plan; + return sem; } public String getQueryId() { @@ -152,7 +149,7 @@ Queue> tasksToVisit = new LinkedList>(); Set> tasksVisited = new HashSet>(); - tasksToVisit.addAll(plan.getRootTasks()); + tasksToVisit.addAll(sem.getPhysicalPlan().getRootTasks()); while (tasksToVisit.size() != 0) { Task task = tasksToVisit.remove(); tasksVisited.add(task); @@ -270,7 +267,7 @@ private void extractCounters() throws IOException { Queue> tasksToVisit = new LinkedList>(); Set> tasksVisited = new HashSet>(); - tasksToVisit.addAll(plan.getRootTasks()); + tasksToVisit.addAll(sem.getPhysicalPlan().getRootTasks()); while (tasksToVisit.peek() != null) { Task task = tasksToVisit.remove(); tasksVisited.add(task); Index: ql/src/java/org/apache/hadoop/hive/ql/Context.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/Context.java (revision 836131) +++ ql/src/java/org/apache/hadoop/hive/ql/Context.java (working copy) @@ -23,7 +23,6 @@ import java.io.IOException; import java.io.FileNotFoundException; import java.net.URI; -import java.net.URISyntaxException; import java.util.Random; import java.util.ArrayList; @@ -55,7 +54,7 @@ private Path MRScratchDir; private Path localScratchDir; private ArrayList allScratchDirs = new ArrayList (); - private HiveConf conf; + private HiveConf hiveConf; Random rand = new Random (); protected int 
randomid = Math.abs(rand.nextInt()); protected int pathid = 10000; @@ -64,12 +63,16 @@ public Context() { } - public Context(HiveConf conf) { - this.conf = conf; - Path tmpPath = new Path(conf.getVar(HiveConf.ConfVars.SCRATCHDIR)); + public Context(HiveConf hiveConf) { + this.hiveConf = hiveConf; + Path tmpPath = new Path(hiveConf.getVar(HiveConf.ConfVars.SCRATCHDIR)); scratchPath = tmpPath.toUri().getPath(); } + public HiveConf getHiveConf() { + return hiveConf; + } + /** * Set the context on whether the current query is an explain query * @param value true if the query is an explain query, false if not @@ -93,7 +96,7 @@ while (true) { localScratchDir = new Path(System.getProperty("java.io.tmpdir") + File.separator + Math.abs(rand.nextInt())); - FileSystem fs = FileSystem.getLocal(conf); + FileSystem fs = FileSystem.getLocal(hiveConf); if (fs.mkdirs(localScratchDir)) { localScratchDir = fs.makeQualified(localScratchDir); allScratchDirs.add(localScratchDir); @@ -110,15 +113,15 @@ private void makeMRScratchDir() throws IOException { while(true) { MRScratchDir = FileUtils.makeQualified - (new Path(conf.getVar(HiveConf.ConfVars.SCRATCHDIR), - Integer.toString(Math.abs(rand.nextInt()))), conf); + (new Path(hiveConf.getVar(HiveConf.ConfVars.SCRATCHDIR), + Integer.toString(Math.abs(rand.nextInt()))), hiveConf); if (explain) { allScratchDirs.add(MRScratchDir); return; } - FileSystem fs = MRScratchDir.getFileSystem(conf); + FileSystem fs = MRScratchDir.getFileSystem(hiveConf); if (fs.mkdirs(MRScratchDir)) { allScratchDirs.add(MRScratchDir); return; @@ -142,7 +145,7 @@ return extScratchDir; } - FileSystem fs = extScratchDir.getFileSystem(conf); + FileSystem fs = extScratchDir.getFileSystem(hiveConf); if (fs.mkdirs(extScratchDir)) { allScratchDirs.add(extScratchDir); return extScratchDir; @@ -206,14 +209,14 @@ if (explain) { try { if (localScratchDir != null) - FileSystem.getLocal(conf).delete(localScratchDir, true); + FileSystem.getLocal(hiveConf).delete(localScratchDir, true); } catch (Exception e) { LOG.warn("Error Removing Scratch: " + StringUtils.stringifyException(e)); } } else { for (Path p: allScratchDirs) { try { - p.getFileSystem(conf).delete(p, true); + p.getFileSystem(hiveConf).delete(p, true); } catch (Exception e) { LOG.warn("Error Removing Scratch: " + StringUtils.stringifyException(e)); } @@ -309,7 +312,7 @@ { try { - FileSystem fs = resDir.getFileSystem(conf); + FileSystem fs = resDir.getFileSystem(hiveConf); fs.delete(resDir, true); } catch (IOException e) { LOG.info("Context clear error: " + StringUtils.stringifyException(e)); @@ -320,7 +323,7 @@ { try { - FileSystem fs = resFile.getFileSystem(conf); + FileSystem fs = resFile.getFileSystem(hiveConf); fs.delete(resFile, false); } catch (IOException e) { LOG.info("Context clear error: " + StringUtils.stringifyException(e)); @@ -337,10 +340,10 @@ if ((resFile == null) && (resDir == null)) return null; if (resFile != null) { - return (DataInput)resFile.getFileSystem(conf).open(resFile); + return (DataInput)resFile.getFileSystem(hiveConf).open(resFile); } - resFs = resDir.getFileSystem(conf); + resFs = resDir.getFileSystem(hiveConf); FileStatus status = resFs.getFileStatus(resDir); assert status.isDir(); FileStatus[] resDirFS = resFs.globStatus(new Path(resDir + "/*")); Index: ql/src/java/org/apache/hadoop/hive/ql/parse/DMLSemanticAnalyzer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/DMLSemanticAnalyzer.java (revision 0) +++ 
ql/src/java/org/apache/hadoop/hive/ql/parse/DMLSemanticAnalyzer.java (revision 0) @@ -0,0 +1,987 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.parse; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + +import org.apache.commons.lang.StringUtils; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.common.JavaUtils; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.MetaStoreUtils; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.Order; +import org.apache.hadoop.hive.ql.exec.ExecDriver; +import org.apache.hadoop.hive.ql.exec.FunctionRegistry; +import org.apache.hadoop.hive.ql.exec.MapRedTask; +import org.apache.hadoop.hive.ql.exec.Task; +import org.apache.hadoop.hive.ql.exec.TaskFactory; +import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils; +import org.apache.hadoop.hive.ql.io.HiveOutputFormat; +import org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.metadata.InvalidTableException; +import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.optimizer.Optimizer; +import org.apache.hadoop.hive.ql.plan.DDLWork; +import org.apache.hadoop.hive.ql.plan.createTableDesc; +import org.apache.hadoop.hive.ql.plan.createTableLikeDesc; +import org.apache.hadoop.hive.serde2.SerDeUtils; +import org.apache.hadoop.mapred.InputFormat; + +/** + * Implementation of the semantic analyzer + */ + +public class DMLSemanticAnalyzer extends BaseSemanticAnalyzer { + + + private QB qb; + + LogicalPlan logicalPlan; + + + + + private static class Phase1Ctx { + String dest; + int nextNum; + } + + public DMLSemanticAnalyzer(HiveConf hiveConf) throws SemanticException { + super(hiveConf); + logicalPlan = new LogicalPlan(hiveConf); + } + + public void reset() { + super.reset(); + logicalPlan.clearIdToTableNameMap(); + } + + + // + // QB + // + + public QB getQB() { + return qb; + } + + + public LogicalPlan getLogicalPlan() { + return logicalPlan; + } + + + // TODO CWS remove this method. + // overrides method in BaseSemanticAnalyzer. + // this should get pushed into QueryCompiler... 
+ public Map getIdToTableNameMap() { + return logicalPlan.getIdToTableNameMap(); + } + + + @SuppressWarnings("nls") + public void doPhase1QBExpr(ASTNode ast, QBExpr qbexpr, String id, + String alias) throws SemanticException { + + assert (ast.getToken() != null); + + switch (ast.getToken().getType()) { + case HiveParser.TOK_QUERY: + QB qb = new QB(id, alias, true); + doPhase1(ast, qb, initPhase1Ctx()); + qbexpr.setOpcode(QBExpr.Opcode.NULLOP); + qbexpr.setQB(qb); + break; + case HiveParser.TOK_UNION: + qbexpr.setOpcode(QBExpr.Opcode.UNION); + // query 1 + assert (ast.getChild(0) != null); + QBExpr qbexpr1 = new QBExpr(alias + "-subquery1"); + doPhase1QBExpr((ASTNode) ast.getChild(0), qbexpr1, id + "-subquery1", + alias + "-subquery1"); + qbexpr.setQBExpr1(qbexpr1); + + // query 2 + assert (ast.getChild(0) != null); + QBExpr qbexpr2 = new QBExpr(alias + "-subquery2"); + doPhase1QBExpr((ASTNode) ast.getChild(1), qbexpr2, id + "-subquery2", + alias + "-subquery2"); + qbexpr.setQBExpr2(qbexpr2); + + break; + + default: + break; + } + } + + private LinkedHashMap doPhase1GetAggregationsFromSelect( + ASTNode selExpr) { + // Iterate over the selects search for aggregation Trees. + // Use String as keys to eliminate duplicate trees. + LinkedHashMap aggregationTrees = new LinkedHashMap(); + for (int i = 0; i < selExpr.getChildCount(); ++i) { + ASTNode sel = (ASTNode) selExpr.getChild(i).getChild(0); + doPhase1GetAllAggregations(sel, aggregationTrees); + } + return aggregationTrees; + } + + /** + * DFS-scan the expressionTree to find all aggregation subtrees and put them + * in aggregations. + * + * @param expressionTree + * @param aggregations + * the key to the HashTable is the toStringTree() representation of + * the aggregation subtree. + */ + private void doPhase1GetAllAggregations(ASTNode expressionTree, + HashMap aggregations) { + if (expressionTree.getToken().getType() == HiveParser.TOK_FUNCTION + || expressionTree.getToken().getType() == HiveParser.TOK_FUNCTIONDI) { + assert (expressionTree.getChildCount() != 0); + if (expressionTree.getChild(0).getType() == HiveParser.Identifier) { + String functionName = ParseUtils.unescapeIdentifier(expressionTree.getChild(0).getText()); + if (FunctionRegistry.getGenericUDAFResolver(functionName) != null) { + aggregations.put(expressionTree.toStringTree(), expressionTree); + return; + } + } + } + for (int i = 0; i < expressionTree.getChildCount(); i++) { + doPhase1GetAllAggregations((ASTNode) expressionTree.getChild(i), + aggregations); + } + } + + private ASTNode doPhase1GetDistinctFuncExpr( + HashMap aggregationTrees) throws SemanticException { + ASTNode expr = null; + for (Map.Entry entry : aggregationTrees.entrySet()) { + ASTNode value = entry.getValue(); + assert (value != null); + if (value.getToken().getType() == HiveParser.TOK_FUNCTIONDI) { + if (expr == null) { + expr = value; + } else { + throw new SemanticException(ErrorMsg.UNSUPPORTED_MULTIPLE_DISTINCTS.getMsg(expr)); + } + } + } + return expr; + } + + private void processTable(QB qb, ASTNode tabref) throws SemanticException { + // For each table reference get the table name + // and the alias (if alias is not present, the table name + // is used as an alias) + boolean tableSamplePresent = false; + int aliasIndex = 0; + if (tabref.getChildCount() == 2) { + // tablename tablesample + // OR + // tablename alias + ASTNode ct = (ASTNode)tabref.getChild(1); + if (ct.getToken().getType() == HiveParser.TOK_TABLESAMPLE) { + tableSamplePresent = true; + } + else { + aliasIndex = 1; + } + } + else if 
(tabref.getChildCount() == 3) { + // table name table sample alias + aliasIndex = 2; + tableSamplePresent = true; + } + ASTNode tableTree = (ASTNode)(tabref.getChild(0)); + String alias = ParseUtils.unescapeIdentifier(tabref.getChild(aliasIndex).getText()); + // If the alias is already there then we have a conflict + if (qb.exists(alias)) { + throw new SemanticException(ErrorMsg.AMBIGUOUS_TABLE_ALIAS.getMsg(tabref.getChild(aliasIndex))); + } + if (tableSamplePresent) { + ASTNode sampleClause = (ASTNode)tabref.getChild(1); + ArrayList sampleCols = new ArrayList(); + if (sampleClause.getChildCount() > 2) { + for (int i = 2; i < sampleClause.getChildCount(); i++) { + sampleCols.add((ASTNode)sampleClause.getChild(i)); + } + } + // TODO: For now only support sampling on up to two columns + // Need to change it to list of columns + if (sampleCols.size() > 2) { + throw new SemanticException(ErrorMsg.SAMPLE_RESTRICTION.getMsg(tabref.getChild(0))); + } + qb.getParseInfo().setTabSample(alias, new TableSample( + ParseUtils.unescapeIdentifier(sampleClause.getChild(0).getText()), + ParseUtils.unescapeIdentifier(sampleClause.getChild(1).getText()), + sampleCols) + ); + } + // Insert this map into the stats + String table_name = ParseUtils.unescapeIdentifier(tabref.getChild(0).getText()); + qb.setTabAlias(alias, table_name); + + qb.getParseInfo().setSrcForAlias(alias, tableTree); + } + + private void processSubQuery(QB qb, ASTNode subq) throws SemanticException { + + // This is a subquery and must have an alias + if (subq.getChildCount() != 2) { + throw new SemanticException(ErrorMsg.NO_SUBQUERY_ALIAS.getMsg(subq)); + } + ASTNode subqref = (ASTNode) subq.getChild(0); + String alias = ParseUtils.unescapeIdentifier(subq.getChild(1).getText()); + + // Recursively do the first phase of semantic analysis for the subquery + QBExpr qbexpr = new QBExpr(alias); + + doPhase1QBExpr(subqref, qbexpr, qb.getId(), alias); + + // If the alias is already there then we have a conflict + if (qb.exists(alias)) { + throw new SemanticException(ErrorMsg.AMBIGUOUS_TABLE_ALIAS.getMsg(subq.getChild(1))); + } + // Insert this map into the stats + qb.setSubqAlias(alias, qbexpr); + } + + + + @SuppressWarnings("nls") + private void processJoin(QB qb, ASTNode join) throws SemanticException { + int numChildren = join.getChildCount(); + if ((numChildren != 2) && (numChildren != 3) + && join.getToken().getType() != HiveParser.TOK_UNIQUEJOIN) + throw new SemanticException("Join with multiple children"); + + for (int num = 0; num < numChildren; num++) { + ASTNode child = (ASTNode) join.getChild(num); + if (child.getToken().getType() == HiveParser.TOK_TABREF) + processTable(qb, child); + else if (child.getToken().getType() == HiveParser.TOK_SUBQUERY) + processSubQuery(qb, child); + else if (ParseUtils.isJoinToken(child)) + processJoin(qb, child); + } + } + + @SuppressWarnings({"fallthrough", "nls"}) + public void doPhase1(ASTNode ast, QB qb, Phase1Ctx ctx_1) + throws SemanticException { + + QBParseInfo qbp = qb.getParseInfo(); + boolean skipRecursion = false; + + if (ast.getToken() != null) { + skipRecursion = true; + switch (ast.getToken().getType()) { + case HiveParser.TOK_SELECTDI: + qb.countSelDi(); + // fall through + case HiveParser.TOK_SELECT: + qb.countSel(); + qbp.setSelExprForClause(ctx_1.dest, ast); + + if (((ASTNode)ast.getChild(0)).getToken().getType() == HiveParser.TOK_HINTLIST) + qbp.setHints((ASTNode)ast.getChild(0)); + + LinkedHashMap aggregations = doPhase1GetAggregationsFromSelect(ast); + 
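// Editor's note: an illustrative sketch, not part of the patch. doPhase1GetAllAggregations() above keys
// the collected aggregation subtrees by ASTNode.toStringTree(), so syntactically identical calls (for
// example count(1) written twice in a select list) collapse to a single entry while insertion order is
// preserved for later group-by planning. The same idea on plain strings, runnable on its own; the
// sample tree strings are only placeholders, not exact grammar output.
import java.util.LinkedHashMap;

public class AggregationDedupeDemo {
  public static void main(String[] args) {
    LinkedHashMap<String, String> aggregationTrees = new LinkedHashMap<String, String>();
    String[] exprTrees = {
        "(TOK_FUNCTION count 1)",                 // appears twice in the "select list"
        "(TOK_FUNCTION sum x)",
        "(TOK_FUNCTION count 1)" };
    for (String tree : exprTrees) {
      aggregationTrees.put(tree, tree);           // duplicate keys overwrite; first-seen order is kept
    }
    System.out.println(aggregationTrees.keySet()); // two distinct aggregations survive
  }
}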
qbp.setAggregationExprsForClause(ctx_1.dest, aggregations); + qbp.setDistinctFuncExprForClause(ctx_1.dest, + doPhase1GetDistinctFuncExpr(aggregations)); + break; + + case HiveParser.TOK_WHERE: + qbp.setWhrExprForClause(ctx_1.dest, ast); + break; + + case HiveParser.TOK_DESTINATION: + ctx_1.dest = "insclause-" + ctx_1.nextNum; + ctx_1.nextNum++; + + // is there a insert in the subquery + if (qbp.getIsSubQ()) { + ASTNode ch = (ASTNode)ast.getChild(0); + if ((ch.getToken().getType() != HiveParser.TOK_DIR) || + (((ASTNode)ch.getChild(0)).getToken().getType() != HiveParser.TOK_TMP_FILE)) + throw new SemanticException(ErrorMsg.NO_INSERT_INSUBQUERY.getMsg(ast)); + } + + qbp.setDestForClause(ctx_1.dest, (ASTNode) ast.getChild(0)); + break; + + case HiveParser.TOK_FROM: + int child_count = ast.getChildCount(); + if (child_count != 1) + throw new SemanticException("Multiple Children " + child_count); + + // Check if this is a subquery + ASTNode frm = (ASTNode) ast.getChild(0); + if (frm.getToken().getType() == HiveParser.TOK_TABREF) + processTable(qb, frm); + else if (frm.getToken().getType() == HiveParser.TOK_SUBQUERY) + processSubQuery(qb, frm); + else if (ParseUtils.isJoinToken(frm)) + { + processJoin(qb, frm); + qbp.setJoinExpr(frm); + } + break; + + case HiveParser.TOK_CLUSTERBY: + // Get the clusterby aliases - these are aliased to the entries in the + // select list + qbp.setClusterByExprForClause(ctx_1.dest, ast); + break; + + case HiveParser.TOK_DISTRIBUTEBY: + // Get the distribute by aliases - these are aliased to the entries in the + // select list + qbp.setDistributeByExprForClause(ctx_1.dest, ast); + if (qbp.getClusterByForClause(ctx_1.dest) != null) { + throw new SemanticException(ErrorMsg.CLUSTERBY_DISTRIBUTEBY_CONFLICT.getMsg(ast)); + } + else if (qbp.getOrderByForClause(ctx_1.dest) != null) { + throw new SemanticException(ErrorMsg.ORDERBY_DISTRIBUTEBY_CONFLICT.getMsg(ast)); + } + break; + + case HiveParser.TOK_SORTBY: + // Get the sort by aliases - these are aliased to the entries in the + // select list + qbp.setSortByExprForClause(ctx_1.dest, ast); + if (qbp.getClusterByForClause(ctx_1.dest) != null) { + throw new SemanticException(ErrorMsg.CLUSTERBY_SORTBY_CONFLICT.getMsg(ast)); + } + else if (qbp.getOrderByForClause(ctx_1.dest) != null) { + throw new SemanticException(ErrorMsg.ORDERBY_SORTBY_CONFLICT.getMsg(ast)); + } + + break; + + case HiveParser.TOK_ORDERBY: + // Get the order by aliases - these are aliased to the entries in the + // select list + qbp.setOrderByExprForClause(ctx_1.dest, ast); + if (qbp.getClusterByForClause(ctx_1.dest) != null) { + throw new SemanticException(ErrorMsg.CLUSTERBY_ORDERBY_CONFLICT.getMsg(ast)); + } + break; + + case HiveParser.TOK_GROUPBY: + // Get the groupby aliases - these are aliased to the entries in the + // select list + if (qbp.getSelForClause(ctx_1.dest).getToken().getType() == HiveParser.TOK_SELECTDI) { + throw new SemanticException(ErrorMsg.SELECT_DISTINCT_WITH_GROUPBY.getMsg(ast)); + } + qbp.setGroupByExprForClause(ctx_1.dest, ast); + skipRecursion = true; + break; + + case HiveParser.TOK_LIMIT: + qbp.setDestLimit(ctx_1.dest, new Integer(ast.getChild(0).getText())); + break; + + case HiveParser.TOK_UNION: + // currently, we dont support subq1 union subq2 - the user has to explicitly say: + // select * from (subq1 union subq2) subqalias + if (!qbp.getIsSubQ()) + throw new SemanticException(ErrorMsg.UNION_NOTIN_SUBQ.getMsg()); + + default: + skipRecursion = false; + break; + } + } + + if (!skipRecursion) { + // Iterate over the rest 
of the children + int child_count = ast.getChildCount(); + for (int child_pos = 0; child_pos < child_count; ++child_pos) { + + // Recurse + doPhase1((ASTNode) ast.getChild(child_pos), qb, ctx_1); + } + } + } + + private void getMetaData(QBExpr qbexpr) throws SemanticException { + if (qbexpr.getOpcode() == QBExpr.Opcode.NULLOP) { + getMetaData(qbexpr.getQB()); + } else { + getMetaData(qbexpr.getQBExpr1()); + getMetaData(qbexpr.getQBExpr2()); + } + } + + @SuppressWarnings("nls") + public void getMetaData(QB qb) throws SemanticException { + try { + + LOG.info("Get metadata for source tables"); + + // Go over the tables and populate the related structures + for (String alias : qb.getTabAliases()) { + String tab_name = qb.getTabNameForAlias(alias); + Table tab = null; + try { + tab = getDB().getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, tab_name); + } + catch (InvalidTableException ite) { + throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(qb.getParseInfo().getSrcForAlias(alias))); + } + + if (!InputFormat.class.isAssignableFrom(tab.getInputFormatClass())) + throw new SemanticException(ErrorMsg.INVALID_INPUT_FORMAT_TYPE.getMsg(qb.getParseInfo().getSrcForAlias(alias))); + + qb.getMetaData().setSrcForAlias(alias, tab); + } + + LOG.info("Get metadata for subqueries"); + // Go over the subqueries and getMetaData for these + for (String alias : qb.getSubqAliases()) { + QBExpr qbexpr = qb.getSubqForAlias(alias); + getMetaData(qbexpr); + } + + LOG.info("Get metadata for destination tables"); + // Go over all the destination structures and populate the related + // metadata + QBParseInfo qbp = qb.getParseInfo(); + + for (String name : qbp.getClauseNamesForDest()) { + ASTNode ast = qbp.getDestForClause(name); + switch (ast.getToken().getType()) { + case HiveParser.TOK_TAB: { + tableSpec ts = new tableSpec(getDB(), getHiveConf(), ast); + + if (!HiveOutputFormat.class.isAssignableFrom(ts.tableHandle.getOutputFormatClass())) + throw new SemanticException(ErrorMsg.INVALID_OUTPUT_FORMAT_TYPE.getMsg(ast)); + + if(ts.partSpec == null) { + // This is a table + qb.getMetaData().setDestForAlias(name, ts.tableHandle); + } else { + // This is a partition + qb.getMetaData().setDestForAlias(name, ts.partHandle); + } + break; + } + case HiveParser.TOK_LOCAL_DIR: + case HiveParser.TOK_DIR: + { + // This is a dfs file + String fname = ParseUtils.stripQuotes(ast.getChild(0).getText()); + if ((!qb.getParseInfo().getIsSubQ()) && + (((ASTNode)ast.getChild(0)).getToken().getType() == HiveParser.TOK_TMP_FILE)) + { + fname = getContext().getMRTmpFileURI(); + getContext().setResDir(new Path(fname)); + + if ( qb.isCTAS() ) { + qb.setIsQuery(false); + } else { + qb.setIsQuery(true); + } + } + qb.getMetaData().setDestForAlias(name, fname, + (ast.getToken().getType() == HiveParser.TOK_DIR)); + break; + } + default: + throw new SemanticException("Unknown Token Type " + ast.getToken().getType()); + } + } + } catch (HiveException e) { + // Has to use full name to make sure it does not conflict with org.apache.commons.lang.StringUtils + LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e)); + throw new SemanticException(e.getMessage(), e); + } + } + + + + + + + + + + + + + + + + + @SuppressWarnings("nls") + public Phase1Ctx initPhase1Ctx() { + + Phase1Ctx ctx_1 = new Phase1Ctx(); + ctx_1.nextNum = 0; + ctx_1.dest = "reduce"; + + return ctx_1; + } + + @Override + @SuppressWarnings("nls") + public void analyzeInternal(ASTNode ast) throws SemanticException { + + this.qb = new QB(null, null, false); + ASTNode child = 
ast; + + LOG.info("Starting Semantic Analysis"); + + // analyze create table command + if (ast.getToken().getType() == HiveParser.TOK_CREATETABLE) { + // if it is not CTAS, we don't need to go further and just return + if ( (child = analyzeCreateTable(ast, qb)) == null ) + return; + } + + // continue analyzing from the child ASTNode. + doPhase1(child, qb, initPhase1Ctx()); + LOG.info("Completed phase 1 of Semantic Analysis"); + + getMetaData(qb); + LOG.info("Completed getting MetaData in Semantic Analysis"); + + + logicalPlan = new LogicalPlanGenerator(getContext(), getHiveConf()).genLogicalPlan(qb); + + Optimizer.optimize(logicalPlan); + + // Do any sample pruning + SamplePrunerGenerator.genSamplePruners(logicalPlan, qb); + LOG.info("Completed sample pruning"); + + // At this point we have the complete operator tree + // from which we want to find the reduce operator + physicalPlan = new PhysicalPlanGenerator(getContext(), logicalPlan, qb).genPlan(physicalPlan); + + LOG.info("Completed plan generation"); + + return; + } + + /** + * Gets the table Alias for the column from the column name. This function throws + * and exception in case the same column name is present in multiple table. The exception + * message indicates that the ambiguity could not be resolved. + * + * @param qbm The metadata where the function looks for the table alias + * @param colName The name of the non aliased column + * @param pt The parse tree corresponding to the column(this is used for error reporting) + * @return String + * @throws SemanticException + */ + static String getTabAliasForCol(QBMetaData qbm, String colName, ASTNode pt) + throws SemanticException { + String tabAlias = null; + boolean found = false; + + for(Map.Entry ent: qbm.getAliasToTable().entrySet()) { + for(FieldSchema field: ent.getValue().getAllCols()) { + if (colName.equalsIgnoreCase(field.getName())) { + if (found) { + throw new SemanticException(ErrorMsg.AMBIGUOUS_COLUMN.getMsg(pt)); + } + + found = true; + tabAlias = ent.getKey(); + } + } + } + return tabAlias; + } + + + public void validate() throws SemanticException { + // Check if the plan contains atleast one path. + + // validate all tasks + for(Task rootTask: getPhysicalPlan().getRootTasks()) + validate(rootTask); + } + + private void validate(Task task) throws SemanticException { + if ((task instanceof MapRedTask) || (task instanceof ExecDriver)) { + // If the plan does not contain any path, an empty file + // will be added by ExecDriver at execute time + // FIXME CWS NOOP + } + + if (task.getChildTasks() == null) { + return; + } + + for (Task childTask : task.getChildTasks()) { + validate(childTask); + } + } + + + + + /** + * Analyze the create table command. If it is a regular create-table or create-table-like + * statements, we create a DDLWork and return true. If it is a create-table-as-select, we get the + * necessary info such as the SerDe and Storage Format and put it in QB, and return false, indicating + * the rest of the semantic analyzer need to deal with the select statement with respect to the + * SerDe and Storage Format. 
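// Editor's note: an illustrative sketch, not part of the patch. analyzeInternal() above now runs
// phase 1, metadata resolution, LogicalPlanGenerator, the Optimizer, sample pruning and
// PhysicalPlanGenerator in sequence, so a caller consumes the result through the PhysicalPlan rather
// than the analyzer's old rootTasks field. The fragment below assumes the analyze()/validate()/
// getPhysicalPlan() entry points used elsewhere in this patch, the usual driver-side imports
// (ParseDriver, Context, Task, SessionState), and a configured Hive session; error handling is omitted.
HiveConf hiveConf = new HiveConf(SessionState.class);
Context ctx = new Context(hiveConf);
ASTNode ast = new ParseDriver().parse("SELECT key, count(1) FROM src GROUP BY key", ctx);
while (ast.getToken() == null && ast.getChildCount() > 0) {
  ast = (ASTNode) ast.getChild(0);                // strip the parser's nil root, as the Driver does
}
DMLSemanticAnalyzer sem = new DMLSemanticAnalyzer(hiveConf);
sem.analyze(ast, ctx);                            // phase 1 -> metadata -> logical -> optimize -> physical
sem.validate();                                   // walks getPhysicalPlan().getRootTasks()
for (Task<? extends Serializable> rootTask : sem.getPhysicalPlan().getRootTasks()) {
  System.out.println("root task: " + rootTask.getClass().getSimpleName());
}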
+ */ + private ASTNode analyzeCreateTable(ASTNode ast, QB qb) + throws SemanticException { + String tableName = ParseUtils.unescapeIdentifier(ast.getChild(0).getText()); + String likeTableName = null; + List cols = null; + List partCols = null; + List bucketCols = null; + List sortCols = null; + int numBuckets = -1; + String fieldDelim = null; + String fieldEscape = null; + String collItemDelim = null; + String mapKeyDelim = null; + String lineDelim = null; + String comment = null; + String inputFormat = TEXTFILE_INPUT; + String outputFormat = TEXTFILE_OUTPUT; + String location = null; + String serde = null; + Map mapProp = null; + boolean ifNotExists = false; + boolean isExt = false; + ASTNode selectStmt = null; + final int CREATE_TABLE = 0; // regular CREATE TABLE + final int CTLT = 1; // CREATE TABLE LIKE ... (CTLT) + final int CTAS = 2; // CREATE TABLE AS SELECT ... (CTAS) + int command_type = CREATE_TABLE; + + if ("SequenceFile".equalsIgnoreCase(getHiveConf().getVar(HiveConf.ConfVars.HIVEDEFAULTFILEFORMAT))) { + inputFormat = SEQUENCEFILE_INPUT; + outputFormat = SEQUENCEFILE_OUTPUT; + } else if ("RCFile".equalsIgnoreCase(getHiveConf().getVar(HiveConf.ConfVars.HIVEDEFAULTFILEFORMAT))) { + inputFormat = RCFILE_INPUT; + outputFormat = RCFILE_OUTPUT; + serde = COLUMNAR_SERDE; + } + + LOG.info("Creating table" + tableName + " positin=" + ast.getCharPositionInLine()); + int numCh = ast.getChildCount(); + + /* Check the 1st-level children and do simple semantic checks: + * 1) CTLT and CTAS should not coexists. + * 2) CTLT or CTAS should not coexists with column list (target table schema). + * 3) CTAS does not support partitioning (for now). + */ + for (int num = 1; num < numCh; num++) + { + ASTNode child = (ASTNode)ast.getChild(num); + switch (child.getToken().getType()) { + case HiveParser.TOK_IFNOTEXISTS: + ifNotExists = true; + break; + case HiveParser.KW_EXTERNAL: + isExt = true; + break; + case HiveParser.TOK_LIKETABLE: + if (child.getChildCount() > 0) { + likeTableName = ParseUtils.unescapeIdentifier(child.getChild(0).getText()); + if ( likeTableName != null ) { + if ( command_type == CTAS ) { + throw new SemanticException(ErrorMsg.CTAS_CTLT_COEXISTENCE.getMsg()); + } + if ( cols != null ) { + throw new SemanticException(ErrorMsg.CTLT_COLLST_COEXISTENCE.getMsg()); + } + } + command_type = CTLT; + } + break; + case HiveParser.TOK_QUERY: // CTAS + if ( command_type == CTLT ) { + throw new SemanticException(ErrorMsg.CTAS_CTLT_COEXISTENCE.getMsg()); + } + if ( cols != null ) { + throw new SemanticException(ErrorMsg.CTAS_COLLST_COEXISTENCE.getMsg()); + } + // TODO: support partition for CTAS? 
+ if ( partCols != null || bucketCols != null ) { + throw new SemanticException(ErrorMsg.CTAS_PARCOL_COEXISTENCE.getMsg()); + } + if ( isExt ) { + throw new SemanticException(ErrorMsg.CTAS_EXTTBL_COEXISTENCE.getMsg()); + } + command_type = CTAS; + selectStmt = child; + break; + case HiveParser.TOK_TABCOLLIST: + cols = getColumns(child); + break; + case HiveParser.TOK_TABLECOMMENT: + comment = ParseUtils.unescapeSQLString(child.getChild(0).getText()); + break; + case HiveParser.TOK_TABLEPARTCOLS: + partCols = getColumns((ASTNode)child.getChild(0)); + break; + case HiveParser.TOK_TABLEBUCKETS: + bucketCols = getColumnNames((ASTNode)child.getChild(0)); + if (child.getChildCount() == 2) + numBuckets = (Integer.valueOf(child.getChild(1).getText())).intValue(); + else + { + sortCols = getColumnNamesOrder((ASTNode)child.getChild(1)); + numBuckets = (Integer.valueOf(child.getChild(2).getText())).intValue(); + } + break; + case HiveParser.TOK_TABLEROWFORMAT: + + child = (ASTNode)child.getChild(0); + int numChildRowFormat = child.getChildCount(); + for (int numC = 0; numC < numChildRowFormat; numC++) + { + ASTNode rowChild = (ASTNode)child.getChild(numC); + switch (rowChild.getToken().getType()) { + case HiveParser.TOK_TABLEROWFORMATFIELD: + fieldDelim = ParseUtils.unescapeSQLString(rowChild.getChild(0).getText()); + if (rowChild.getChildCount()>=2) { + fieldEscape = ParseUtils.unescapeSQLString(rowChild.getChild(1).getText()); + } + break; + case HiveParser.TOK_TABLEROWFORMATCOLLITEMS: + collItemDelim = ParseUtils.unescapeSQLString(rowChild.getChild(0).getText()); + break; + case HiveParser.TOK_TABLEROWFORMATMAPKEYS: + mapKeyDelim = ParseUtils.unescapeSQLString(rowChild.getChild(0).getText()); + break; + case HiveParser.TOK_TABLEROWFORMATLINES: + lineDelim = ParseUtils.unescapeSQLString(rowChild.getChild(0).getText()); + break; + default: assert false; + } + } + break; + case HiveParser.TOK_TABLESERIALIZER: + + child = (ASTNode)child.getChild(0); + serde = ParseUtils.unescapeSQLString(child.getChild(0).getText()); + if (child.getChildCount() == 2) { + mapProp = new HashMap(); + ASTNode prop = (ASTNode)((ASTNode)child.getChild(1)).getChild(0); + for (int propChild = 0; propChild < prop.getChildCount(); propChild++) { + String key = ParseUtils.unescapeSQLString(prop.getChild(propChild).getChild(0).getText()); + String value = ParseUtils.unescapeSQLString(prop.getChild(propChild).getChild(1).getText()); + mapProp.put(key,value); + } + } + break; + case HiveParser.TOK_TBLSEQUENCEFILE: + inputFormat = SEQUENCEFILE_INPUT; + outputFormat = SEQUENCEFILE_OUTPUT; + break; + case HiveParser.TOK_TBLTEXTFILE: + inputFormat = TEXTFILE_INPUT; + outputFormat = TEXTFILE_OUTPUT; + break; + case HiveParser.TOK_TBLRCFILE: + inputFormat = RCFILE_INPUT; + outputFormat = RCFILE_OUTPUT; + serde = COLUMNAR_SERDE; + break; + case HiveParser.TOK_TABLEFILEFORMAT: + inputFormat = ParseUtils.unescapeSQLString(child.getChild(0).getText()); + outputFormat = ParseUtils.unescapeSQLString(child.getChild(1).getText()); + break; + case HiveParser.TOK_TABLELOCATION: + location = ParseUtils.unescapeSQLString(child.getChild(0).getText()); + break; + default: assert false; + } + } + + // check for existence of table + if ( ifNotExists ) { + try { + List tables = getDB().getTablesByPattern(tableName); + if ( tables != null && tables.size() > 0 ) { // table exists + return null; + } + } catch (HiveException e) { + e.printStackTrace(); + } + } + + // Handle different types of CREATE TABLE command + createTableDesc crtTblDesc = null; + switch 
( command_type ) { + + case CREATE_TABLE: // REGULAR CREATE TABLE DDL + crtTblDesc = + new createTableDesc(tableName, isExt, cols, partCols, bucketCols, + sortCols, numBuckets, + fieldDelim, fieldEscape, + collItemDelim, mapKeyDelim, lineDelim, + comment, inputFormat, outputFormat, location, serde, + mapProp, ifNotExists); + + validateCreateTable(crtTblDesc); + getPhysicalPlan().addRootTask(TaskFactory.get(new DDLWork(getPhysicalPlan(), crtTblDesc), getHiveConf())); + break; + + case CTLT: // create table like + createTableLikeDesc crtTblLikeDesc = + new createTableLikeDesc(tableName, isExt, location, ifNotExists, likeTableName); + getPhysicalPlan().addRootTask(TaskFactory.get(new DDLWork(getPhysicalPlan(), crtTblLikeDesc), getHiveConf())); + break; + + case CTAS: // create table as select + + // check for existence of table. Throw an exception if it exists. + try { + Table tab = getDB().getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, tableName, + false); // do not throw exception if table does not exist + + if ( tab != null ) { + throw new SemanticException(ErrorMsg.TABLE_ALREADY_EXISTS.getMsg(tableName)); + } + } catch (HiveException e) { // may be unable to get meta data + throw new SemanticException(e); + } + + crtTblDesc = + new createTableDesc(tableName, isExt, cols, partCols, bucketCols, + sortCols, numBuckets, + fieldDelim, fieldEscape, + collItemDelim, mapKeyDelim, lineDelim, + comment, inputFormat, outputFormat, location, serde, + mapProp, ifNotExists); + qb.setTableDesc(crtTblDesc); + + return selectStmt; + default: assert false; // should never be unknown command type + } + return null; + } + + @SuppressWarnings("unchecked") + private void validateCreateTable(createTableDesc crtTblDesc) throws SemanticException { + // no duplicate column names + // currently, it is a simple n*n algorithm - this can be optimized later if need be + // but it should not be a major bottleneck as the number of columns are anyway not so big + + if((crtTblDesc.getCols() == null) || (crtTblDesc.getCols().size() == 0)) { + // for now make sure that serde exists + if(StringUtils.isEmpty(crtTblDesc.getSerName()) || SerDeUtils.isNativeSerDe(crtTblDesc.getSerName())) { + throw new SemanticException(ErrorMsg.INVALID_TBL_DDL_SERDE.getMsg()); + } + return; + } + + try { + Class origin = Class.forName(crtTblDesc.getOutputFormat(), true, JavaUtils.getClassLoader()); + Class replaced = HiveFileFormatUtils.getOutputFormatSubstitute(origin); + if(replaced == null) + throw new SemanticException(ErrorMsg.INVALID_OUTPUT_FORMAT_TYPE.getMsg()); + } catch (ClassNotFoundException e) { + throw new SemanticException(ErrorMsg.INVALID_OUTPUT_FORMAT_TYPE.getMsg()); + } + + Iterator iterCols = crtTblDesc.getCols().iterator(); + List colNames = new ArrayList(); + while (iterCols.hasNext()) { + String colName = iterCols.next().getName(); + Iterator iter = colNames.iterator(); + while (iter.hasNext()) { + String oldColName = iter.next(); + if (colName.equalsIgnoreCase(oldColName)) + throw new SemanticException(ErrorMsg.DUPLICATE_COLUMN_NAMES.getMsg()); + } + colNames.add(colName); + } + + if (crtTblDesc.getBucketCols() != null) + { + // all columns in cluster and sort are valid columns + Iterator bucketCols = crtTblDesc.getBucketCols().iterator(); + while (bucketCols.hasNext()) { + String bucketCol = bucketCols.next(); + boolean found = false; + Iterator colNamesIter = colNames.iterator(); + while (colNamesIter.hasNext()) { + String colName = colNamesIter.next(); + if (bucketCol.equalsIgnoreCase(colName)) { + found = true; + break; 
+ } + } + if (!found) + throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg()); + } + } + + if (crtTblDesc.getSortCols() != null) + { + // all columns in cluster and sort are valid columns + Iterator sortCols = crtTblDesc.getSortCols().iterator(); + while (sortCols.hasNext()) { + String sortCol = sortCols.next().getCol(); + boolean found = false; + Iterator colNamesIter = colNames.iterator(); + while (colNamesIter.hasNext()) { + String colName = colNamesIter.next(); + if (sortCol.equalsIgnoreCase(colName)) { + found = true; + break; + } + } + if (!found) + throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg()); + } + } + + if (crtTblDesc.getPartCols() != null) + { + // there is no overlap between columns and partitioning columns + Iterator partColsIter = crtTblDesc.getPartCols().iterator(); + while (partColsIter.hasNext()) { + String partCol = partColsIter.next().getName(); + Iterator colNamesIter = colNames.iterator(); + while (colNamesIter.hasNext()) { + String colName = ParseUtils.unescapeIdentifier(colNamesIter.next()); + if (partCol.equalsIgnoreCase(colName)) + throw new SemanticException(ErrorMsg.COLUMN_REPEATED_IN_PARTITIONING_COLS.getMsg()); + } + } + } + } +} Index: ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java (revision 836131) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java (working copy) @@ -81,7 +81,7 @@ fromScheme = "file"; } else { // use default values from fs.default.name - URI defaultURI = FileSystem.get(conf).getUri(); + URI defaultURI = FileSystem.get(getHiveConf()).getUri(); fromScheme = defaultURI.getScheme(); fromAuthority = defaultURI.getAuthority(); } @@ -89,7 +89,7 @@ // if scheme is specified but not authority then use the default authority if(fromScheme.equals("hdfs") && StringUtils.isEmpty(fromAuthority)) { - URI defaultURI = FileSystem.get(conf).getUri(); + URI defaultURI = FileSystem.get(getHiveConf()).getUri(); fromAuthority = defaultURI.getAuthority(); } @@ -111,7 +111,7 @@ } try { - FileStatus [] srcs = matchFilesOrDir(FileSystem.get(fromURI, conf), + FileStatus [] srcs = matchFilesOrDir(FileSystem.get(fromURI, getHiveConf()), new Path(fromURI.getScheme(), fromURI.getAuthority(), fromURI.getPath())); @@ -166,7 +166,7 @@ // initialize load path URI fromURI; try { - String fromPath = stripQuotes(from_t.getText()); + String fromPath = ParseUtils.stripQuotes(from_t.getText()); fromURI = initializeFromURI(fromPath); } catch (IOException e) { throw new SemanticException (ErrorMsg.INVALID_PATH.getMsg(from_t, e.getMessage()), e); @@ -175,7 +175,7 @@ } // initialize destination table/partition - tableSpec ts = new tableSpec(db, conf, (ASTNode) table_t); + tableSpec ts = new tableSpec(getDB(), getHiveConf(), (ASTNode) table_t); URI toURI = (ts.partHandle != null) ? ts.partHandle.getDataLocation() : ts.tableHandle.getDataLocation(); List parts = ts.tableHandle.getTTable().getPartitionKeys(); @@ -193,15 +193,15 @@ if(isLocal) { // if the local keyword is specified - we will always make a copy. this might seem redundant in the case // that the hive warehouse is also located in the local file system - but that's just a test case. 
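// Editor's note: an illustrative sketch, not part of the patch. validateCreateTable() just above
// rejects duplicate column names (and overlaps with bucket/sort/partition columns) using nested linear
// scans; the duplicate-name check can be phrased with a case-normalized HashSet, shown here only to
// make the intent of those loops explicit.
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.ql.parse.ErrorMsg;
import org.apache.hadoop.hive.ql.parse.SemanticException;

public final class CreateTableChecks {
  public static void checkNoDuplicateColumns(List<FieldSchema> cols) throws SemanticException {
    Set<String> seen = new HashSet<String>();
    for (FieldSchema col : cols) {
      if (!seen.add(col.getName().toLowerCase())) {   // add() returns false on a repeat
        throw new SemanticException(ErrorMsg.DUPLICATE_COLUMN_NAMES.getMsg());
      }
    }
  }

  private CreateTableChecks() { }
}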
- String copyURIStr = ctx.getExternalTmpFileURI(toURI); + String copyURIStr = getContext().getExternalTmpFileURI(toURI); URI copyURI = URI.create(copyURIStr); - rTask = TaskFactory.get(new copyWork(fromURI.toString(), copyURIStr), this.conf); + rTask = TaskFactory.get(new copyWork(fromURI.toString(), copyURIStr), getHiveConf()); fromURI = copyURI; } // create final load/move work - String loadTmpPath = ctx.getExternalTmpFileURI(toURI); + String loadTmpPath = getContext().getExternalTmpFileURI(toURI); loadTableDesc loadTableWork = new loadTableDesc(fromURI.toString(), loadTmpPath, Utilities.getTableDesc(ts.tableHandle), (ts.partSpec != null) ? ts.partSpec : @@ -209,11 +209,11 @@ isOverWrite); if(rTask != null) { - rTask.addDependentTask(TaskFactory.get(new moveWork(getInputs(), getOutputs(), loadTableWork, null, true), this.conf)); + rTask.addDependentTask(TaskFactory.get(new moveWork(getPhysicalPlan(), loadTableWork, null, true), getHiveConf())); } else { - rTask = TaskFactory.get(new moveWork(getInputs(), getOutputs(), loadTableWork, null, true), this.conf); + rTask = TaskFactory.get(new moveWork(getPhysicalPlan(), loadTableWork, null, true), getHiveConf()); } - rootTasks.add(rTask); + getPhysicalPlan().addRootTask(rTask); } } Index: ql/src/java/org/apache/hadoop/hive/ql/parse/FunctionSemanticAnalyzer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/FunctionSemanticAnalyzer.java (revision 836131) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/FunctionSemanticAnalyzer.java (working copy) @@ -21,7 +21,6 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.ql.Context; import org.apache.hadoop.hive.ql.exec.TaskFactory; import org.apache.hadoop.hive.ql.plan.FunctionWork; import org.apache.hadoop.hive.ql.plan.createFunctionDesc; @@ -47,15 +46,15 @@ private void analyzeCreateFunction(ASTNode ast) throws SemanticException { String functionName = ast.getChild(0).getText(); - String className = unescapeSQLString(ast.getChild(1).getText()); + String className = ParseUtils.unescapeSQLString(ast.getChild(1).getText()); createFunctionDesc desc = new createFunctionDesc(functionName, className); - rootTasks.add(TaskFactory.get(new FunctionWork(desc), conf)); + getPhysicalPlan().addRootTask(TaskFactory.get(new FunctionWork(desc), getHiveConf())); } private void analyzeDropFunction(ASTNode ast) throws SemanticException { String functionName = ast.getChild(0).getText(); dropFunctionDesc desc = new dropFunctionDesc(functionName); - rootTasks.add(TaskFactory.get(new FunctionWork(desc), conf)); + getPhysicalPlan().addRootTask(TaskFactory.get(new FunctionWork(desc), getHiveConf())); } } Index: ql/src/java/org/apache/hadoop/hive/ql/parse/LogicalPlan.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/LogicalPlan.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/LogicalPlan.java (revision 0) @@ -0,0 +1,360 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.parse; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.exec.JoinOperator; +import org.apache.hadoop.hive.ql.exec.MapJoinOperator; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.OperatorFactory; +import org.apache.hadoop.hive.ql.exec.RowSchema; +import org.apache.hadoop.hive.ql.exec.TableScanOperator; +import org.apache.hadoop.hive.ql.hooks.ReadEntity; +import org.apache.hadoop.hive.ql.hooks.WriteEntity; +import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext; +import org.apache.hadoop.hive.ql.plan.exprNodeDesc; +import org.apache.hadoop.hive.ql.plan.loadFileDesc; +import org.apache.hadoop.hive.ql.plan.loadTableDesc; + + +public class LogicalPlan { + + private HiveConf hiveConf; + + private Map tsOpToPartPrunerMap; + private Map aliasToSamplePrunerMap; + private Map> aliasToTopOpMap; + private Map tsOpToTableMap; + private Map, OpParseContext> opToParseCtxMap; + private Map joinOpToJoinTreeMap; + private List mapJoinOpsNoReducer; + private UnionProcContext uCtx; + + private Map idToTableNameMap; + + private List loadTableWork; + private List loadFileWork; + + private int destTableId; + + + /** + * ReadEntitites that are passed to the hooks. + */ + private Set inputs; + /** + * List of WriteEntities that are passed to the hooks. 
+ */ + private Set outputs; + + + + + public LogicalPlan(HiveConf hiveConf) { + this.hiveConf = hiveConf; + tsOpToPartPrunerMap = new HashMap(); + aliasToSamplePrunerMap = new HashMap(); + aliasToTopOpMap = new HashMap>(); + tsOpToTableMap = new HashMap(); + opToParseCtxMap = new HashMap, OpParseContext>(); + joinOpToJoinTreeMap = new HashMap(); + mapJoinOpsNoReducer = new ArrayList(); + idToTableNameMap = new HashMap(); + loadTableWork = new ArrayList(); + loadFileWork = new ArrayList(); + inputs = new LinkedHashSet(); + outputs = new LinkedHashSet(); + destTableId = 1; + } + + public HiveConf getHiveConf() { + return hiveConf; + } + + + // TODO CWS set in genFileSinkPlan during Logical Operator phase + public void setTableId(String id, String tableName) { + idToTableNameMap.put(id, tableName); + } + + // TODO CWS called from Driver.execute() + public Map getIdToTableNameMap() { + return new HashMap(idToTableNameMap); + } + + public void clearIdToTableNameMap() { + idToTableNameMap.clear(); + } + + // + // opToPartPruner + // + + public Map getTsOpToPartPrunerMap() { + return tsOpToPartPrunerMap; + } + + public exprNodeDesc getPartPruner(TableScanOperator tableScanOp) { + return tsOpToPartPrunerMap.get(tableScanOp); + } + + public void addPartPruner(TableScanOperator tableScanOp, exprNodeDesc partPruner) { + tsOpToPartPrunerMap.put(tableScanOp, partPruner); + } + + // + // aliasToSamplePruner + // + + + public SamplePruner getSamplePruner(String alias) { + return aliasToSamplePrunerMap.get(alias); + } + + public void addSamplePruner(String alias, SamplePruner samplePruner) { + aliasToSamplePrunerMap.put(alias, samplePruner); + } + + // + // topOps + // + + public Operator getTopOp(String alias) { + return aliasToTopOpMap.get(alias); + } + + public Collection> getTopOps() { + return aliasToTopOpMap.values(); + } + + public Set getTopOpAliases() { + return aliasToTopOpMap.keySet(); + } + + public void addTopOp(String alias, Operator topOp) { + aliasToTopOpMap.put(alias, topOp); + } + + + + // + // opParseCtx + // + + public Map, OpParseContext> getOpToParseContextMap() { + return opToParseCtxMap; + } + + public OpParseContext getOpParseContext(Operator op) { + return opToParseCtxMap.get(op); + } + + public void addOpParseContext(Operator op, OpParseContext pctx) { + opToParseCtxMap.put(op, pctx); + } + + + /** + * Get the row resolver given an operator. 
+ */ + public RowResolver getRowResolver(Operator opt) { + return opToParseCtxMap.get(opt).getRR(); + } + + public void setRowResolver(Operator op, RowResolver rr) { + OpParseContext opCtx = opToParseCtxMap.get(op); + if (null == opCtx) { + opCtx = new OpParseContext(); + } + opCtx.setRR(rr); + opToParseCtxMap.put(op, opCtx); + } + + // + // topToTable + // + + public Map getTopToTableMap() { + return tsOpToTableMap; + } + + public Table getTable(TableScanOperator tsOp) { + return tsOpToTableMap.get(tsOp); + } + + public void addTable(TableScanOperator tsOp, Table table) { + tsOpToTableMap.put(tsOp, table); + } + + + // + // joinContext + // + + public void addJoinTree(JoinOperator joinOp, QBJoinTree joinTree) { + joinOpToJoinTreeMap.put(joinOp, joinTree); + } + + public void removeJoinTree(JoinOperator joinOp) { + joinOpToJoinTreeMap.remove(joinOp); + } + + public QBJoinTree getJoinTree(JoinOperator joinOp) { + return joinOpToJoinTreeMap.get(joinOp); + } + + public Collection getJoinTrees() { + return joinOpToJoinTreeMap.values(); + } + + public Set getJoinOps() { + return joinOpToJoinTreeMap.keySet(); + } + + + // + // listMapJoinOpsNoReducer + // + + public List getMapJoinOpsNoReducer() { + return new ArrayList(mapJoinOpsNoReducer); + } + + public void setListMapJoinOpsNoReducer(List listMapJoinOpsNoReducer) { + this.mapJoinOpsNoReducer = new ArrayList(listMapJoinOpsNoReducer); + } + + // + // uCtx + // + + public UnionProcContext getUCtx() { + return uCtx; + } + + public void setUCtx(UnionProcContext uCtx) { + this.uCtx = uCtx; + } + + // + // loadTableWork + // + + public List getLoadTableWork() { + return new ArrayList(loadTableWork); + } + + public void addLoadTableWork(loadTableDesc desc) { + loadTableWork.add(desc); + } + + // + // loadFileWork + // + + + public List getLoadFileWork() { + return new ArrayList(loadFileWork); + } + + public void addLoadFileWork(loadFileDesc desc) { + loadFileWork.add(desc); + } + + // + // destTableId + // + + public int getDestTableId() { + return destTableId; + } + + public void setDestTableId(int destTableId) { + this.destTableId = destTableId; + } + + + + // + // Inputs + // + + public Set getInputs() { + return new LinkedHashSet(inputs); + } + + public void addInput(ReadEntity input) { + inputs.add(input); + } + + public void removeInput(ReadEntity input) { + inputs.remove(input); + } + + + // + // Outputs + // + + public Set getOutputs() { + return new LinkedHashSet(outputs); + } + + public void addOutput(WriteEntity output) { + outputs.add(output); + } + + public void removeOutput(WriteEntity output) { + outputs.remove(output); + } + + public void clearOutputs() { + outputs.clear(); + } + + + + public Operator newChildOperator(T conf, Operator ... parentOps) { + return OperatorFactory.getAndMakeChild(conf, parentOps); + } + + public Operator newChildOperator(T conf, RowSchema rowSchema, Operator ... parentOps) { + Operator childOp = newChildOperator(conf, parentOps); + childOp.setSchema(rowSchema); + return childOp; + } + + public Operator newChildOperator(T conf, RowResolver rowResolver, Operator ... 
parentOps) { + Operator childOp = newChildOperator(conf, rowResolver.getRowSchema(), parentOps); + setRowResolver(childOp, rowResolver); + return childOp; + } + + +} Index: ql/src/java/org/apache/hadoop/hive/ql/parse/PhysicalPlan.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/PhysicalPlan.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/PhysicalPlan.java (revision 0) @@ -0,0 +1,160 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.parse; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Set; + +import org.apache.hadoop.hive.ql.exec.Task; +import org.apache.hadoop.hive.ql.hooks.ReadEntity; +import org.apache.hadoop.hive.ql.hooks.WriteEntity; + + +public class PhysicalPlan { + + private List> rootTasks; + private Task fetchTask; + private boolean fetchTaskInit; + + /** + * ReadEntitites that are passed to the hooks. + */ + private Set inputs; + /** + * List of WriteEntities that are passed to the hooks. 
+ */
+  private Set<WriteEntity> outputs;
+
+
+  public PhysicalPlan() throws SemanticException {
+    rootTasks = new ArrayList<Task<? extends Serializable>>();
+    inputs = new LinkedHashSet<ReadEntity>();
+    outputs = new LinkedHashSet<WriteEntity>();
+  }
+
+  public PhysicalPlan(LogicalPlan logicalPlan) throws SemanticException {
+    this();
+    inputs.addAll(logicalPlan.getInputs());
+    outputs.addAll(logicalPlan.getOutputs());
+  }
+
+
+  //
+  // Root Tasks
+  //
+
+
+  public List<Task<? extends Serializable>> getRootTasks() {
+    return new ArrayList<Task<? extends Serializable>>(rootTasks);
+  }
+
+  public void addRootTask(Task<? extends Serializable> task) {
+    rootTasks.add(task);
+  }
+
+  public void clearRootTasks() {
+    rootTasks.clear();
+  }
+
+  public void removeRootTask(Task<? extends Serializable> task) {
+    rootTasks.remove(task);
+  }
+
+
+  //
+  // Fetch Task
+  //
+
+
+
+  /**
+   * @return the fetchTask
+   */
+  public Task<? extends Serializable> getFetchTask() {
+    return fetchTask;
+  }
+
+  /**
+   * @param fetchTask the fetchTask to set
+   */
+  public void setFetchTask(Task<? extends Serializable> fetchTask) {
+    this.fetchTask = fetchTask;
+  }
+
+  //
+  // fetchTaskInit
+  //
+
+  public boolean getFetchTaskInit() {
+    return fetchTaskInit;
+  }
+
+  public void setFetchTaskInit(boolean fetchTaskInit) {
+    this.fetchTaskInit = fetchTaskInit;
+  }
+
+
+  //
+  // Inputs
+  //
+
+  public Set<ReadEntity> getInputs() {
+    return new LinkedHashSet<ReadEntity>(inputs);
+  }
+
+  public void addInput(ReadEntity input) {
+    inputs.add(input);
+  }
+
+  public void addInputs(Set<ReadEntity> inputs) {
+    this.inputs.addAll(inputs);
+  }
+
+  public void removeInput(ReadEntity input) {
+    inputs.remove(input);
+  }
+
+
+  //
+  // Outputs
+  //
+
+  public Set<WriteEntity> getOutputs() {
+    return new LinkedHashSet<WriteEntity>(outputs);
+  }
+
+  public void addOutput(WriteEntity output) {
+    outputs.add(output);
+  }
+
+  public void addOutputs(Set<WriteEntity> outputs) {
+    this.outputs.addAll(outputs);
+  }
+
+  public void removeOutput(WriteEntity output) {
+    outputs.remove(output);
+  }
+
+  public void clearOutputs() {
+    outputs.clear();
+  }
+}
Index: ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java (revision 836131)
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java (working copy)
@@ -199,14 +199,14 @@
       switch (expr.getToken().getType()) {
       case HiveParser.StringLiteral:
-        str = BaseSemanticAnalyzer.unescapeSQLString(expr.getText());
+        str = ParseUtils.unescapeSQLString(expr.getText());
         break;
       case HiveParser.TOK_CHARSETLITERAL:
-        str = BaseSemanticAnalyzer.charSetString(expr.getChild(0).getText(), expr.getChild(1).getText());
+        str = ParseUtils.charSetString(expr.getChild(0).getText(), expr.getChild(1).getText());
         break;
       default:
         // HiveParser.identifier | HiveParse.KW_IF | HiveParse.KW_LEFT | HiveParse.KW_RIGHT
-        str = BaseSemanticAnalyzer.unescapeIdentifier(expr.getText());
+        str = ParseUtils.unescapeIdentifier(expr.getText());
         break;
       }
       return new exprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, str);
@@ -295,7 +295,7 @@
       }

       assert(expr.getChildCount() == 1);
-      String tableOrCol = BaseSemanticAnalyzer.unescapeIdentifier(expr.getChild(0).getText());
+      String tableOrCol = ParseUtils.unescapeIdentifier(expr.getChild(0).getText());

       boolean isTableAlias = input.hasTableAlias(tableOrCol);
       ColumnInfo colInfo = input.get(null, tableOrCol);
@@ -587,7 +587,7 @@
           && nodeOutputs[0] == null) {

         RowResolver input = ctx.getInputRR();
-        String tableAlias = SemanticAnalyzer.unescapeIdentifier(
+        String tableAlias = ParseUtils.unescapeIdentifier(
             expr.getChild(0).getChild(0).getText());
         // NOTE: tableAlias must be a valid non-ambiguous table alias,
        // because
we've checked that in TOK_TABLE_OR_COL's process method. Index: ql/src/java/org/apache/hadoop/hive/ql/parse/LogicalPlanGenerator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/LogicalPlanGenerator.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/LogicalPlanGenerator.java (revision 0) @@ -0,0 +1,3877 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.parse; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.TreeSet; +import java.util.Vector; +import java.util.regex.Pattern; +import java.util.regex.PatternSyntaxException; + +import org.apache.commons.lang.StringUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.common.FileUtils; +import org.apache.hadoop.hive.common.JavaUtils; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.ql.Context; +import org.apache.hadoop.hive.ql.exec.ColumnInfo; +import org.apache.hadoop.hive.ql.exec.FunctionRegistry; +import org.apache.hadoop.hive.ql.exec.GroupByOperator; +import org.apache.hadoop.hive.ql.exec.JoinOperator; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.OperatorFactory; +import org.apache.hadoop.hive.ql.exec.RecordReader; +import org.apache.hadoop.hive.ql.exec.RecordWriter; +import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; +import org.apache.hadoop.hive.ql.exec.RowSchema; +import org.apache.hadoop.hive.ql.exec.TableScanOperator; +import org.apache.hadoop.hive.ql.exec.UnionOperator; +import org.apache.hadoop.hive.ql.exec.Utilities; +import org.apache.hadoop.hive.ql.hooks.WriteEntity; +import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker; +import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; +import org.apache.hadoop.hive.ql.lib.Dispatcher; +import org.apache.hadoop.hive.ql.lib.GraphWalker; +import org.apache.hadoop.hive.ql.lib.Node; +import org.apache.hadoop.hive.ql.lib.NodeProcessor; +import org.apache.hadoop.hive.ql.lib.Rule; +import org.apache.hadoop.hive.ql.lib.RuleRegExp; +import org.apache.hadoop.hive.ql.metadata.Hive; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.metadata.Partition; +import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.plan.PlanUtils; +import org.apache.hadoop.hive.ql.plan.aggregationDesc; +import 
org.apache.hadoop.hive.ql.plan.createTableDesc; +import org.apache.hadoop.hive.ql.plan.exprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.exprNodeConstantDesc; +import org.apache.hadoop.hive.ql.plan.exprNodeDesc; +import org.apache.hadoop.hive.ql.plan.exprNodeGenericFuncDesc; +import org.apache.hadoop.hive.ql.plan.exprNodeNullDesc; +import org.apache.hadoop.hive.ql.plan.extractDesc; +import org.apache.hadoop.hive.ql.plan.fileSinkDesc; +import org.apache.hadoop.hive.ql.plan.filterDesc; +import org.apache.hadoop.hive.ql.plan.forwardDesc; +import org.apache.hadoop.hive.ql.plan.groupByDesc; +import org.apache.hadoop.hive.ql.plan.joinDesc; +import org.apache.hadoop.hive.ql.plan.limitDesc; +import org.apache.hadoop.hive.ql.plan.loadFileDesc; +import org.apache.hadoop.hive.ql.plan.loadTableDesc; +import org.apache.hadoop.hive.ql.plan.reduceSinkDesc; +import org.apache.hadoop.hive.ql.plan.scriptDesc; +import org.apache.hadoop.hive.ql.plan.selectDesc; +import org.apache.hadoop.hive.ql.plan.tableDesc; +import org.apache.hadoop.hive.ql.plan.tableScanDesc; +import org.apache.hadoop.hive.ql.plan.unionDesc; +import org.apache.hadoop.hive.ql.session.SessionState; +import org.apache.hadoop.hive.ql.session.SessionState.LogHelper; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFHash; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode; +import org.apache.hadoop.hive.serde.Constants; +import org.apache.hadoop.hive.serde2.Deserializer; +import org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe; +import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; + + +public class LogicalPlanGenerator { + + private Hive hiveDB; + private HiveConf hiveConf; + private LogicalPlan logicalPlan; + + protected final Log LOG; + protected final LogHelper console; + + private Context context; + + public LogicalPlanGenerator(Context context, HiveConf hiveConf) throws SemanticException { + try { + this.context = context; + this.hiveConf = hiveConf; + this.hiveDB = Hive.get(hiveConf); + LOG = LogFactory.getLog(this.getClass().getName()); + console = new LogHelper(LOG); + } catch (HiveException e) { + throw new SemanticException(e); + } + } + + private Context getContext() { + return context; + } + + + + /** + * Generates an expression node descriptor for the expression passed in the arguments. This + * function uses the row resolver and the metadata informatinon that are passed as arguments + * to resolve the column names to internal names. + * @param expr The expression + * @param input The row resolver + * @return exprNodeDesc + * @throws SemanticException + */ + @SuppressWarnings("nls") + public static exprNodeDesc genExprNodeDesc(ASTNode expr, RowResolver input) + throws SemanticException { + // We recursively create the exprNodeDesc. 
Base cases: when we encounter + // a column ref, we convert that into an exprNodeColumnDesc; when we encounter + // a constant, we convert that into an exprNodeConstantDesc. For others we just + // build the exprNodeFuncDesc with recursively built children. + + // If the current subExpression is pre-calculated, as in Group-By etc. + ColumnInfo colInfo = input.get("", expr.toStringTree()); + if (colInfo != null) { + return new exprNodeColumnDesc(colInfo.getType(), colInfo.getInternalName(), + colInfo.getTabAlias(), colInfo.getIsPartitionCol()); + } + + // Create the walker, the rules dispatcher and the context. + TypeCheckCtx tcCtx = new TypeCheckCtx(input); + + // create a walker which walks the tree in a DFS manner while maintaining the operator stack. The dispatcher + // generates the plan from the operator tree + Map opRules = new LinkedHashMap(); + opRules.put(new RuleRegExp("R1", HiveParser.TOK_NULL + "%"), TypeCheckProcFactory.getNullExprProcessor()); + opRules.put(new RuleRegExp("R2", HiveParser.Number + "%"), TypeCheckProcFactory.getNumExprProcessor()); + opRules.put(new RuleRegExp("R3", HiveParser.Identifier + "%|" + + HiveParser.StringLiteral + "%|" + + HiveParser.TOK_CHARSETLITERAL + "%|" + + HiveParser.KW_IF + "%|" + + HiveParser.KW_CASE + "%|" + + HiveParser.KW_WHEN + "%"), + TypeCheckProcFactory.getStrExprProcessor()); + opRules.put(new RuleRegExp("R4", HiveParser.KW_TRUE + "%|" + HiveParser.KW_FALSE + "%"), + TypeCheckProcFactory.getBoolExprProcessor()); + opRules.put(new RuleRegExp("R5", HiveParser.TOK_TABLE_OR_COL + "%"), TypeCheckProcFactory.getColumnExprProcessor()); + + // The dispatcher fires the processor corresponding to the closest matching rule and passes the context along + Dispatcher disp = new DefaultRuleDispatcher(TypeCheckProcFactory.getDefaultExprProcessor(), opRules, tcCtx); + GraphWalker ogw = new DefaultGraphWalker(disp); + + // Create a list of topop nodes + ArrayList topNodes = new ArrayList(); + topNodes.add(expr); + HashMap nodeOutputs = new HashMap(); + ogw.startWalking(topNodes, nodeOutputs); + exprNodeDesc desc = (exprNodeDesc)nodeOutputs.get(expr); + if (desc == null) { + throw new SemanticException(tcCtx.getError()); + } + + return desc; + } + + + private boolean isPresent(String[] list, String elem) { + for (String s : list) + if (s.equals(elem)) + return true; + + return false; + } + + + @SuppressWarnings("nls") + private void parseJoinCondPopulateAlias(QBJoinTree joinTree, + ASTNode condn, Vector leftAliases, Vector rightAliases, + ArrayList fields) + throws SemanticException { + switch (condn.getToken().getType()) { + case HiveParser.TOK_TABLE_OR_COL: + String tableOrCol = ParseUtils.unescapeIdentifier(condn.getChild(0).getText().toLowerCase()); + if (isPresent(joinTree.getLeftAliases(), tableOrCol)) { + if (!leftAliases.contains(tableOrCol)) + leftAliases.add(tableOrCol); + } else if (isPresent(joinTree.getRightAliases(), tableOrCol)) { + if (!rightAliases.contains(tableOrCol)) + rightAliases.add(tableOrCol); + } else { + // We don't support columns without table prefix in JOIN condition right now. + // We need to pass Metadata here to know which table the column belongs to. 
+ throw new SemanticException(ErrorMsg.INVALID_TABLE_ALIAS.getMsg(condn.getChild(0))); + } + break; + + case HiveParser.Identifier: + // it may be a field name, return the identifier and let the caller decide whether it is or not + if ( fields != null ) { + fields.add(ParseUtils.unescapeIdentifier(condn.getToken().getText().toLowerCase())); + } + break; + case HiveParser.Number: + case HiveParser.StringLiteral: + case HiveParser.TOK_CHARSETLITERAL: + case HiveParser.KW_TRUE: + case HiveParser.KW_FALSE: + break; + + case HiveParser.TOK_FUNCTION: + // check all the arguments + for (int i = 1; i < condn.getChildCount(); i++) + parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(i), + leftAliases, rightAliases, null); + break; + + default: + // This is an operator - so check whether it is unary or binary operator + if (condn.getChildCount() == 1) + parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(0), + leftAliases, rightAliases, null); + else if (condn.getChildCount() == 2) { + + ArrayList fields1 = null; + // if it is a dot operator, remember the field name of the rhs of the left semijoin + if (joinTree.getNoSemiJoin() == false && + condn.getToken().getText().equals("." )) { + // get the semijoin rhs table name and field name + fields1 = new ArrayList(); + int rhssize = rightAliases.size(); + parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(0), + leftAliases, rightAliases, null); + String rhsAlias = null; + + if ( rightAliases.size() > rhssize ) { // the new table is rhs table + rhsAlias = rightAliases.get(rightAliases.size()-1); + } + + parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(1), + leftAliases, rightAliases, fields1); + if ( rhsAlias != null && fields1.size() > 0 ) { + joinTree.addRHSSemijoinColumns(rhsAlias, condn); + } + } else { + parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(0), + leftAliases, rightAliases, null); + parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(1), + leftAliases, rightAliases, fields1); + } + } else + throw new SemanticException(condn.toStringTree() + " encountered with " + + condn.getChildCount() + " children"); + break; + } + } + + private void populateAliases(Vector leftAliases, + Vector rightAliases, ASTNode condn, QBJoinTree joinTree, + Vector leftSrc) throws SemanticException { + if ((leftAliases.size() != 0) && (rightAliases.size() != 0)) + throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_1.getMsg(condn)); + + if (rightAliases.size() != 0) { + assert rightAliases.size() == 1; + joinTree.getExpressions().get(1).add(condn); + } else if (leftAliases.size() != 0) { + joinTree.getExpressions().get(0).add(condn); + for (String s : leftAliases) + if (!leftSrc.contains(s)) + leftSrc.add(s); + } else + throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_2.getMsg(condn)); + } + + /** + * Parse the join condition. + * If the condition is a join condition, throw an error if it is not an equality. Otherwise, break it into left and + * right expressions and store in the join tree. + * If the condition is a join filter, add it to the filter list of join tree. The join condition can contains conditions + * on both the left and tree trees and filters on either. Currently, we only support equi-joins, so we throw an error + * if the condition involves both subtrees and is not a equality. Also, we only support AND i.e ORs are not supported + * currently as their semantics are not very clear, may lead to data explosion and there is no usecase. 
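+ * For example (with hypothetical aliases a and b): in "a JOIN b ON (a.key = b.key AND a.ds = '2009-01-01')",
+ * the equality "a.key = b.key" references both subtrees and is split into the left and right join expressions,
+ * while "a.ds = '2009-01-01'" references only the left subtree and is therefore added to the left filter list.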
+ * @param joinTree jointree to be populated + * @param joinCond join condition + * @param leftSrc left sources + * @throws SemanticException + */ + private void parseJoinCondition(QBJoinTree joinTree, ASTNode joinCond, Vector leftSrc) + throws SemanticException { + if (joinCond == null) + return; + + switch (joinCond.getToken().getType()) { + case HiveParser.KW_OR: + throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_3.getMsg(joinCond)); + + case HiveParser.KW_AND: + parseJoinCondition(joinTree, (ASTNode) joinCond + .getChild(0), leftSrc); + parseJoinCondition(joinTree, (ASTNode) joinCond + .getChild(1), leftSrc); + break; + + case HiveParser.EQUAL: + ASTNode leftCondn = (ASTNode) joinCond.getChild(0); + Vector leftCondAl1 = new Vector(); + Vector leftCondAl2 = new Vector(); + parseJoinCondPopulateAlias(joinTree, leftCondn, leftCondAl1, leftCondAl2, null); + + ASTNode rightCondn = (ASTNode) joinCond.getChild(1); + Vector rightCondAl1 = new Vector(); + Vector rightCondAl2 = new Vector(); + parseJoinCondPopulateAlias(joinTree, rightCondn, rightCondAl1, rightCondAl2, null); + + // is it a filter or a join condition + if (((leftCondAl1.size() != 0) && (leftCondAl2.size() != 0)) || + ((rightCondAl1.size() != 0) && (rightCondAl2.size() != 0))) + throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_1.getMsg(joinCond)); + + if (leftCondAl1.size() != 0) { + if ((rightCondAl1.size() != 0) || ((rightCondAl1.size() == 0) && (rightCondAl2.size() == 0))) + joinTree.getFilters().get(0).add(joinCond); + else if (rightCondAl2.size() != 0) { + populateAliases(leftCondAl1, leftCondAl2, leftCondn, joinTree, leftSrc); + populateAliases(rightCondAl1, rightCondAl2, rightCondn, joinTree, leftSrc); + } + } + else if (leftCondAl2.size() != 0) { + if ((rightCondAl2.size() != 0) || ((rightCondAl1.size() == 0) && (rightCondAl2.size() == 0))) + joinTree.getFilters().get(1).add(joinCond); + else if (rightCondAl1.size() != 0) { + populateAliases(leftCondAl1, leftCondAl2, leftCondn, joinTree, leftSrc); + populateAliases(rightCondAl1, rightCondAl2, rightCondn, joinTree, leftSrc); + } + } + else if (rightCondAl1.size() != 0) + joinTree.getFilters().get(0).add(joinCond); + else + joinTree.getFilters().get(1).add(joinCond); + + break; + + default: + boolean isFunction = (joinCond.getType() == HiveParser.TOK_FUNCTION); + + // Create all children + int childrenBegin = (isFunction ? 
1 : 0); + ArrayList> leftAlias = new ArrayList>(joinCond.getChildCount() - childrenBegin); + ArrayList> rightAlias = new ArrayList>(joinCond.getChildCount() - childrenBegin); + for (int ci = 0; ci < joinCond.getChildCount() - childrenBegin; ci++) { + Vector left = new Vector(); + Vector right = new Vector(); + leftAlias.add(left); + rightAlias.add(right); + } + + for (int ci=childrenBegin; ci left : leftAlias) { + if (left.size() != 0) { + leftAliasNull = false; + break; + } + } + + boolean rightAliasNull = true; + for (Vector right : rightAlias) { + if (right.size() != 0) { + rightAliasNull = false; + break; + } + } + + if (!leftAliasNull && !rightAliasNull) + throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_1.getMsg(joinCond)); + + if (!leftAliasNull) + joinTree.getFilters().get(0).add(joinCond); + else + joinTree.getFilters().get(1).add(joinCond); + + break; + } + } + + @SuppressWarnings("nls") + public Operator putOpInsertMap(Operator op, RowResolver rr) + { + OpParseContext ctx = new OpParseContext(rr); + logicalPlan.addOpParseContext(op, ctx); + return op; + } + + @SuppressWarnings("nls") + private Operator genFilterPlan(String dest, QB qb, + Operator input) throws SemanticException { + + ASTNode whereExpr = qb.getParseInfo().getWhrForClause(dest); + return genFilterPlan(qb, (ASTNode)whereExpr.getChild(0), input); + } + + /** + * create a filter plan. The condition and the inputs are specified. + * @param qb current query block + * @param condn The condition to be resolved + * @param input the input operator + */ + @SuppressWarnings("nls") + private Operator genFilterPlan(QB qb, ASTNode condn, Operator inputOp) throws SemanticException { + + RowResolver inputRR = logicalPlan.getRowResolver(inputOp); + Operator filterOp = logicalPlan.newChildOperator( + new filterDesc(genExprNodeDesc(condn, inputRR), false), + logicalPlan.getRowResolver(inputOp), + inputOp); + + LOG.debug("Created Filter Plan for " + qb.getId() + " row schema: " + inputRR.toString()); + return filterOp; + } + + @SuppressWarnings("nls") + private Integer genColListRegex(String colRegex, String tabAlias, String alias, ASTNode sel, + ArrayList col_list, RowResolver input, Integer pos, + RowResolver output) throws SemanticException { + + // The table alias should exist + if (tabAlias != null && !input.hasTableAlias(tabAlias)) + throw new SemanticException(ErrorMsg.INVALID_TABLE_ALIAS.getMsg(sel)); + + // TODO: Have to put in the support for AS clause + Pattern regex = null; + try { + regex = Pattern.compile(colRegex, Pattern.CASE_INSENSITIVE); + } catch (PatternSyntaxException e) { + throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(sel, e.getMessage())); + } + + int matched = 0; + // This is the tab.* case + // In this case add all the columns to the fieldList + // from the input schema + for(ColumnInfo colInfo: input.getColumnInfos()) { + String name = colInfo.getInternalName(); + String [] tmp = input.reverseLookup(name); + + // Skip the colinfos which are not for this particular alias + if (tabAlias != null && !tmp[0].equalsIgnoreCase(tabAlias)) { + continue; + } + + // Not matching the regex? 
+ if (!regex.matcher(tmp[1]).matches()) { + continue; + } + + exprNodeColumnDesc expr = new exprNodeColumnDesc(colInfo.getType(), name, + colInfo.getTabAlias(), + colInfo.getIsPartitionCol()); + col_list.add(expr); + output.put(tmp[0], tmp[1], + new ColumnInfo(getColumnInternalName(pos), colInfo.getType(), + colInfo.getTabAlias(), colInfo.getIsPartitionCol())); + pos = Integer.valueOf(pos.intValue() + 1); + matched ++; + } + if (matched == 0) { + throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(sel)); + } + return pos; + } + + public static String getColumnInternalName(int pos) { + return HiveConf.getColumnInternalName(pos); + } + + + /** + * If the user script command needs any modifications - do it here + */ + private String getFixedCmd(String cmd) { + SessionState ss = SessionState.get(); + if(ss == null) + return cmd; + + // for local mode - replace any references to packaged files by name with + // the reference to the original file path + if(ss.getConf().get("mapred.job.tracker", "local").equals("local")) { + Set files = ss.list_resource(SessionState.ResourceType.FILE, null); + if((files != null) && !files.isEmpty()) { + int end = cmd.indexOf(" "); + String prog = (end == -1) ? cmd : cmd.substring(0, end); + String args = (end == -1) ? "" : cmd.substring(end, cmd.length()); + + for(String oneFile: files) { + Path p = new Path(oneFile); + if(p.getName().equals(prog)) { + cmd = oneFile + args; + break; + } + } + } + } + + return cmd; + } + + @SuppressWarnings("unchecked") + private tableDesc getTableDescFromSerDe(ASTNode child, String cols, String colTypes, boolean defaultCols) throws SemanticException { + if (child.getType() == HiveParser.TOK_SERDENAME) { + String serdeName = ParseUtils.unescapeSQLString(child.getChild(0).getText()); + Class serdeClass = null; + + try { + serdeClass = (Class)Class.forName(serdeName, true, JavaUtils.getClassLoader()); + } catch (ClassNotFoundException e) { + throw new SemanticException(e); + } + + tableDesc tblDesc = PlanUtils.getTableDesc(serdeClass, Integer.toString(Utilities.tabCode), cols, colTypes, defaultCols, true); + // copy all the properties + if (child.getChildCount() == 2) { + ASTNode prop = (ASTNode)((ASTNode)child.getChild(1)).getChild(0); + for (int propChild = 0; propChild < prop.getChildCount(); propChild++) { + String key = ParseUtils.unescapeSQLString(prop.getChild(propChild).getChild(0).getText()); + String value = ParseUtils.unescapeSQLString(prop.getChild(propChild).getChild(1).getText()); + tblDesc.getProperties().setProperty(key,value); + } + } + return tblDesc; + } + else if (child.getType() == HiveParser.TOK_SERDEPROPS) { + tableDesc tblDesc = PlanUtils.getDefaultTableDesc(Integer.toString(Utilities.ctrlaCode), cols, colTypes, defaultCols); + int numChildRowFormat = child.getChildCount(); + for (int numC = 0; numC < numChildRowFormat; numC++) + { + ASTNode rowChild = (ASTNode)child.getChild(numC); + switch (rowChild.getToken().getType()) { + case HiveParser.TOK_TABLEROWFORMATFIELD: + String fieldDelim = ParseUtils.unescapeSQLString(rowChild.getChild(0).getText()); + tblDesc.getProperties().setProperty(Constants.FIELD_DELIM, fieldDelim); + tblDesc.getProperties().setProperty(Constants.SERIALIZATION_FORMAT, fieldDelim); + + if (rowChild.getChildCount()>=2) { + String fieldEscape = ParseUtils.unescapeSQLString(rowChild.getChild(1).getText()); + tblDesc.getProperties().setProperty(Constants.ESCAPE_CHAR, fieldEscape); + } + break; + case HiveParser.TOK_TABLEROWFORMATCOLLITEMS: + 
tblDesc.getProperties().setProperty(Constants.COLLECTION_DELIM, ParseUtils.unescapeSQLString(rowChild.getChild(0).getText())); + break; + case HiveParser.TOK_TABLEROWFORMATMAPKEYS: + tblDesc.getProperties().setProperty(Constants.MAPKEY_DELIM, ParseUtils.unescapeSQLString(rowChild.getChild(0).getText())); + break; + case HiveParser.TOK_TABLEROWFORMATLINES: + tblDesc.getProperties().setProperty(Constants.LINE_DELIM, ParseUtils.unescapeSQLString(rowChild.getChild(0).getText())); + break; + default: assert false; + } + } + + return tblDesc; + } + + // should never come here + return null; + } + + private void failIfColAliasExists(Set nameSet, String name) throws SemanticException { + if(nameSet.contains(name)) + throw new SemanticException(ErrorMsg.COLUMN_ALIAS_ALREADY_EXISTS.getMsg(name)); + nameSet.add(name); + } + + @SuppressWarnings({ "nls", "unchecked" }) + private Operator genScriptPlan(ASTNode trfm, QB qb, + Operator input) throws SemanticException { + // If there is no "AS" clause, the output schema will be "key,value" + ArrayList outputCols = new ArrayList(); + int inputSerDeNum = 1, inputRecordWriterNum = 2; + int outputSerDeNum = 4, outputRecordReaderNum = 5; + int outputColsNum = 6; + boolean outputColNames = false, outputColSchemas = false; + int execPos = 3; + boolean defaultOutputCols = false; + + // Go over all the children + if (trfm.getChildCount() > outputColsNum) { + ASTNode outCols = (ASTNode)trfm.getChild(outputColsNum); + if (outCols.getType() == HiveParser.TOK_ALIASLIST) + outputColNames = true; + else if (outCols.getType() == HiveParser.TOK_TABCOLLIST) + outputColSchemas = true; + } + + // If column type is not specified, use a string + if (!outputColNames && !outputColSchemas) { + outputCols.add(new ColumnInfo("key", TypeInfoFactory.stringTypeInfo, null, false)); + outputCols.add(new ColumnInfo("value", TypeInfoFactory.stringTypeInfo, null, false)); + defaultOutputCols = true; + } + else { + ASTNode collist = (ASTNode) trfm.getChild(outputColsNum); + int ccount = collist.getChildCount(); + + Set colAliasNamesDuplicateCheck = new HashSet(); + if (outputColNames) { + for (int i=0; i < ccount; ++i) { + String colAlias = ParseUtils.unescapeIdentifier(((ASTNode)collist.getChild(i)).getText()); + failIfColAliasExists(colAliasNamesDuplicateCheck, colAlias); + outputCols.add(new ColumnInfo(colAlias, TypeInfoFactory.stringTypeInfo, null, false)); + } + } + else { + for (int i=0; i < ccount; ++i) { + ASTNode child = (ASTNode) collist.getChild(i); + assert child.getType() == HiveParser.TOK_TABCOL; + String colAlias = ParseUtils.unescapeIdentifier(((ASTNode)child.getChild(0)).getText()); + failIfColAliasExists(colAliasNamesDuplicateCheck, colAlias); + outputCols.add(new ColumnInfo(colAlias, + TypeInfoUtils.getTypeInfoFromTypeString(DDLSemanticAnalyzer.getTypeName(((ASTNode)child.getChild(1)).getType())), null, false)); + } + } + } + + RowResolver out_rwsch = new RowResolver(); + StringBuilder columns = new StringBuilder(); + StringBuilder columnTypes = new StringBuilder(); + + for (int i = 0; i < outputCols.size(); ++i) { + if (i != 0) { + columns.append(","); + columnTypes.append(","); + } + + columns.append(outputCols.get(i).getInternalName()); + columnTypes.append(outputCols.get(i).getType().getTypeName()); + + out_rwsch.put( + qb.getParseInfo().getAlias(), + outputCols.get(i).getInternalName(), + outputCols.get(i)); + } + + StringBuilder inpColumns = new StringBuilder(); + StringBuilder inpColumnTypes = new StringBuilder(); + Vector inputSchema = 
logicalPlan.getRowResolver(input).getColumnInfos(); + for (int i = 0; i < inputSchema.size(); ++i) { + if (i != 0) { + inpColumns.append(","); + inpColumnTypes.append(","); + } + + inpColumns.append(inputSchema.get(i).getInternalName()); + inpColumnTypes.append(inputSchema.get(i).getType().getTypeName()); + } + + tableDesc outInfo; + tableDesc inInfo; + String defaultSerdeName = hiveConf.getVar(HiveConf.ConfVars.HIVESCRIPTSERDE); + Class serde; + + try { + serde = (Class)Class.forName(defaultSerdeName, true, JavaUtils.getClassLoader()); + } catch (ClassNotFoundException e) { + throw new SemanticException(e); + } + + // Input and Output Serdes + if (trfm.getChild(inputSerDeNum).getChildCount() > 0) + inInfo = getTableDescFromSerDe((ASTNode)(((ASTNode)trfm.getChild(inputSerDeNum))).getChild(0), inpColumns.toString(), inpColumnTypes.toString(), false); + else + inInfo = PlanUtils.getTableDesc(serde, Integer.toString(Utilities.tabCode), inpColumns.toString(), inpColumnTypes.toString(), false, true); + + if (trfm.getChild(outputSerDeNum).getChildCount() > 0) + outInfo = getTableDescFromSerDe((ASTNode)(((ASTNode)trfm.getChild(outputSerDeNum))).getChild(0), columns.toString(), columnTypes.toString(), false); + // This is for backward compatibility. If the user did not specify the output column list, we assume that there are 2 columns: key and value. + // However, if the script outputs: col1, col2, col3 seperated by TAB, the requirement is: key is col and value is (col2 TAB col3) + else + outInfo = PlanUtils.getTableDesc(serde, Integer.toString(Utilities.tabCode), columns.toString(), columnTypes.toString(), defaultOutputCols); + + // Output record readers + Class outRecordReader = getRecordReader((ASTNode)trfm.getChild(outputRecordReaderNum)); + Class inRecordWriter = getRecordWriter((ASTNode)trfm.getChild(inputRecordWriterNum)); + + Operator output = logicalPlan.newChildOperator( + new scriptDesc(getFixedCmd(ParseUtils.stripQuotes(trfm.getChild(execPos).getText())), + inInfo, inRecordWriter, outInfo, outRecordReader), + out_rwsch, input); + + return output; + } + + @SuppressWarnings("unchecked") + private Class getRecordReader(ASTNode node) throws SemanticException { + String name; + + if (node.getChildCount() == 0) + name = hiveConf.getVar(HiveConf.ConfVars.HIVESCRIPTRECORDREADER); + else + name = ParseUtils.unescapeSQLString(node.getChild(0).getText()); + + try { + return (Class)Class.forName(name, true, JavaUtils.getClassLoader()); + } catch (ClassNotFoundException e) { + throw new SemanticException(e); + } + } + + @SuppressWarnings("unchecked") + private Class getRecordWriter(ASTNode node) throws SemanticException { + String name; + + if (node.getChildCount() == 0) + name = hiveConf.getVar(HiveConf.ConfVars.HIVESCRIPTRECORDWRITER); + else + name = ParseUtils.unescapeSQLString(node.getChild(0).getText()); + + try { + return (Class)Class.forName(name, true, JavaUtils.getClassLoader()); + } catch (ClassNotFoundException e) { + throw new SemanticException(e); + } + } + + /** + * This function is a wrapper of parseInfo.getGroupByForClause which automatically + * translates SELECT DISTINCT a,b,c to SELECT a,b,c GROUP BY a,b,c. + */ + static List getGroupByForClause(QBParseInfo parseInfo, String dest) { + if (parseInfo.getSelForClause(dest).getToken().getType() == HiveParser.TOK_SELECTDI) { + ASTNode selectExprs = parseInfo.getSelForClause(dest); + List result = new ArrayList(selectExprs == null + ? 
0 : selectExprs.getChildCount()); + if (selectExprs != null) { + for (int i = 0; i < selectExprs.getChildCount(); ++i) { + // table.column AS alias + ASTNode grpbyExpr = (ASTNode) selectExprs.getChild(i).getChild(0); + result.add(grpbyExpr); + } + } + return result; + } else { + ASTNode grpByExprs = parseInfo.getGroupByForClause(dest); + List result = new ArrayList(grpByExprs == null + ? 0 : grpByExprs.getChildCount()); + if (grpByExprs != null) { + for (int i = 0; i < grpByExprs.getChildCount(); ++i) { + ASTNode grpbyExpr = (ASTNode) grpByExprs.getChild(i); + result.add(grpbyExpr); + } + } + return result; + } + } + + private static String[] getColAlias(ASTNode selExpr, String defaultName, RowResolver inputRR) { + String colAlias = null; + String tabAlias = null; + String[] colRef = new String[2]; + + if (selExpr.getChildCount() == 2) { + // return zz for "xx + yy AS zz" + colAlias = ParseUtils.unescapeIdentifier(selExpr.getChild(1).getText()); + colRef[0] = tabAlias; + colRef[1] = colAlias; + return colRef; + } + + ASTNode root = (ASTNode) selExpr.getChild(0); + if (root.getType() == HiveParser.TOK_TABLE_OR_COL) { + colAlias = root.getChild(0).getText(); + colRef[0] = tabAlias; + colRef[1] = colAlias; + return colRef; + } + + if (root.getType() == HiveParser.DOT) { + ASTNode tab = (ASTNode) root.getChild(0); + if (tab.getType() == HiveParser.TOK_TABLE_OR_COL) { + String t = ParseUtils.unescapeIdentifier(tab.getChild(0).getText()); + if (inputRR.hasTableAlias(t)) { + tabAlias = t; + } + } + + // Return zz for "xx.zz" and "xx.yy.zz" + ASTNode col = (ASTNode) root.getChild(1); + if (col.getType() == HiveParser.Identifier) { + colAlias = ParseUtils.unescapeIdentifier(col.getText()); + } + } + + if(colAlias == null) { + // Return defaultName if selExpr is not a simple xx.yy.zz + colAlias = defaultName; + } + + colRef[0] = tabAlias; + colRef[1] = colAlias; + return colRef; + } + + /** + * Returns whether the pattern is a regex expression (instead of a normal string). + * Normal string is a string with all alphabets/digits and "_". + */ + private static boolean isRegex(String pattern) { + for(int i=0; i genSelectPlan(String dest, QB qb, + Operator inputOp) + throws SemanticException { + + ASTNode selExprList = qb.getParseInfo().getSelForClause(dest); + + ArrayList col_list = new ArrayList(); + RowResolver out_rwsch = new RowResolver(); + ASTNode trfm = null; + String alias = qb.getParseInfo().getAlias(); + Integer pos = Integer.valueOf(0); + RowResolver inputRR = logicalPlan.getRowResolver(inputOp); + // SELECT * or SELECT TRANSFORM(*) + boolean selectStar = false; + int posn = 0; + boolean hintPresent = (selExprList.getChild(0).getType() == HiveParser.TOK_HINTLIST); + if (hintPresent) { + posn++; + } + + boolean isInTransform = (selExprList.getChild(posn).getChild(0).getType() + == HiveParser.TOK_TRANSFORM); + if (isInTransform) { + trfm = (ASTNode) selExprList.getChild(posn).getChild(0); + } + + // The list of expressions after SELECT or SELECT TRANSFORM. + ASTNode exprList = (isInTransform ? (ASTNode) trfm.getChild(0) : selExprList); + + LOG.debug("genSelectPlan: input = " + inputRR.toString()); + // Iterate over all expression (either after SELECT, or in SELECT TRANSFORM) + for (int i = posn; i < exprList.getChildCount(); ++i) { + + // child can be EXPR AS ALIAS, or EXPR. 
+ ASTNode child = (ASTNode) exprList.getChild(i); + boolean hasAsClause = (!isInTransform) && (child.getChildCount() == 2); + // The real expression + ASTNode expr; + String tabAlias; + String colAlias; + + if (isInTransform) { + tabAlias = null; + colAlias = "_C" + i; + expr = child; + } else { + String[] colRef = getColAlias(child, "_C" + i, inputRR); + tabAlias = colRef[0]; + colAlias = colRef[1]; + // Get rid of TOK_SELEXPR + expr = (ASTNode)child.getChild(0); + } + + if (expr.getType() == HiveParser.TOK_ALLCOLREF) { + pos = genColListRegex(".*", + expr.getChildCount() == 0 ? null : ParseUtils.unescapeIdentifier(expr.getChild(0).getText().toLowerCase()), + alias, expr, col_list, inputRR, pos, out_rwsch); + selectStar = true; + } else if (expr.getType() == HiveParser.TOK_TABLE_OR_COL + && !hasAsClause + && !inputRR.getIsExprResolver() + && isRegex(ParseUtils.unescapeIdentifier(expr.getChild(0).getText()))) { + // In case the expression is a regex COL. + // This can only happen without AS clause + // We don't allow this for ExprResolver - the Group By case + pos = genColListRegex(ParseUtils.unescapeIdentifier(expr.getChild(0).getText()), + null, alias, expr, col_list, inputRR, pos, out_rwsch); + } else if (expr.getType() == HiveParser.DOT + && expr.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL + && inputRR.hasTableAlias(ParseUtils.unescapeIdentifier(expr.getChild(0).getChild(0).getText().toLowerCase())) + && !hasAsClause + && !inputRR.getIsExprResolver() + && isRegex(ParseUtils.unescapeIdentifier(expr.getChild(1).getText()))) { + // In case the expression is TABLE.COL (col can be regex). + // This can only happen without AS clause + // We don't allow this for ExprResolver - the Group By case + pos = genColListRegex(ParseUtils.unescapeIdentifier(expr.getChild(1).getText()), + ParseUtils.unescapeIdentifier(expr.getChild(0).getChild(0).getText().toLowerCase()), + alias, expr, col_list, inputRR, pos, out_rwsch); + } else { + // Case when this is an expression + exprNodeDesc exp = genExprNodeDesc(expr, inputRR); + col_list.add(exp); + if (!StringUtils.isEmpty(alias) && + (out_rwsch.get(null, colAlias) != null)) { + throw new SemanticException(ErrorMsg.AMBIGUOUS_COLUMN.getMsg(expr.getChild(1))); + } + out_rwsch.put(tabAlias, colAlias, + new ColumnInfo(getColumnInternalName(pos), + exp.getTypeInfo(), tabAlias, false)); + pos = Integer.valueOf(pos.intValue() + 1); + } + } + selectStar = selectStar && exprList.getChildCount() == posn + 1; + + ArrayList columnNames = new ArrayList(); + Map colExprMap = new HashMap(); + for (int i=0; i outputOp = logicalPlan.newChildOperator( + new selectDesc(col_list, columnNames, selectStar), out_rwsch, inputOp); + outputOp.setColumnExprMap(colExprMap); + + if (isInTransform) { + outputOp = genScriptPlan(trfm, qb, outputOp); + } + + LOG.debug("Created Select Plan for clause: " + dest + " row schema: " + out_rwsch.toString()); + + return outputOp; + } + + + + + /** + * Class to store GenericUDAF related information. + */ + static class GenericUDAFInfo { + ArrayList convertedParameters; + GenericUDAFEvaluator genericUDAFEvaluator; + TypeInfo returnType; + } + + /** + * Convert exprNodeDesc array to Typeinfo array. + */ + static ArrayList getTypeInfo(ArrayList exprs) { + ArrayList result = new ArrayList(); + for(exprNodeDesc expr: exprs) { + result.add(expr.getTypeInfo()); + } + return result; + } + + /** + * Convert exprNodeDesc array to Typeinfo array. 
+ */ + static ObjectInspector[] getStandardObjectInspector(ArrayList exprs) { + ObjectInspector[] result = new ObjectInspector[exprs.size()]; + for (int i=0; i aggParameters, + ASTNode aggTree) throws SemanticException { + ArrayList originalParameterTypeInfos = getTypeInfo(aggParameters); + GenericUDAFEvaluator result = FunctionRegistry.getGenericUDAFEvaluator( + aggName, originalParameterTypeInfos); + if (null == result) { + String reason = "Looking for UDAF Evaluator\"" + aggName + "\" with parameters " + + originalParameterTypeInfos; + throw new SemanticException(ErrorMsg.INVALID_FUNCTION_SIGNATURE. + getMsg((ASTNode)aggTree.getChild(0), reason)); + } + return result; + } + + /** + * Returns the GenericUDAFInfo struct for the aggregation. + * @param aggName The name of the UDAF. + * @param aggParameters The exprNodeDesc of the original parameters + * @param aggTree The ASTNode node of the UDAF in the query. + * @return GenericUDAFInfo + * @throws SemanticException when the UDAF is not found or has problems. + */ + static GenericUDAFInfo getGenericUDAFInfo(GenericUDAFEvaluator evaluator, + GenericUDAFEvaluator.Mode emode, ArrayList aggParameters) + throws SemanticException { + + GenericUDAFInfo r = new GenericUDAFInfo(); + + // set r.genericUDAFEvaluator + r.genericUDAFEvaluator = evaluator; + + // set r.returnType + ObjectInspector returnOI = null; + try { + ObjectInspector[] aggObjectInspectors = + getStandardObjectInspector(getTypeInfo(aggParameters)); + returnOI = r.genericUDAFEvaluator.init(emode, aggObjectInspectors); + r.returnType = TypeInfoUtils.getTypeInfoFromObjectInspector(returnOI); + } catch (HiveException e) { + throw new SemanticException(e); + } + // set r.convertedParameters + // TODO: type conversion + r.convertedParameters = aggParameters; + + return r; + } + + private static GenericUDAFEvaluator.Mode groupByDescModeToUDAFMode(groupByDesc.Mode mode, boolean isDistinct) { + switch (mode) { + case COMPLETE: + return GenericUDAFEvaluator.Mode.COMPLETE; + case PARTIAL1: + return GenericUDAFEvaluator.Mode.PARTIAL1; + case PARTIAL2: + return GenericUDAFEvaluator.Mode.PARTIAL2; + case PARTIALS: + return isDistinct ? GenericUDAFEvaluator.Mode.PARTIAL1 : GenericUDAFEvaluator.Mode.PARTIAL2; + case FINAL: + return GenericUDAFEvaluator.Mode.FINAL; + case HASH: + return GenericUDAFEvaluator.Mode.PARTIAL1; + case MERGEPARTIAL: + return isDistinct ? GenericUDAFEvaluator.Mode.COMPLETE : GenericUDAFEvaluator.Mode.FINAL; + default: + throw new RuntimeException("internal error in groupByDescModeToUDAFMode"); + } + } + /** + * Generate the GroupByOperator for the Query Block (parseInfo.getXXX(dest)). + * The new GroupByOperator will be a child of the reduceSinkOperatorInfo. + * + * @param mode The mode of the aggregation (PARTIAL1 or COMPLETE) + * @param genericUDAFEvaluators If not null, this function will store the mapping + * from Aggregation StringTree to the genericUDAFEvaluator in this parameter, + * so it can be used in the next-stage GroupBy aggregations. 
+ * @return the new GroupByOperator + */ + @SuppressWarnings("nls") + private Operator genGroupByPlanGroupByOperator( + QBParseInfo parseInfo, String dest, Operator reduceSinkOperatorInfo, + groupByDesc.Mode mode, Map genericUDAFEvaluators) + throws SemanticException { + RowResolver groupByInputRowResolver = logicalPlan.getRowResolver(reduceSinkOperatorInfo); + RowResolver groupByOutputRowResolver = new RowResolver(); + groupByOutputRowResolver.setIsExprResolver(true); + ArrayList groupByKeys = new ArrayList(); + ArrayList aggregations = new ArrayList(); + ArrayList outputColumnNames = new ArrayList(); + Map colExprMap = new HashMap(); + List grpByExprs = getGroupByForClause(parseInfo, dest); + for (int i = 0; i < grpByExprs.size(); ++i) { + ASTNode grpbyExpr = grpByExprs.get(i); + String text = grpbyExpr.toStringTree(); + ColumnInfo exprInfo = groupByInputRowResolver.get("",text); + + if (exprInfo == null) { + throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(grpbyExpr)); + } + + groupByKeys.add(new exprNodeColumnDesc(exprInfo.getType(), + exprInfo.getInternalName(), "", false)); + String field = getColumnInternalName(i); + outputColumnNames.add(field); + groupByOutputRowResolver.put("",grpbyExpr.toStringTree(), + new ColumnInfo(field, exprInfo.getType(), null, false)); + colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1)); + } + // For each aggregation + HashMap aggregationTrees = parseInfo + .getAggregationExprsForClause(dest); + assert (aggregationTrees != null); + for (Map.Entry entry : aggregationTrees.entrySet()) { + ASTNode value = entry.getValue(); + + // This is the GenericUDAF name + String aggName = value.getChild(0).getText(); + + // Convert children to aggParameters + ArrayList aggParameters = new ArrayList(); + // 0 is the function name + for (int i = 1; i < value.getChildCount(); i++) { + String text = value.getChild(i).toStringTree(); + ASTNode paraExpr = (ASTNode)value.getChild(i); + ColumnInfo paraExprInfo = groupByInputRowResolver.get("",text); + if (paraExprInfo == null) { + throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(paraExpr)); + } + + String paraExpression = paraExprInfo.getInternalName(); + assert(paraExpression != null); + aggParameters.add(new exprNodeColumnDesc(paraExprInfo.getType(), + paraExprInfo.getInternalName(), + paraExprInfo.getTabAlias(), + paraExprInfo.getIsPartitionCol())); + } + + boolean isDistinct = value.getType() == HiveParser.TOK_FUNCTIONDI; + Mode amode = groupByDescModeToUDAFMode(mode, isDistinct); + GenericUDAFEvaluator genericUDAFEvaluator = getGenericUDAFEvaluator(aggName, aggParameters, value); + assert(genericUDAFEvaluator != null); + GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode, aggParameters); + aggregations.add(new aggregationDesc(aggName.toLowerCase(), udaf.genericUDAFEvaluator, udaf.convertedParameters, + isDistinct, amode)); + String field = getColumnInternalName(groupByKeys.size() + aggregations.size() -1); + outputColumnNames.add(field); + groupByOutputRowResolver.put("",value.toStringTree(), + new ColumnInfo(field, + udaf.returnType, "", false)); + // Save the evaluator so that it can be used by the next-stage GroupByOperators + if (genericUDAFEvaluators != null) { + genericUDAFEvaluators.put(entry.getKey(), genericUDAFEvaluator); + } + } + + Operator op = logicalPlan.newChildOperator( + new groupByDesc(mode, outputColumnNames, groupByKeys, aggregations, false), + groupByOutputRowResolver, reduceSinkOperatorInfo); + op.setColumnExprMap(colExprMap); + + return op; + } + + 
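
The mode mapping in groupByDescModeToUDAFMode above reflects how an aggregation is evaluated in stages: a map-side group-by produces partial results per key (HASH/PARTIAL1) and a reduce-side group-by merges those partials into the final value (MERGEPARTIAL/FINAL). The following is a minimal, self-contained sketch of that two-phase pattern for a count-per-key aggregation; it is plain Java rather than Hive's GenericUDAFEvaluator API, and the class and method names are illustrative only.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class TwoPhaseCountSketch {

  // Map side (think PARTIAL1/HASH): fold raw rows into per-key partial counts.
  static Map<String, Long> partial(List<String> keys) {
    Map<String, Long> partials = new HashMap<String, Long>();
    for (String k : keys) {
      Long c = partials.get(k);
      partials.put(k, c == null ? 1L : c + 1L);
    }
    return partials;
  }

  // Reduce side (think FINAL/MERGEPARTIAL): merge the partial counts of all map tasks.
  static Map<String, Long> merge(List<Map<String, Long>> allPartials) {
    Map<String, Long> result = new HashMap<String, Long>();
    for (Map<String, Long> p : allPartials) {
      for (Map.Entry<String, Long> e : p.entrySet()) {
        Long c = result.get(e.getKey());
        result.put(e.getKey(), c == null ? e.getValue() : c + e.getValue());
      }
    }
    return result;
  }

  public static void main(String[] args) {
    List<Map<String, Long>> partials = new ArrayList<Map<String, Long>>();
    partials.add(partial(Arrays.asList("a", "b", "a")));  // output of one map task
    partials.add(partial(Arrays.asList("b", "b")));       // output of another map task
    // Prints the merged counts: a=2, b=3 (map iteration order may vary).
    System.out.println(merge(partials));
  }
}

Distinct aggregates are the exception: their values cannot be pre-merged per map task, which is why the isDistinct branches in groupByDescModeToUDAFMode fall back to COMPLETE or PARTIAL1 instead of FINAL or PARTIAL2.
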
/** + * Generate the GroupByOperator for the Query Block (parseInfo.getXXX(dest)). + * The new GroupByOperator will be a child of the reduceSinkOperatorInfo. + * + * @param mode The mode of the aggregation (MERGEPARTIAL, PARTIAL2) + * @param genericUDAFEvaluators The mapping from Aggregation StringTree to the + * genericUDAFEvaluator. + * @param distPartAggr partial aggregation for distincts + * @return the new GroupByOperator + */ + @SuppressWarnings("nls") + private Operator genGroupByPlanGroupByOperator1( + QBParseInfo parseInfo, String dest, Operator reduceSinkOperatorInfo, + groupByDesc.Mode mode, Map genericUDAFEvaluators, boolean distPartAgg) + throws SemanticException { + ArrayList outputColumnNames = new ArrayList(); + RowResolver groupByInputRowResolver = logicalPlan.getRowResolver(reduceSinkOperatorInfo); + RowResolver groupByOutputRowResolver = new RowResolver(); + groupByOutputRowResolver.setIsExprResolver(true); + ArrayList groupByKeys = new ArrayList(); + ArrayList aggregations = new ArrayList(); + List grpByExprs = getGroupByForClause(parseInfo, dest); + Map colExprMap = new HashMap(); + for (int i = 0; i < grpByExprs.size(); ++i) { + ASTNode grpbyExpr = grpByExprs.get(i); + String text = grpbyExpr.toStringTree(); + ColumnInfo exprInfo = groupByInputRowResolver.get("",text); + + if (exprInfo == null) { + throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(grpbyExpr)); + } + + groupByKeys.add(new exprNodeColumnDesc(exprInfo.getType(), + exprInfo.getInternalName(), + exprInfo.getTabAlias(), + exprInfo.getIsPartitionCol())); + String field = getColumnInternalName(i); + outputColumnNames.add(field); + groupByOutputRowResolver.put("",grpbyExpr.toStringTree(), + new ColumnInfo(field, exprInfo.getType(), "", false)); + colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1)); + } + + HashMap aggregationTrees = parseInfo + .getAggregationExprsForClause(dest); + for (Map.Entry entry : aggregationTrees.entrySet()) { + ASTNode value = entry.getValue(); + String aggName = value.getChild(0).getText(); + ArrayList aggParameters = new ArrayList(); + + // If the function is distinct, partial aggregartion has not been done on the client side. + // If distPartAgg is set, the client is letting us know that partial aggregation has not been done. + // For eg: select a, count(b+c), count(distinct d+e) group by a + // For count(b+c), if partial aggregation has been performed, then we directly look for count(b+c). + // Otherwise, we look for b+c. 
+ // For distincts, partial aggregation is never performed on the client side, so always look for the parameters: d+e + boolean partialAggDone = !(distPartAgg || (value.getToken().getType() == HiveParser.TOK_FUNCTIONDI)); + if (!partialAggDone) { + // 0 is the function name + for (int i = 1; i < value.getChildCount(); i++) { + String text = value.getChild(i).toStringTree(); + ASTNode paraExpr = (ASTNode)value.getChild(i); + ColumnInfo paraExprInfo = groupByInputRowResolver.get("",text); + if (paraExprInfo == null) { + throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(paraExpr)); + } + + String paraExpression = paraExprInfo.getInternalName(); + assert(paraExpression != null); + aggParameters.add(new exprNodeColumnDesc(paraExprInfo.getType(), + paraExprInfo.getInternalName(), + paraExprInfo.getTabAlias(), + paraExprInfo.getIsPartitionCol())); + } + } + else { + String text = entry.getKey(); + ColumnInfo paraExprInfo = groupByInputRowResolver.get("",text); + if (paraExprInfo == null) { + throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(value)); + } + String paraExpression = paraExprInfo.getInternalName(); + assert(paraExpression != null); + aggParameters.add(new exprNodeColumnDesc(paraExprInfo.getType(), paraExpression, + paraExprInfo.getTabAlias(), + paraExprInfo.getIsPartitionCol())); + } + boolean isDistinct = (value.getType() == HiveParser.TOK_FUNCTIONDI); + Mode amode = groupByDescModeToUDAFMode(mode, isDistinct); + GenericUDAFEvaluator genericUDAFEvaluator = null; + // For distincts, partial aggregations have not been done + if (distPartAgg) { + genericUDAFEvaluator = getGenericUDAFEvaluator(aggName, aggParameters, value); + assert(genericUDAFEvaluator != null); + genericUDAFEvaluators.put(entry.getKey(), genericUDAFEvaluator); + } + else { + genericUDAFEvaluator = genericUDAFEvaluators.get(entry.getKey()); + assert(genericUDAFEvaluator != null); + } + + GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode, aggParameters); + aggregations.add(new aggregationDesc(aggName.toLowerCase(), udaf.genericUDAFEvaluator, udaf.convertedParameters, + (mode != groupByDesc.Mode.FINAL && isDistinct), amode)); + String field = getColumnInternalName(groupByKeys.size() + aggregations.size() - 1); + outputColumnNames.add(field); + groupByOutputRowResolver.put("", value.toStringTree(), + new ColumnInfo(field, udaf.returnType, "", false)); + } + + Operator op = logicalPlan.newChildOperator( + new groupByDesc(mode, outputColumnNames, groupByKeys, aggregations, distPartAgg), + groupByOutputRowResolver, reduceSinkOperatorInfo); + op.setColumnExprMap(colExprMap); + + return op; + } + + /** + * Generate the map-side GroupByOperator for the Query Block (qb.getParseInfo().getXXX(dest)). + * The new GroupByOperator will be a child of the inputOperatorInfo. + * + * @param mode The mode of the aggregation (HASH) + * @param genericUDAFEvaluators If not null, this function will store the mapping + * from Aggregation StringTree to the genericUDAFEvaluator in this parameter, + * so it can be used in the next-stage GroupBy aggregations. 
+ * @return the new GroupByOperator + */ + @SuppressWarnings("nls") + private Operator genGroupByPlanMapGroupByOperator(QB qb, String dest, Operator inputOperatorInfo, + groupByDesc.Mode mode, Map genericUDAFEvaluators) throws SemanticException { + + RowResolver groupByInputRowResolver = logicalPlan.getRowResolver(inputOperatorInfo); + QBParseInfo parseInfo = qb.getParseInfo(); + RowResolver groupByOutputRowResolver = new RowResolver(); + groupByOutputRowResolver.setIsExprResolver(true); + ArrayList groupByKeys = new ArrayList(); + ArrayList outputColumnNames = new ArrayList(); + ArrayList aggregations = new ArrayList(); + Map colExprMap = new HashMap(); + List grpByExprs = getGroupByForClause(parseInfo, dest); + for (int i = 0; i < grpByExprs.size(); ++i) { + ASTNode grpbyExpr = grpByExprs.get(i); + exprNodeDesc grpByExprNode = genExprNodeDesc(grpbyExpr, groupByInputRowResolver); + + groupByKeys.add(grpByExprNode); + String field = getColumnInternalName(i); + outputColumnNames.add(field); + groupByOutputRowResolver.put("",grpbyExpr.toStringTree(), + new ColumnInfo(field, grpByExprNode.getTypeInfo(), "", false)); + colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1)); + } + + // If there is a distinctFuncExp, add all parameters to the reduceKeys. + if (parseInfo.getDistinctFuncExprForClause(dest) != null) { + ASTNode value = parseInfo.getDistinctFuncExprForClause(dest); + int numDistn=0; + // 0 is function name + for (int i = 1; i < value.getChildCount(); i++) { + ASTNode parameter = (ASTNode) value.getChild(i); + String text = parameter.toStringTree(); + if (groupByOutputRowResolver.get("",text) == null) { + exprNodeDesc distExprNode = genExprNodeDesc(parameter, groupByInputRowResolver); + groupByKeys.add(distExprNode); + numDistn++; + String field = getColumnInternalName(grpByExprs.size() + numDistn -1); + outputColumnNames.add(field); + groupByOutputRowResolver.put("", text, new ColumnInfo(field, distExprNode.getTypeInfo(), "", false)); + colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1)); + } + } + } + + // For each aggregation + HashMap aggregationTrees = parseInfo.getAggregationExprsForClause(dest); + assert (aggregationTrees != null); + + for (Map.Entry entry : aggregationTrees.entrySet()) { + ASTNode value = entry.getValue(); + String aggName = value.getChild(0).getText(); + ArrayList aggParameters = new ArrayList(); + // 0 is the function name + for (int i = 1; i < value.getChildCount(); i++) { + ASTNode paraExpr = (ASTNode)value.getChild(i); + exprNodeDesc paraExprNode = genExprNodeDesc(paraExpr, groupByInputRowResolver); + + aggParameters.add(paraExprNode); + } + + boolean isDistinct = value.getType() == HiveParser.TOK_FUNCTIONDI; + Mode amode = groupByDescModeToUDAFMode(mode, isDistinct); + + GenericUDAFEvaluator genericUDAFEvaluator = getGenericUDAFEvaluator(aggName, aggParameters, value); + assert(genericUDAFEvaluator != null); + GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode, aggParameters); + aggregations.add(new aggregationDesc(aggName.toLowerCase(), udaf.genericUDAFEvaluator, udaf.convertedParameters, + isDistinct, amode)); + String field = getColumnInternalName(groupByKeys.size() + aggregations.size() -1); + outputColumnNames.add(field); + groupByOutputRowResolver.put("",value.toStringTree(), + new ColumnInfo(field, + udaf.returnType, "", false)); + // Save the evaluator so that it can be used by the next-stage GroupByOperators + if (genericUDAFEvaluators != null) { + genericUDAFEvaluators.put(entry.getKey(), 
genericUDAFEvaluator); + } + } + + Operator op = logicalPlan.newChildOperator( + new groupByDesc(mode, outputColumnNames, groupByKeys, aggregations, false), + groupByOutputRowResolver, inputOperatorInfo); + op.setColumnExprMap(colExprMap); + + return op; + } + + + /** + * Generate the ReduceSinkOperator for the Group By Query Block (qb.getPartInfo().getXXX(dest)). + * The new ReduceSinkOperator will be a child of inputOperatorInfo. + * + * It will put all Group By keys and the distinct field (if any) in the map-reduce sort key, + * and all other fields in the map-reduce value. + * + * @param numPartitionFields the number of fields for map-reduce partitioning. + * This is usually the number of fields in the Group By keys. + * @return the new ReduceSinkOperator. + * @throws SemanticException + */ + @SuppressWarnings("nls") + private Operator genGroupByPlanReduceSinkOperator(QB qb, + String dest, Operator inputOperatorInfo, int numPartitionFields, int numReducers, boolean mapAggrDone) throws SemanticException { + + RowResolver reduceSinkInputRowResolver = logicalPlan.getRowResolver(inputOperatorInfo); + QBParseInfo parseInfo = qb.getParseInfo(); + RowResolver reduceSinkOutputRowResolver = new RowResolver(); + reduceSinkOutputRowResolver.setIsExprResolver(true); + Map colExprMap = new HashMap(); + ArrayList reduceKeys = new ArrayList(); + // Pre-compute group-by keys and store in reduceKeys + + List outputColumnNames = new ArrayList(); + List grpByExprs = getGroupByForClause(parseInfo, dest); + for (int i = 0; i < grpByExprs.size(); ++i) { + ASTNode grpbyExpr = grpByExprs.get(i); + exprNodeDesc inputExpr = genExprNodeDesc(grpbyExpr, reduceSinkInputRowResolver); + reduceKeys.add(inputExpr); + String text = grpbyExpr.toStringTree(); + if (reduceSinkOutputRowResolver.get("", text) == null) { + outputColumnNames.add(getColumnInternalName(reduceKeys.size() - 1)); + String field = Utilities.ReduceField.KEY.toString() + "." + getColumnInternalName(reduceKeys.size() - 1); + ColumnInfo colInfo = new ColumnInfo(field, + reduceKeys.get(reduceKeys.size()-1).getTypeInfo(), null, false); + reduceSinkOutputRowResolver.put("", text, colInfo); + colExprMap.put(colInfo.getInternalName(), inputExpr); + } else { + throw new SemanticException(ErrorMsg.DUPLICATE_GROUPBY_KEY.getMsg(grpbyExpr)); + } + } + + // If there is a distinctFuncExp, add all parameters to the reduceKeys. + if (parseInfo.getDistinctFuncExprForClause(dest) != null) { + ASTNode value = parseInfo.getDistinctFuncExprForClause(dest); + // 0 is function name + for (int i = 1; i < value.getChildCount(); i++) { + ASTNode parameter = (ASTNode) value.getChild(i); + String text = parameter.toStringTree(); + if (reduceSinkOutputRowResolver.get("",text) == null) { + reduceKeys.add(genExprNodeDesc(parameter, reduceSinkInputRowResolver)); + outputColumnNames.add(getColumnInternalName(reduceKeys.size() - 1)); + String field = Utilities.ReduceField.KEY.toString() + "." 
+ getColumnInternalName(reduceKeys.size() - 1); + ColumnInfo colInfo = new ColumnInfo(field, + reduceKeys.get(reduceKeys.size()-1).getTypeInfo(), null, false); + reduceSinkOutputRowResolver.put("", text, colInfo); + colExprMap.put(colInfo.getInternalName(), reduceKeys.get(reduceKeys.size()-1)); + } + } + } + + ArrayList reduceValues = new ArrayList(); + HashMap aggregationTrees = parseInfo.getAggregationExprsForClause(dest); + + if (!mapAggrDone) { + // Put parameters to aggregations in reduceValues + for (Map.Entry entry : aggregationTrees.entrySet()) { + ASTNode value = entry.getValue(); + // 0 is function name + for (int i = 1; i < value.getChildCount(); i++) { + ASTNode parameter = (ASTNode) value.getChild(i); + String text = parameter.toStringTree(); + if (reduceSinkOutputRowResolver.get("",text) == null) { + reduceValues.add(genExprNodeDesc(parameter, reduceSinkInputRowResolver)); + outputColumnNames.add(getColumnInternalName(reduceValues.size() - 1)); + String field = Utilities.ReduceField.VALUE.toString() + "." + getColumnInternalName(reduceValues.size() - 1); + reduceSinkOutputRowResolver.put("", text, + new ColumnInfo(field, + reduceValues.get(reduceValues.size()-1).getTypeInfo(), + null, false)); + } + } + } + } + else + { + // Put partial aggregation results in reduceValues + int inputField = reduceKeys.size(); + + for (Map.Entry entry : aggregationTrees.entrySet()) { + + TypeInfo type = reduceSinkInputRowResolver.getColumnInfos().get(inputField).getType(); + reduceValues.add(new exprNodeColumnDesc(type, getColumnInternalName(inputField), + "", false)); + inputField++; + outputColumnNames.add(getColumnInternalName(reduceValues.size() - 1)); + String field = Utilities.ReduceField.VALUE.toString() + "." + getColumnInternalName(reduceValues.size() - 1); + reduceSinkOutputRowResolver.put("", ((ASTNode)entry.getValue()).toStringTree(), + new ColumnInfo(field, + type, null, false)); + } + } + + ReduceSinkOperator rsOp = (ReduceSinkOperator) logicalPlan.newChildOperator( + PlanUtils.getReduceSinkDesc(reduceKeys, reduceValues, outputColumnNames, true, -1, numPartitionFields, numReducers), + reduceSinkOutputRowResolver, inputOperatorInfo); + rsOp.setColumnExprMap(colExprMap); + + return rsOp; + } + + /** + * Generate the second ReduceSinkOperator for the Group By Plan (parseInfo.getXXX(dest)). + * The new ReduceSinkOperator will be a child of groupByOperatorInfo. + * + * The second ReduceSinkOperator will put the group by keys in the map-reduce sort + * key, and put the partial aggregation results in the map-reduce value. + * + * @param numPartitionFields the number of fields in the map-reduce partition key. + * This should always be the same as the number of Group By keys. We should be + * able to remove this parameter since in this phase there is no distinct any more. + * @return the new ReduceSinkOperator. 
+ * @throws SemanticException + */ + @SuppressWarnings("nls") + private Operator genGroupByPlanReduceSinkOperator2MR( + QBParseInfo parseInfo, String dest, Operator groupByOperatorInfo, int numPartitionFields, int numReducers) + throws SemanticException { + RowResolver reduceSinkInputRowResolver2 = logicalPlan.getRowResolver(groupByOperatorInfo); + RowResolver reduceSinkOutputRowResolver2 = new RowResolver(); + reduceSinkOutputRowResolver2.setIsExprResolver(true); + Map colExprMap = new HashMap(); + ArrayList reduceKeys = new ArrayList(); + ArrayList outputColumnNames = new ArrayList(); + // Get group-by keys and store in reduceKeys + List grpByExprs = getGroupByForClause(parseInfo, dest); + for (int i = 0; i < grpByExprs.size(); ++i) { + ASTNode grpbyExpr = grpByExprs.get(i); + String field = getColumnInternalName(i); + outputColumnNames.add(field); + TypeInfo typeInfo = reduceSinkInputRowResolver2.get("", grpbyExpr.toStringTree()).getType(); + exprNodeColumnDesc inputExpr = new exprNodeColumnDesc(typeInfo, field, "", false); + reduceKeys.add(inputExpr); + ColumnInfo colInfo = new ColumnInfo(Utilities.ReduceField.KEY.toString() + "." + field, + typeInfo, "", false); + reduceSinkOutputRowResolver2.put("", grpbyExpr.toStringTree(), + colInfo); + colExprMap.put(colInfo.getInternalName(), inputExpr); + } + // Get partial aggregation results and store in reduceValues + ArrayList reduceValues = new ArrayList(); + int inputField = reduceKeys.size(); + HashMap aggregationTrees = parseInfo + .getAggregationExprsForClause(dest); + for (Map.Entry entry : aggregationTrees.entrySet()) { + String field = getColumnInternalName(inputField); + ASTNode t = entry.getValue(); + TypeInfo typeInfo = reduceSinkInputRowResolver2.get("", t.toStringTree()).getType(); + reduceValues.add(new exprNodeColumnDesc(typeInfo, field, "", false)); + inputField++; + String col = getColumnInternalName(reduceValues.size()-1); + outputColumnNames.add(col); + reduceSinkOutputRowResolver2.put("", t.toStringTree(), + new ColumnInfo(Utilities.ReduceField.VALUE.toString() + "." + col, + typeInfo, "", false)); + } + + ReduceSinkOperator rsOp = (ReduceSinkOperator) logicalPlan.newChildOperator( + PlanUtils.getReduceSinkDesc(reduceKeys, reduceValues, outputColumnNames, true, -1, numPartitionFields, numReducers), + reduceSinkOutputRowResolver2, groupByOperatorInfo); + rsOp.setColumnExprMap(colExprMap); + + return rsOp; + } + + /** + * Generate the second GroupByOperator for the Group By Plan (parseInfo.getXXX(dest)). + * The new GroupByOperator will do the second aggregation based on the partial aggregation + * results. + * + * @param mode the mode of aggregation (FINAL) + * @param genericUDAFEvaluators The mapping from Aggregation StringTree to the + * genericUDAFEvaluator. 
+ * @return the new GroupByOperator + * @throws SemanticException + */ + @SuppressWarnings("nls") + private Operator genGroupByPlanGroupByOperator2MR( + QBParseInfo parseInfo, String dest, Operator reduceSinkOperatorInfo2, + groupByDesc.Mode mode, Map genericUDAFEvaluators) + throws SemanticException { + RowResolver groupByInputRowResolver2 = logicalPlan.getRowResolver(reduceSinkOperatorInfo2); + RowResolver groupByOutputRowResolver2 = new RowResolver(); + groupByOutputRowResolver2.setIsExprResolver(true); + ArrayList groupByKeys = new ArrayList(); + ArrayList aggregations = new ArrayList(); + Map colExprMap = new HashMap(); + List grpByExprs = getGroupByForClause(parseInfo, dest); + ArrayList outputColumnNames = new ArrayList(); + for (int i = 0; i < grpByExprs.size(); ++i) { + ASTNode grpbyExpr = grpByExprs.get(i); + String text = grpbyExpr.toStringTree(); + ColumnInfo exprInfo = groupByInputRowResolver2.get("",text); + if (exprInfo == null) { + throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(grpbyExpr)); + } + + String expression = exprInfo.getInternalName(); + groupByKeys.add(new exprNodeColumnDesc(exprInfo.getType(), expression, + exprInfo.getTabAlias(), + exprInfo.getIsPartitionCol())); + String field = getColumnInternalName(i); + outputColumnNames.add(field); + groupByOutputRowResolver2.put("",grpbyExpr.toStringTree(), + new ColumnInfo(field, exprInfo.getType(), "", false)); + colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1)); + } + HashMap aggregationTrees = parseInfo + .getAggregationExprsForClause(dest); + for (Map.Entry entry : aggregationTrees.entrySet()) { + ArrayList aggParameters = new ArrayList(); + ASTNode value = entry.getValue(); + String text = entry.getKey(); + ColumnInfo paraExprInfo = groupByInputRowResolver2.get("",text); + if (paraExprInfo == null) { + throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(value)); + } + String paraExpression = paraExprInfo.getInternalName(); + assert(paraExpression != null); + aggParameters.add(new exprNodeColumnDesc(paraExprInfo.getType(), paraExpression, + paraExprInfo.getTabAlias(), + paraExprInfo.getIsPartitionCol())); + + String aggName = value.getChild(0).getText(); + + boolean isDistinct = value.getType() == HiveParser.TOK_FUNCTIONDI; + Mode amode = groupByDescModeToUDAFMode(mode, isDistinct); + GenericUDAFEvaluator genericUDAFEvaluator = genericUDAFEvaluators.get(entry.getKey()); + assert(genericUDAFEvaluator != null); + GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode, aggParameters); + aggregations.add(new aggregationDesc(aggName.toLowerCase(), udaf.genericUDAFEvaluator, udaf.convertedParameters, + (mode != groupByDesc.Mode.FINAL && value.getToken().getType() == HiveParser.TOK_FUNCTIONDI), + amode)); + String field = getColumnInternalName(groupByKeys.size() + aggregations.size() - 1); + outputColumnNames.add(field); + groupByOutputRowResolver2.put("", value.toStringTree(), + new ColumnInfo(field, + udaf.returnType, "", false)); + } + + Operator op = logicalPlan.newChildOperator( + new groupByDesc(mode, outputColumnNames, groupByKeys, aggregations, false), + groupByOutputRowResolver2, reduceSinkOperatorInfo2); + op.setColumnExprMap(colExprMap); + + return op; + } + + /** + * Generate a Group-By plan using a single map-reduce job (3 operators will be + * inserted): + * + * ReduceSink ( keys = (K1_EXP, K2_EXP, DISTINCT_EXP), values = (A1_EXP, + * A2_EXP) ) SortGroupBy (keys = (KEY.0,KEY.1), aggregations = + * (count_distinct(KEY.2), sum(VALUE.0), count(VALUE.1))) Select 
(final + * selects) + * + * @param dest + * @param qb + * @param input + * @return + * @throws SemanticException + * + * Generate a Group-By plan using 1 map-reduce job. + * Spray by the group by key, and sort by the distinct key (if any), and + * compute aggregates * + * The agggregation evaluation functions are as follows: + * Partitioning Key: + * grouping key + * + * Sorting Key: + * grouping key if no DISTINCT + * grouping + distinct key if DISTINCT + * + * Reducer: iterate/merge + * (mode = COMPLETE) + **/ + @SuppressWarnings({"nls"}) + private Operator genGroupByPlan1MR(String dest, QB qb, + Operator input) throws SemanticException { + + QBParseInfo parseInfo = qb.getParseInfo(); + + int numReducers = -1; + List grpByExprs = getGroupByForClause(parseInfo, dest); + if (grpByExprs.isEmpty()) + numReducers = 1; + + // ////// 1. Generate ReduceSinkOperator + Operator reduceSinkOperatorInfo = genGroupByPlanReduceSinkOperator( + qb, dest, input, grpByExprs.size(), numReducers, false); + + // ////// 2. Generate GroupbyOperator + Operator groupByOperatorInfo = genGroupByPlanGroupByOperator(parseInfo, + dest, reduceSinkOperatorInfo, groupByDesc.Mode.COMPLETE, null); + + return groupByOperatorInfo; + } + + static ArrayList getUDAFEvaluators(ArrayList aggs) { + ArrayList result = new ArrayList(); + for (int i=0; i genGroupByPlan2MRMultiGroupBy(String dest, QB qb, + Operator input) throws SemanticException { + + // ////// Generate GroupbyOperator for a map-side partial aggregation + Map genericUDAFEvaluators = + new LinkedHashMap(); + + QBParseInfo parseInfo = qb.getParseInfo(); + + // ////// 2. Generate GroupbyOperator + Operator groupByOperatorInfo = + genGroupByPlanGroupByOperator1(parseInfo, dest, input, groupByDesc.Mode.HASH, genericUDAFEvaluators, true); + + int numReducers = -1; + List grpByExprs = getGroupByForClause(parseInfo, dest); + + // ////// 3. Generate ReduceSinkOperator2 + Operator reduceSinkOperatorInfo2 = genGroupByPlanReduceSinkOperator2MR( + parseInfo, dest, groupByOperatorInfo, grpByExprs.size(), numReducers); + + // ////// 4. Generate GroupbyOperator2 + Operator groupByOperatorInfo2 = + genGroupByPlanGroupByOperator2MR(parseInfo, dest, reduceSinkOperatorInfo2, groupByDesc.Mode.FINAL, genericUDAFEvaluators); + + return groupByOperatorInfo2; + } + + /** + * Generate a Group-By plan using a 2 map-reduce jobs (5 operators will be + * inserted): + * + * ReduceSink ( keys = (K1_EXP, K2_EXP, DISTINCT_EXP), values = (A1_EXP, + * A2_EXP) ) NOTE: If DISTINCT_EXP is null, partition by rand() SortGroupBy + * (keys = (KEY.0,KEY.1), aggregations = (count_distinct(KEY.2), sum(VALUE.0), + * count(VALUE.1))) ReduceSink ( keys = (0,1), values=(2,3,4)) SortGroupBy + * (keys = (KEY.0,KEY.1), aggregations = (sum(VALUE.0), sum(VALUE.1), + * sum(VALUE.2))) Select (final selects) + * + * @param dest + * @param qb + * @param input + * @return + * @throws SemanticException + * + * Generate a Group-By plan using a 2 map-reduce jobs. + * Spray by the grouping key and distinct key (or a random number, if no distinct is + * present) in hope of getting a uniform distribution, and compute partial aggregates + * grouped by the reduction key (grouping key + distinct key). + * Evaluate partial aggregates first, and spray by the grouping key to compute actual + * aggregates in the second phase. 
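+ *
+ * For example (hypothetical query): for
+ *   SELECT key, count(DISTINCT value) FROM src GROUP BY key
+ * the first job sprays on (key, value) and computes partial aggregates
+ * (mode = PARTIAL1), and the second job sprays on key alone and merges the
+ * partial results into the final count per key (mode = FINAL).
+ *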
+ * The agggregation evaluation functions are as follows: + * Partitioning Key: + * random() if no DISTINCT + * grouping + distinct key if DISTINCT + * + * Sorting Key: + * grouping key if no DISTINCT + * grouping + distinct key if DISTINCT + * + * Reducer: iterate/terminatePartial + * (mode = PARTIAL1) + * + * STAGE 2 + * + * Partitioning Key: + * grouping key + * + * Sorting Key: + * grouping key if no DISTINCT + * grouping + distinct key if DISTINCT + * + * Reducer: merge/terminate + * (mode = FINAL) + */ + @SuppressWarnings("nls") + private Operator genGroupByPlan2MR(String dest, QB qb, + Operator input) throws SemanticException { + + QBParseInfo parseInfo = qb.getParseInfo(); + + // ////// 1. Generate ReduceSinkOperator + // There is a special case when we want the rows to be randomly distributed to + // reducers for load balancing problem. That happens when there is no DISTINCT + // operator. We set the numPartitionColumns to -1 for this purpose. This is + // captured by WritableComparableHiveObject.hashCode() function. + Operator reduceSinkOperatorInfo = genGroupByPlanReduceSinkOperator( + qb, dest, input, (parseInfo.getDistinctFuncExprForClause(dest) == null ? -1 + : Integer.MAX_VALUE), -1, false); + + // ////// 2. Generate GroupbyOperator + Map genericUDAFEvaluators = + new LinkedHashMap(); + GroupByOperator groupByOperatorInfo = (GroupByOperator)genGroupByPlanGroupByOperator(parseInfo, + dest, reduceSinkOperatorInfo, groupByDesc.Mode.PARTIAL1, genericUDAFEvaluators); + + int numReducers = -1; + List grpByExprs = getGroupByForClause(parseInfo, dest); + if (grpByExprs.isEmpty()) + numReducers = 1; + + // ////// 3. Generate ReduceSinkOperator2 + Operator reduceSinkOperatorInfo2 = genGroupByPlanReduceSinkOperator2MR( + parseInfo, dest, groupByOperatorInfo, grpByExprs.size(), numReducers); + + // ////// 4. Generate GroupbyOperator2 + Operator groupByOperatorInfo2 = + genGroupByPlanGroupByOperator2MR(parseInfo, dest, reduceSinkOperatorInfo2, + groupByDesc.Mode.FINAL, genericUDAFEvaluators); + + return groupByOperatorInfo2; + } + + private boolean optimizeMapAggrGroupBy(String dest, QB qb) { + List grpByExprs = getGroupByForClause(qb.getParseInfo(), dest); + if ((grpByExprs != null) && !grpByExprs.isEmpty()) + return false; + + if (qb.getParseInfo().getDistinctFuncExprForClause(dest) != null) + return false; + + return true; + } + + /** + * Generate a Group-By plan using 1 map-reduce job. + * First perform a map-side partial aggregation (to reduce the amount of data), at this + * point of time, we may turn off map-side partial aggregation based on its performance. 
+ * Then spray by the group by key, and sort by the distinct key (if any), and + * compute aggregates based on actual aggregates + * + * The agggregation evaluation functions are as follows: + * Mapper: iterate/terminatePartial + * (mode = HASH) + * + * Partitioning Key: + * grouping key + * + * Sorting Key: + * grouping key if no DISTINCT + * grouping + distinct key if DISTINCT + * + * Reducer: iterate/terminate if DISTINCT + * merge/terminate if NO DISTINCT + * (mode = MERGEPARTIAL) + */ + @SuppressWarnings("nls") + private Operator genGroupByPlanMapAggr1MR(String dest, QB qb, + Operator inputOperatorInfo) throws SemanticException { + + QBParseInfo parseInfo = qb.getParseInfo(); + + // ////// Generate GroupbyOperator for a map-side partial aggregation + Map genericUDAFEvaluators = + new LinkedHashMap(); + GroupByOperator groupByOperatorInfo = (GroupByOperator)genGroupByPlanMapGroupByOperator(qb, + dest, inputOperatorInfo, groupByDesc.Mode.HASH, genericUDAFEvaluators); + + int numReducers = -1; + + // Optimize the scenario when there are no grouping keys - only 1 reducer is needed + List grpByExprs = getGroupByForClause(parseInfo, dest); + if (grpByExprs.isEmpty()) + numReducers = 1; + + // ////// Generate ReduceSink Operator + Operator reduceSinkOperatorInfo = + genGroupByPlanReduceSinkOperator(qb, dest, groupByOperatorInfo, + grpByExprs.size(), numReducers, true); + + // This is a 1-stage map-reduce processing of the groupby. Tha map-side aggregates was just used to + // reduce output data. In case of distincts, partial results are not used, and so iterate is again + // invoked on the reducer. In case of non-distincts, partial results are used, and merge is invoked + // on the reducer. + return genGroupByPlanGroupByOperator1(parseInfo, dest, + reduceSinkOperatorInfo, groupByDesc.Mode.MERGEPARTIAL, + genericUDAFEvaluators, false); + } + + /** + * Generate a Group-By plan using a 2 map-reduce jobs. + * However, only 1 group-by plan is generated if the query involves no grouping key and + * no distincts. In that case, the plan is same as generated by genGroupByPlanMapAggr1MR. + * Otherwise, the following plan is generated: + * First perform a map side partial aggregation (to reduce the amount of data). Then + * spray by the grouping key and distinct key (or a random number, if no distinct is + * present) in hope of getting a uniform distribution, and compute partial aggregates + * grouped by the reduction key (grouping key + distinct key). + * Evaluate partial aggregates first, and spray by the grouping key to compute actual + * aggregates in the second phase. 
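+ *
+ * For example (hypothetical query): a query with both a grouping key and a
+ * distinct, such as
+ *   SELECT key, count(DISTINCT value) FROM src GROUP BY key
+ * takes the two-job path: a hash-mode map-side group-by, a spray on
+ * (key, value) followed by a partial merge (mode = PARTIALS), and a second
+ * spray on key followed by the final merge (mode = FINAL). A query with
+ * neither grouping keys nor distincts, such as select count(1) from T,
+ * is handled by a single map-reduce job instead.
+ *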
+ * The agggregation evaluation functions are as follows: + * Mapper: iterate/terminatePartial + * (mode = HASH) + * + * Partitioning Key: + * random() if no DISTINCT + * grouping + distinct key if DISTINCT + * + * Sorting Key: + * grouping key if no DISTINCT + * grouping + distinct key if DISTINCT + * + * Reducer: iterate/terminatePartial if DISTINCT + * merge/terminatePartial if NO DISTINCT + * (mode = MERGEPARTIAL) + * + * STAGE 2 + * + * Partitioining Key: + * grouping key + * + * Sorting Key: + * grouping key if no DISTINCT + * grouping + distinct key if DISTINCT + * + * Reducer: merge/terminate + * (mode = FINAL) + */ + @SuppressWarnings("nls") + private Operator genGroupByPlanMapAggr2MR(String dest, QB qb, + Operator inputOperatorInfo) throws SemanticException { + + QBParseInfo parseInfo = qb.getParseInfo(); + + // ////// Generate GroupbyOperator for a map-side partial aggregation + Map genericUDAFEvaluators = + new LinkedHashMap(); + GroupByOperator groupByOperatorInfo = (GroupByOperator)genGroupByPlanMapGroupByOperator(qb, + dest, inputOperatorInfo, groupByDesc.Mode.HASH, genericUDAFEvaluators); + + // Optimize the scenario when there are no grouping keys and no distinct - 2 map-reduce jobs are not needed + // For eg: select count(1) from T where t.ds = .... + if (!optimizeMapAggrGroupBy(dest, qb)) { + + // ////// Generate ReduceSink Operator + Operator reduceSinkOperatorInfo = + genGroupByPlanReduceSinkOperator(qb, dest, groupByOperatorInfo, + (parseInfo.getDistinctFuncExprForClause(dest) == null ? -1 + : Integer.MAX_VALUE), -1, true); + + // ////// Generate GroupbyOperator for a partial aggregation + Operator groupByOperatorInfo2 = genGroupByPlanGroupByOperator1(parseInfo, + dest, reduceSinkOperatorInfo, groupByDesc.Mode.PARTIALS, + genericUDAFEvaluators, false); + + int numReducers = -1; + List grpByExprs = getGroupByForClause(parseInfo, dest); + if (grpByExprs.isEmpty()) + numReducers = 1; + + // ////// Generate ReduceSinkOperator2 + Operator reduceSinkOperatorInfo2 = genGroupByPlanReduceSinkOperator2MR(parseInfo, dest, groupByOperatorInfo2, + grpByExprs.size(), numReducers); + + // ////// Generate GroupbyOperator3 + return genGroupByPlanGroupByOperator2MR(parseInfo, dest, reduceSinkOperatorInfo2, groupByDesc.Mode.FINAL, genericUDAFEvaluators); + } + else { + // ////// Generate ReduceSink Operator + Operator reduceSinkOperatorInfo = + genGroupByPlanReduceSinkOperator(qb, dest, groupByOperatorInfo, getGroupByForClause(parseInfo, dest).size(), 1, true); + + return genGroupByPlanGroupByOperator2MR(parseInfo, dest, reduceSinkOperatorInfo, groupByDesc.Mode.FINAL, genericUDAFEvaluators); + } + } + + @SuppressWarnings("nls") + private Operator genFileSinkPlan(String dest, QB qb, + Operator input) throws SemanticException { + + RowResolver inputRR = logicalPlan.getRowResolver(input); + QBMetaData qbm = qb.getMetaData(); + Integer dest_type = qbm.getDestTypeForAlias(dest); + + Table dest_tab; // destination table if any + String queryTmpdir; // the intermediate destination directory + Path dest_path; // the final destination directory + tableDesc table_desc = null; + int currentTableId = 0; + int destTableId; + boolean isLocal = false; + + switch (dest_type.intValue()) { + case QBMetaData.DEST_TABLE: + dest_tab = qbm.getDestTableForAlias(dest); + //check for partition + List parts = dest_tab.getTTable().getPartitionKeys(); + if(parts != null && parts.size() > 0) { + throw new SemanticException(ErrorMsg.NEED_PARTITION_ERROR.getMsg()); + } + dest_path = dest_tab.getPath(); + queryTmpdir 
= getContext().getExternalTmpFileURI(dest_path.toUri()); + table_desc = Utilities.getTableDesc(dest_tab); + + destTableId = logicalPlan.getDestTableId(); + logicalPlan.setTableId(String.valueOf(destTableId), dest_tab.getName()); + currentTableId = destTableId; + logicalPlan.setDestTableId(destTableId++); + + // Create the work for moving the table + + logicalPlan.addLoadTableWork(new loadTableDesc(queryTmpdir, + getContext().getExternalTmpFileURI(dest_path.toUri()), + table_desc, + new HashMap())); + logicalPlan.addOutput(new WriteEntity(dest_tab)); + break; + case QBMetaData.DEST_PARTITION: + Partition dest_part = qbm.getDestPartitionForAlias(dest); + dest_tab = dest_part.getTable(); + dest_path = dest_part.getPath()[0]; + queryTmpdir = getContext().getExternalTmpFileURI(dest_path.toUri()); + table_desc = Utilities.getTableDesc(dest_tab); + + destTableId = logicalPlan.getDestTableId(); + logicalPlan.setTableId(String.valueOf(destTableId), dest_tab.getName()); + currentTableId = destTableId; + logicalPlan.setDestTableId(destTableId++); + + logicalPlan.addLoadTableWork(new loadTableDesc(queryTmpdir, + getContext().getExternalTmpFileURI(dest_path.toUri()), + table_desc, dest_part.getSpec())); + logicalPlan.addOutput(new WriteEntity(dest_part)); + break; + case QBMetaData.DEST_LOCAL_FILE: + isLocal = true; + // fall through + case QBMetaData.DEST_DFS_FILE: + dest_path = new Path(qbm.getDestFileForAlias(dest)); + String destStr = dest_path.toString(); + + if (isLocal) { + // for local directory - we always write to map-red intermediate + // store and then copy to local fs + queryTmpdir = getContext().getMRTmpFileURI(); + } else { + // otherwise write to the file system implied by the directory + // no copy is required. we may want to revisit this policy in future + + try { + Path qPath = FileUtils.makeQualified(dest_path, hiveConf); + queryTmpdir = getContext().getExternalTmpFileURI(qPath.toUri()); + } catch (Exception e) { + throw new SemanticException("Error creating temporary folder on: " + + dest_path, e); + } + } + String cols = new String(); + String colTypes = new String(); + Vector colInfos = inputRR.getColumnInfos(); + + // CTAS case: the file output format and serde are defined by the create table command + // rather than taking the default value + List field_schemas = null; + createTableDesc tblDesc = qb.getTableDesc(); + if ( tblDesc != null ) + field_schemas = new ArrayList(); + + boolean first = true; + for (ColumnInfo colInfo:colInfos) { + String[] nm = inputRR.reverseLookup(colInfo.getInternalName()); + + if ( nm[1] != null ) { // non-null column alias + colInfo.setAlias(nm[1]); + } + + if ( field_schemas != null ) { + FieldSchema col = new FieldSchema(); + if ( nm[1] != null ) { + col.setName(colInfo.getAlias()); + } else { + col.setName(colInfo.getInternalName()); + } + col.setType(colInfo.getType().getTypeName()); + field_schemas.add(col); + } + + if (!first) { + cols = cols.concat(","); + colTypes = colTypes.concat(":"); + } + + first = false; + cols = cols.concat(colInfo.getInternalName()); + + // Replace VOID type with string when the output is a temp table or local files. + // A VOID type can be generated under the query: + // + // select NULL from tt; + // or + // insert overwrite local directory "abc" select NULL from tt; + // + // where there is no column type to which the NULL value should be converted. 
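+ // For instance (hypothetical schema where key is a string), "select key, NULL from tt"
+ // would emit "string" in place of "void" for the NULL column, giving a colTypes
+ // string of "string:string" rather than "string:void".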
+ // + String tName = colInfo.getType().getTypeName(); + if ( tName.equals(Constants.VOID_TYPE_NAME) ) + colTypes = colTypes.concat(Constants.STRING_TYPE_NAME); + else + colTypes = colTypes.concat(tName); + } + + // update the create table descriptor with the resulting schema. + if ( tblDesc != null ) + tblDesc.setCols(field_schemas); + + if (!getContext().isMRTmpFileURI(destStr)) { + destTableId = logicalPlan.getDestTableId(); + logicalPlan.setTableId( String.valueOf(destTableId), destStr); + currentTableId = destTableId; + logicalPlan.setDestTableId(destTableId++); + } + + boolean isDfsDir = (dest_type.intValue() == QBMetaData.DEST_DFS_FILE); + logicalPlan.addLoadFileWork(new loadFileDesc(queryTmpdir, destStr, isDfsDir, cols, colTypes)); + + if ( tblDesc == null ) { + table_desc = PlanUtils.getDefaultTableDesc(Integer.toString(Utilities.ctrlaCode), + cols, colTypes, false); + } else { + table_desc = PlanUtils.getTableDesc(tblDesc, cols, colTypes); + } + + logicalPlan.addOutput(new WriteEntity(destStr, !isDfsDir)); + break; + default: + throw new SemanticException("Unknown destination type: " + dest_type); + } + + input = genConversionSelectOperator(dest, qb, input, table_desc); + inputRR = logicalPlan.getRowResolver(input); + + Vector vecCol = new Vector(); + + try { + StructObjectInspector rowObjectInspector = (StructObjectInspector)table_desc.getDeserializer().getObjectInspector(); + List fields = rowObjectInspector.getAllStructFieldRefs(); + for (int i=0; i output = logicalPlan.newChildOperator( + new fileSinkDesc(queryTmpdir, table_desc, hiveConf.getBoolVar(HiveConf.ConfVars.COMPRESSRESULT), currentTableId), + input); + output.setSchema(fsRS); + logicalPlan.setRowResolver(output, inputRR); + + /* + Operator output = putOpInsertMap( + OperatorFactory.getAndMakeChild( + new fileSinkDesc(queryTmpdir, table_desc, + hiveConf.getBoolVar(HiveConf.ConfVars.COMPRESSRESULT), currentTableId), + fsRS, input), inputRR); + */ + + LOG.debug("Created FileSink Plan for clause: " + dest + "dest_path: " + + dest_path + " row schema: " + + inputRR.toString()); + + return output; + } + + /** + * Generate the conversion SelectOperator that converts the columns into + * the types that are expected by the table_desc. + */ + Operator genConversionSelectOperator(String dest, QB qb, + Operator input, tableDesc table_desc) throws SemanticException { + StructObjectInspector oi = null; + try { + Deserializer deserializer = table_desc.getDeserializerClass().newInstance(); + deserializer.initialize(hiveConf, table_desc.getProperties()); + oi = (StructObjectInspector) deserializer.getObjectInspector(); + } catch (Exception e) { + throw new SemanticException(e); + } + + // Check column number + List tableFields = oi.getAllStructFieldRefs(); + Vector rowFields = logicalPlan.getRowResolver(input).getColumnInfos(); + if (tableFields.size() != rowFields.size()) { + String reason = "Table " + dest + " has " + tableFields.size() + " columns but query has " + + rowFields.size() + " columns."; + throw new SemanticException(ErrorMsg.TARGET_TABLE_COLUMN_MISMATCH.getMsg( + qb.getParseInfo().getDestForClause(dest), reason)); + } + + // Check column types + boolean converted = false; + int columnNumber = tableFields.size(); + ArrayList expressions = new ArrayList(columnNumber); + // MetadataTypedColumnsetSerDe does not need type conversions because it does + // the conversion to String by itself. 
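+ // For example (hypothetical types): inserting a query column of type int into a
+ // table column declared as bigint triggers the conversion select built below,
+ // whereas a query whose column types already match the table falls through to
+ // the "not converted" case and the input operator is returned unchanged.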
+ boolean isMetaDataSerDe = table_desc.getDeserializerClass().equals(MetadataTypedColumnsetSerDe.class); + boolean isLazySimpleSerDe = table_desc.getDeserializerClass().equals(LazySimpleSerDe.class); + if (!isMetaDataSerDe) { + for (int i=0; i colName = new ArrayList(); + for (int i=0; i output = logicalPlan.newChildOperator( + new selectDesc(expressions, colName), rowResolver, input); + + return output; + } else { + // not converted + return input; + } + } + + @SuppressWarnings("nls") + private Operator genLimitPlan( + String dest, QB qb, Operator input, int limit) + throws SemanticException { + // A map-only job can be optimized - instead of converting it to a map-reduce job, we can have another map + // job to do the same to avoid the cost of sorting in the map-reduce phase. A better approach would be to + // write into a local file and then have a map-only job. + // Add the limit operator to get the value fields + + RowResolver inputRR = logicalPlan.getRowResolver(input); + + + Operator limitOp = logicalPlan.newChildOperator( + new limitDesc(limit), inputRR, input); + + LOG.debug("Created LimitOperator Plan for clause: " + dest + " row schema: " + + inputRR.toString()); + + return limitOp; + } + + @SuppressWarnings("nls") + private Operator genLimitMapRedPlan( + String dest, QB qb, Operator input, int limit, boolean extraMRStep) + throws SemanticException { + // A map-only job can be optimized - instead of converting it to a map-reduce job, we can have another map + // job to do the same to avoid the cost of sorting in the map-reduce phase. A better approach would be to + // write into a local file and then have a map-only job. + // Add the limit operator to get the value fields + Operator curr = genLimitPlan(dest, qb, input, limit); + + // the client requested that an extra map-reduce step be performed + if (!extraMRStep) + return curr; + + // Create a reduceSink operator followed by another limit + curr = genReduceSinkPlan(dest, qb, curr, 1); + return genLimitPlan(dest, qb, curr, limit); + } + + @SuppressWarnings("nls") + private Operator genReduceSinkPlan(String dest, QB qb, + Operator input, int numReducers) + throws SemanticException { + + RowResolver inputRR = logicalPlan.getRowResolver(input); + + // First generate the expression for the partition and sort keys + // The cluster by clause / distribute by clause has the aliases for partition function + ASTNode partitionExprs = qb.getParseInfo().getClusterByForClause(dest); + if (partitionExprs == null) { + partitionExprs = qb.getParseInfo().getDistributeByForClause(dest); + } + ArrayList partitionCols = new ArrayList(); + if (partitionExprs != null) { + int ccount = partitionExprs.getChildCount(); + for(int i=0; i sortCols = new ArrayList(); + StringBuilder order = new StringBuilder(); + if (sortExprs != null) { + int ccount = sortExprs.getChildCount(); + for(int i=0; i colExprMap = new HashMap(); + ArrayList valueCols = new ArrayList(); + for(ColumnInfo colInfo: inputRR.getColumnInfos()) { + valueCols.add(new exprNodeColumnDesc(colInfo.getType(), colInfo.getInternalName(), + colInfo.getTabAlias(), colInfo.getIsPartitionCol())); + colExprMap.put(colInfo.getInternalName(), valueCols.get(valueCols.size() - 1)); + } + + ArrayList outputColumns = new ArrayList(); + for (int i = 0; i < valueCols.size(); i++) { + outputColumns.add(getColumnInternalName(i)); + } + + Operator interim = logicalPlan.newChildOperator( + PlanUtils.getReduceSinkDesc(sortCols, valueCols, outputColumns, false, -1, partitionCols, order.toString(), numReducers), + 
inputRR, input); + interim.setColumnExprMap(colExprMap); + + // Add the extract operator to get the value fields + RowResolver out_rwsch = new RowResolver(); + RowResolver interim_rwsch = inputRR; + Integer pos = Integer.valueOf(0); + for(ColumnInfo colInfo: interim_rwsch.getColumnInfos()) { + String [] info = interim_rwsch.reverseLookup(colInfo.getInternalName()); + out_rwsch.put(info[0], info[1], + new ColumnInfo(getColumnInternalName(pos), colInfo.getType(), info[0], false)); + pos = Integer.valueOf(pos.intValue() + 1); + } + + Operator output = logicalPlan.newChildOperator( + new extractDesc(new exprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, + Utilities.ReduceField.VALUE.toString(), + "", false)), + out_rwsch, interim); + + LOG.debug("Created ReduceSink Plan for clause: " + dest + " row schema: " + + out_rwsch.toString()); + return output; + } + + private Operator genJoinOperatorChildren( + QBJoinTree join, Operator left, Operator[] right, + HashSet omitOpts) + throws SemanticException { + + RowResolver outputRS = new RowResolver(); + ArrayList outputColumnNames = new ArrayList(); + // all children are base classes + Operator[] rightOps = new Operator[right.length]; + int outputPos = 0; + + Map reversedExprs = new HashMap(); + HashMap> exprMap = new HashMap>(); + Map colExprMap = new HashMap(); + HashMap> posToAliasMap = new HashMap>(); + + for ( int pos = 0; pos < right.length; ++pos ) { + + Operator input = right[pos]; + if (input == null) + input = left; + + ArrayList keyDesc = new ArrayList(); + Byte tag = Byte.valueOf((byte)(((reduceSinkDesc)(input.getConf())).getTag())); + + // check whether this input operator produces output + if ( omitOpts == null || !omitOpts.contains(pos) ) { + // prepare output descriptors for the input opt + RowResolver inputRS = logicalPlan.getRowResolver(input); + Iterator keysIter = inputRS.getTableNames().iterator(); + Set aliases = posToAliasMap.get(pos); + if(aliases == null) { + aliases = new HashSet(); + posToAliasMap.put(pos, aliases); + } + while (keysIter.hasNext()) { + String key = keysIter.next(); + aliases.add(key); + HashMap map = inputRS.getFieldMap(key); + Iterator fNamesIter = map.keySet().iterator(); + while (fNamesIter.hasNext()) { + String field = fNamesIter.next(); + ColumnInfo valueInfo = inputRS.get(key, field); + keyDesc.add(new exprNodeColumnDesc(valueInfo.getType(), + valueInfo.getInternalName(), + valueInfo.getTabAlias(), + valueInfo.getIsPartitionCol())); + + if (outputRS.get(key, field) == null) { + String colName = getColumnInternalName(outputPos); + outputPos++; + outputColumnNames.add(colName); + colExprMap.put(colName, keyDesc.get(keyDesc.size() - 1)); + outputRS.put(key, field, new ColumnInfo(colName, + valueInfo.getType(), key, false)); + reversedExprs.put(colName, tag); + } + } + } + } + exprMap.put(tag, keyDesc); + rightOps[pos] = input; + } + + org.apache.hadoop.hive.ql.plan.joinCond[] joinCondns = new org.apache.hadoop.hive.ql.plan.joinCond[join.getJoinCond().length]; + for (int i = 0; i < join.getJoinCond().length; i++) { + joinCond condn = join.getJoinCond()[i]; + joinCondns[i] = new org.apache.hadoop.hive.ql.plan.joinCond(condn); + } + + joinDesc desc = new joinDesc(exprMap, outputColumnNames, joinCondns); + desc.setReversedExprs(reversedExprs); + + JoinOperator joinOp = (JoinOperator) logicalPlan.newChildOperator(desc, outputRS, rightOps); + joinOp.setColumnExprMap(colExprMap); + joinOp.setPosToAliasMap(posToAliasMap); + + return putOpInsertMap(joinOp, outputRS); + } + + @SuppressWarnings("nls") + private 
Operator genJoinReduceSinkChild(QB qb, QBJoinTree joinTree, + Operator child, String srcName, int pos) throws SemanticException { + RowResolver inputRS = logicalPlan.getRowResolver(child); + RowResolver outputRS = new RowResolver(); + ArrayList outputColumns = new ArrayList(); + ArrayList reduceKeys = new ArrayList(); + + // Compute join keys and store in reduceKeys + Vector exprs = joinTree.getExpressions().get(pos); + for (int i = 0; i < exprs.size(); i++) { + ASTNode expr = exprs.get(i); + reduceKeys.add(genExprNodeDesc(expr, inputRS)); + } + + // Walk over the input row resolver and copy in the output + ArrayList reduceValues = new ArrayList(); + Iterator tblNamesIter = inputRS.getTableNames().iterator(); + Map colExprMap = new HashMap(); + while (tblNamesIter.hasNext()) + { + String src = tblNamesIter.next(); + HashMap fMap = inputRS.getFieldMap(src); + for (Map.Entry entry : fMap.entrySet()) { + String field = entry.getKey(); + ColumnInfo valueInfo = entry.getValue(); + exprNodeColumnDesc inputExpr = new exprNodeColumnDesc(valueInfo.getType(), + valueInfo.getInternalName(), + valueInfo.getTabAlias(), + valueInfo.getIsPartitionCol()); + reduceValues.add(inputExpr); + if (outputRS.get(src, field) == null) { + String col = getColumnInternalName(reduceValues.size() - 1); + outputColumns.add(col); + ColumnInfo newColInfo = new ColumnInfo(Utilities.ReduceField.VALUE.toString() + "." + + col, + valueInfo.getType(), src, false); + colExprMap.put(newColInfo.getInternalName(), inputExpr); + outputRS.put(src, field, newColInfo); + } + } + } + + int numReds = -1; + + // Use only 1 reducer in case of cartesian product + if (reduceKeys.size() == 0) { + numReds = 1; + + // Cartesian product is not supported in strict mode + if (hiveConf.getVar(HiveConf.ConfVars.HIVEMAPREDMODE).equalsIgnoreCase("strict")) + throw new SemanticException(ErrorMsg.NO_CARTESIAN_PRODUCT.getMsg()); + } + + ReduceSinkOperator rsOp = (ReduceSinkOperator) logicalPlan.newChildOperator( + PlanUtils.getReduceSinkDesc(reduceKeys, reduceValues, outputColumns, false, joinTree.getNextTag(), reduceKeys.size(), numReds), + outputRS, child); + rsOp.setColumnExprMap(colExprMap); + + return rsOp; + } + + @SuppressWarnings("unchecked") + private Operator genJoinOperator(QB qb, QBJoinTree joinTree, + HashMap> map) throws SemanticException { + QBJoinTree leftChild = joinTree.getJoinSrc(); + Operator joinSrcOp = null; + if (leftChild != null) + { + Operator joinOp = genJoinOperator(qb, leftChild, map); + Vector filter = joinTree.getFilters().get(0); + for (ASTNode cond: filter) + joinOp = genFilterPlan(qb, cond, joinOp); + + joinSrcOp = genJoinReduceSinkChild(qb, joinTree, joinOp, null, 0); + } + + Operator[] srcOps = new Operator[joinTree.getBaseSrc().length]; + + HashSet omitOpts = null; // set of input to the join that should be omitted by the output + int pos = 0; + for (String src : joinTree.getBaseSrc()) { + if (src != null) { + Operator srcOp = map.get(src); + + // for left-semi join, generate an additional selection & group-by operator before ReduceSink + ArrayList fields = joinTree.getRHSSemijoinColumns(src); + if ( fields != null ) { + // the RHS table columns should be not be output from the join + if ( omitOpts == null ) { + omitOpts = new HashSet(); + } + omitOpts.add(pos); + + // generate a selection operator for group-by keys only + srcOp = insertSelectForSemijoin(fields, srcOp); + + // generate a groupby operator (HASH mode) for a map-side partial aggregation for semijoin + srcOp = genMapGroupByForSemijoin(qb, fields, srcOp, 
groupByDesc.Mode.HASH); + } + + // generate a ReduceSink operator for the join + srcOps[pos] = genJoinReduceSinkChild(qb, joinTree, srcOp, src, pos); + pos++; + } else { + assert pos == 0; + srcOps[pos++] = null; + } + } + + // Type checking and implicit type conversion for join keys + genJoinOperatorTypeCheck(joinSrcOp, srcOps); + + JoinOperator joinOp = (JoinOperator)genJoinOperatorChildren(joinTree, joinSrcOp, srcOps, omitOpts); + logicalPlan.addJoinTree(joinOp, joinTree); + return joinOp; + } + + /** + * Construct a selection operator for semijoin that filter out all fields other than the group by keys. + * + * @param fields list of fields need to be output + * @param input input operator + * @return the selection operator. + * @throws SemanticException + */ + private Operator insertSelectForSemijoin(ArrayList fields, Operator input) + throws SemanticException { + + RowResolver inputRR = logicalPlan.getRowResolver(input); + ArrayList colList = new ArrayList(); + ArrayList columnNames = new ArrayList(); + + // construct the list of columns that need to be projected + for (ASTNode field: fields) { + exprNodeColumnDesc exprNode = (exprNodeColumnDesc) genExprNodeDesc(field, inputRR); + colList.add(exprNode); + columnNames.add(exprNode.getColumn()); + } + + // create selection operator + Operator output = logicalPlan.newChildOperator( + new selectDesc(colList, columnNames, false), inputRR, input); + output.setColumnExprMap(input.getColumnExprMap()); + + return output; + } + + private Operator genMapGroupByForSemijoin(QB qb, + ArrayList fields, // the ASTNode of the join key "tab.col" + Operator inputOperatorInfo, + groupByDesc.Mode mode) + throws SemanticException { + + RowResolver groupByInputRowResolver = logicalPlan.getRowResolver(inputOperatorInfo); + RowResolver groupByOutputRowResolver = new RowResolver(); + ArrayList groupByKeys = new ArrayList(); + ArrayList outputColumnNames = new ArrayList(); + ArrayList aggregations = new ArrayList(); + Map colExprMap = new HashMap(); + + groupByOutputRowResolver.setIsExprResolver(true); // join keys should only be columns but not be expressions + + for (int i = 0; i < fields.size(); ++i) { + // get the group by keys to ColumnInfo + ASTNode colName = fields.get(i); + exprNodeDesc grpByExprNode = genExprNodeDesc(colName, groupByInputRowResolver); + groupByKeys.add(grpByExprNode); + + // generate output column names + String field = getColumnInternalName(i); + outputColumnNames.add(field); + ColumnInfo colInfo2 = new ColumnInfo(field, grpByExprNode.getTypeInfo(), "", false); + groupByOutputRowResolver.put("", colName.toStringTree(), colInfo2); + + // establish mapping from the output column to the input column + colExprMap.put(field, grpByExprNode); + } + + // Generate group-by operator + Operator op = logicalPlan.newChildOperator( + new groupByDesc(mode, outputColumnNames, groupByKeys, aggregations, false), + groupByOutputRowResolver, inputOperatorInfo); + op.setColumnExprMap(colExprMap); + + return op; + } + + + + private void genJoinOperatorTypeCheck(Operator left, Operator[] right) throws SemanticException { + // keys[i] -> ArrayList for the i-th join operator key list + ArrayList> keys = new ArrayList>(); + int keyLength = 0; + for (int i=0; i oi = (i==0 && right[i] == null ? 
left : right[i]); + reduceSinkDesc now = ((ReduceSinkOperator)(oi)).getConf(); + if (i == 0) { + keyLength = now.getKeyCols().size(); + } else { + assert(keyLength == now.getKeyCols().size()); + } + keys.add(now.getKeyCols()); + } + // implicit type conversion hierarchy + for (int k = 0; k < keyLength; k++) { + // Find the common class for type conversion + TypeInfo commonType = keys.get(0).get(k).getTypeInfo(); + for(int i=1; i oi = (i==0 && right[i] == null ? left : right[i]); + reduceSinkDesc now = ((ReduceSinkOperator)(oi)).getConf(); + + now.setKeySerializeInfo( + PlanUtils.getReduceKeyTableDesc( + PlanUtils.getFieldSchemasFromColumnList(now.getKeyCols(), "joinkey"), + now.getOrder() + ) + ); + } + } + + + private Operator genJoinPlan(QB qb, HashMap> map) + throws SemanticException { + QBJoinTree joinTree = qb.getQbJoinTree(); + Operator joinOp = genJoinOperator(qb, joinTree, map); + return joinOp; + } + + /** + * Extract the filters from the join condition and push them on top of the source operators. This procedure + * traverses the query tree recursively, + */ + private void pushJoinFilters(QB qb, QBJoinTree joinTree, HashMap> map) throws SemanticException { + Vector> filters = joinTree.getFilters(); + if (joinTree.getJoinSrc() != null) + pushJoinFilters(qb, joinTree.getJoinSrc(), map); + + int pos = 0; + for (String src : joinTree.getBaseSrc()) { + if (src != null) { + Operator srcOp = map.get(src); + Vector filter = filters.get(pos); + for (ASTNode cond: filter) + srcOp = genFilterPlan(qb, cond, srcOp); + map.put(src, srcOp); + } + pos++; + } + } + + private List getMapSideJoinTables(QB qb) { + List cols = new ArrayList(); + ASTNode hints = qb.getParseInfo().getHints(); + for (int pos = 0; pos < hints.getChildCount(); pos++) { + ASTNode hint = (ASTNode)hints.getChild(pos); + if (((ASTNode)hint.getChild(0)).getToken().getType() == HiveParser.TOK_MAPJOIN) { + ASTNode hintTblNames = (ASTNode)hint.getChild(1); + int numCh = hintTblNames.getChildCount(); + for (int tblPos = 0; tblPos < numCh; tblPos++) { + String tblName = ((ASTNode)hintTblNames.getChild(tblPos)).getText().toLowerCase(); + if (!cols.contains(tblName)) + cols.add(tblName); + } + } + } + + return cols; + } + + private QBJoinTree genUniqueJoinTree(QB qb, ASTNode joinParseTree) + throws SemanticException { + QBJoinTree joinTree = new QBJoinTree(); + joinTree.setNoOuterJoin(false); + + joinTree.setExpressions(new Vector>()); + joinTree.setFilters(new Vector>()); + + // Create joinTree structures to fill them up later + Vector rightAliases = new Vector(); + Vector leftAliases = new Vector(); + Vector baseSrc = new Vector(); + Vector preserved = new Vector(); + + boolean lastPreserved = false; + int cols = -1; + + for(int i = 0; i < joinParseTree.getChildCount(); i++) { + ASTNode child = (ASTNode) joinParseTree.getChild(i); + + switch(child.getToken().getType()) { + case HiveParser.TOK_TABREF: + // Handle a table - populate aliases appropriately: + // leftAliases should contain the first table, rightAliases should + // contain all other tables and baseSrc should contain all tables + + String table_name = ParseUtils.unescapeIdentifier(child.getChild(0).getText()); + String alias = child.getChildCount() == 1 ? 
table_name : + ParseUtils.unescapeIdentifier(child.getChild(child.getChildCount()-1).getText().toLowerCase()); + + if (i == 0) { + leftAliases.add(alias); + joinTree.setLeftAlias(alias); + } else { + rightAliases.add(alias); + } + baseSrc.add(alias); + + preserved.add(lastPreserved); + lastPreserved = false; + break; + + case HiveParser.TOK_EXPLIST: + if (cols == -1 && child.getChildCount() != 0) { + cols = child.getChildCount(); + } else if(child.getChildCount() != cols) { + throw new SemanticException("Tables with different or invalid " + + "number of keys in UNIQUEJOIN"); + } + + Vector expressions = new Vector(); + Vector filt = new Vector(); + + for (Node exp: child.getChildren()) { + expressions.add((ASTNode)exp); + } + + joinTree.getExpressions().add(expressions); + joinTree.getFilters().add(filt); + break; + + case HiveParser.KW_PRESERVE: + lastPreserved = true; + break; + + case HiveParser.TOK_SUBQUERY: + throw new SemanticException("Subqueries are not supported in UNIQUEJOIN"); + + default: + throw new SemanticException("Unexpected UNIQUEJOIN structure"); + } + } + + joinTree.setBaseSrc(baseSrc.toArray(new String[0])); + joinTree.setLeftAliases(leftAliases.toArray(new String[0])); + joinTree.setRightAliases(rightAliases.toArray(new String[0])); + + joinCond[] condn = new joinCond[preserved.size()]; + for (int i = 0; i < condn.length; i++) { + condn[i] = new joinCond(preserved.get(i)); + } + joinTree.setJoinCond(condn); + + if (qb.getParseInfo().getHints() != null) { + parseStreamTables(joinTree, qb); + } + + return joinTree; + } + + private QBJoinTree genJoinTree(QB qb, ASTNode joinParseTree) + throws SemanticException { + QBJoinTree joinTree = new QBJoinTree(); + joinCond[] condn = new joinCond[1]; + + switch (joinParseTree.getToken().getType() ) { + case HiveParser.TOK_LEFTOUTERJOIN: + joinTree.setNoOuterJoin(false); + condn[0] = new joinCond(0, 1, joinType.LEFTOUTER); + break; + case HiveParser.TOK_RIGHTOUTERJOIN: + joinTree.setNoOuterJoin(false); + condn[0] = new joinCond(0, 1, joinType.RIGHTOUTER); + break; + case HiveParser.TOK_FULLOUTERJOIN: + joinTree.setNoOuterJoin(false); + condn[0] = new joinCond(0, 1, joinType.FULLOUTER); + break; + case HiveParser.TOK_LEFTSEMIJOIN: + joinTree.setNoSemiJoin(false); + condn[0] = new joinCond(0, 1, joinType.LEFTSEMI); + break; + default: + condn[0] = new joinCond(0, 1, joinType.INNER); + joinTree.setNoOuterJoin(true); + break; + } + + joinTree.setJoinCond(condn); + + ASTNode left = (ASTNode) joinParseTree.getChild(0); + ASTNode right = (ASTNode) joinParseTree.getChild(1); + + if ((left.getToken().getType() == HiveParser.TOK_TABREF) + || (left.getToken().getType() == HiveParser.TOK_SUBQUERY)) { + String table_name = ParseUtils.unescapeIdentifier(left.getChild(0).getText()); + String alias = left.getChildCount() == 1 ? 
table_name : + ParseUtils.unescapeIdentifier(left.getChild(left.getChildCount()-1).getText().toLowerCase()); + joinTree.setLeftAlias(alias); + String[] leftAliases = new String[1]; + leftAliases[0] = alias; + joinTree.setLeftAliases(leftAliases); + String[] children = new String[2]; + children[0] = alias; + joinTree.setBaseSrc(children); + } + else if (ParseUtils.isJoinToken(left)) { + QBJoinTree leftTree = genJoinTree(qb, left); + joinTree.setJoinSrc(leftTree); + String[] leftChildAliases = leftTree.getLeftAliases(); + String leftAliases[] = new String[leftChildAliases.length + 1]; + for (int i = 0; i < leftChildAliases.length; i++) + leftAliases[i] = leftChildAliases[i]; + leftAliases[leftChildAliases.length] = leftTree.getRightAliases()[0]; + joinTree.setLeftAliases(leftAliases); + } else + assert (false); + + if ((right.getToken().getType() == HiveParser.TOK_TABREF) + || (right.getToken().getType() == HiveParser.TOK_SUBQUERY)) { + String table_name = ParseUtils.unescapeIdentifier(right.getChild(0).getText()); + String alias = right.getChildCount() == 1 ? table_name : + ParseUtils.unescapeIdentifier(right.getChild(right.getChildCount()-1).getText().toLowerCase()); + String[] rightAliases = new String[1]; + rightAliases[0] = alias; + joinTree.setRightAliases(rightAliases); + String[] children = joinTree.getBaseSrc(); + if (children == null) + children = new String[2]; + children[1] = alias; + joinTree.setBaseSrc(children); + // remember rhs table for semijoin + if (joinTree.getNoSemiJoin() == false) { + joinTree.addRHSSemijoin(alias); + } + } else + assert false; + + Vector> expressions = new Vector>(); + expressions.add(new Vector()); + expressions.add(new Vector()); + joinTree.setExpressions(expressions); + + Vector> filters = new Vector>(); + filters.add(new Vector()); + filters.add(new Vector()); + joinTree.setFilters(filters); + + ASTNode joinCond = (ASTNode) joinParseTree.getChild(2); + Vector leftSrc = new Vector(); + parseJoinCondition(joinTree, joinCond, leftSrc); + if (leftSrc.size() == 1) + joinTree.setLeftAlias(leftSrc.get(0)); + + // check the hints to see if the user has specified a map-side join. 
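// For illustration (assumed example queries; the table names a and b are placeholders), the hints
// inspected here look like:
//
//   SELECT /*+ MAPJOIN(b) */ a.key, b.value FROM a JOIN b ON (a.key = b.key);
//   SELECT /*+ STREAMTABLE(a) */ a.key, b.value FROM a JOIN b ON (a.key = b.key);
//
// MAPJOIN(b) asks that alias b be held in memory on the map side (TOK_MAPJOIN, collected by
// getMapSideJoinTables above); STREAMTABLE(a) asks that alias a be streamed through the join
// reducer rather than buffered (TOK_STREAMTABLE, collected by parseStreamTables below).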
This will be removed later on, once the cost-based + // infrastructure is in place + if (qb.getParseInfo().getHints() != null) { + List mapSideTables = getMapSideJoinTables(qb); + List mapAliases = joinTree.getMapAliases(); + + for (String mapTbl : mapSideTables) { + boolean mapTable = false; + for (String leftAlias : joinTree.getLeftAliases()) { + if (mapTbl.equalsIgnoreCase(leftAlias)) + mapTable = true; + } + for (String rightAlias : joinTree.getRightAliases()) { + if (mapTbl.equalsIgnoreCase(rightAlias)) + mapTable = true; + } + + if (mapTable) { + if (mapAliases == null) { + mapAliases = new ArrayList(); + } + mapAliases.add(mapTbl); + joinTree.setMapSideJoin(true); + } + } + + joinTree.setMapAliases(mapAliases); + + parseStreamTables(joinTree, qb); + } + + return joinTree; + } + + private void parseStreamTables(QBJoinTree joinTree, QB qb) { + List streamAliases = joinTree.getStreamAliases(); + + for (Node hintNode: qb.getParseInfo().getHints().getChildren()) { + ASTNode hint = (ASTNode)hintNode; + if (hint.getChild(0).getType() == HiveParser.TOK_STREAMTABLE) { + for (int i = 0; i < hint.getChild(1).getChildCount(); i++) { + if (streamAliases == null) { + streamAliases = new ArrayList(); + } + streamAliases.add(hint.getChild(1).getChild(i).getText()); + } + } + } + + joinTree.setStreamAliases(streamAliases); + } + + private void mergeJoins(QB qb, QBJoinTree parent, QBJoinTree node, + QBJoinTree target, int pos) { + String[] nodeRightAliases = node.getRightAliases(); + String[] trgtRightAliases = target.getRightAliases(); + String[] rightAliases = new String[nodeRightAliases.length + + trgtRightAliases.length]; + + for (int i = 0; i < trgtRightAliases.length; i++) + rightAliases[i] = trgtRightAliases[i]; + for (int i = 0; i < nodeRightAliases.length; i++) + rightAliases[i + trgtRightAliases.length] = nodeRightAliases[i]; + target.setRightAliases(rightAliases); + + String[] nodeBaseSrc = node.getBaseSrc(); + String[] trgtBaseSrc = target.getBaseSrc(); + String[] baseSrc = new String[nodeBaseSrc.length + trgtBaseSrc.length - 1]; + + for (int i = 0; i < trgtBaseSrc.length; i++) + baseSrc[i] = trgtBaseSrc[i]; + for (int i = 1; i < nodeBaseSrc.length; i++) + baseSrc[i + trgtBaseSrc.length - 1] = nodeBaseSrc[i]; + target.setBaseSrc(baseSrc); + + Vector> expr = target.getExpressions(); + for (int i = 0; i < nodeRightAliases.length; i++) + expr.add(node.getExpressions().get(i + 1)); + + Vector> filter = target.getFilters(); + for (int i = 0; i < nodeRightAliases.length; i++) + filter.add(node.getFilters().get(i + 1)); + + if (node.getFilters().get(0).size() != 0) { + Vector filterPos = filter.get(pos); + filterPos.addAll(node.getFilters().get(0)); + } + + if (qb.getQbJoinTree() == node) + qb.setQbJoinTree(node.getJoinSrc()); + else + parent.setJoinSrc(node.getJoinSrc()); + + if (node.getNoOuterJoin() && target.getNoOuterJoin()) + target.setNoOuterJoin(true); + else + target.setNoOuterJoin(false); + + if (node.getNoSemiJoin() && target.getNoSemiJoin()) + target.setNoSemiJoin(true); + else + target.setNoSemiJoin(false); + + target.mergeRHSSemijoin(node); + + joinCond[] nodeCondns = node.getJoinCond(); + int nodeCondnsSize = nodeCondns.length; + joinCond[] targetCondns = target.getJoinCond(); + int targetCondnsSize = targetCondns.length; + joinCond[] newCondns = new joinCond[nodeCondnsSize + targetCondnsSize]; + for (int i = 0; i < targetCondnsSize; i++) + newCondns[i] = targetCondns[i]; + + for (int i = 0; i < nodeCondnsSize; i++) + { + joinCond nodeCondn = nodeCondns[i]; + if 
(nodeCondn.getLeft() == 0) + nodeCondn.setLeft(pos); + else + nodeCondn.setLeft(nodeCondn.getLeft() + targetCondnsSize); + nodeCondn.setRight(nodeCondn.getRight() + targetCondnsSize); + newCondns[targetCondnsSize + i] = nodeCondn; + } + + target.setJoinCond(newCondns); + if (target.isMapSideJoin()) { + assert node.isMapSideJoin(); + List mapAliases = target.getMapAliases(); + for (String mapTbl : node.getMapAliases()) + if (!mapAliases.contains(mapTbl)) + mapAliases.add(mapTbl); + target.setMapAliases(mapAliases); + } + } + + private int findMergePos(QBJoinTree node, QBJoinTree target) { + int res = -1; + String leftAlias = node.getLeftAlias(); + if (leftAlias == null) + return -1; + + Vector nodeCondn = node.getExpressions().get(0); + Vector targetCondn = null; + + if (leftAlias.equals(target.getLeftAlias())) + { + targetCondn = target.getExpressions().get(0); + res = 0; + } + else + for (int i = 0; i < target.getRightAliases().length; i++) { + if (leftAlias.equals(target.getRightAliases()[i])) { + targetCondn = target.getExpressions().get(i + 1); + res = i + 1; + break; + } + } + + if ((targetCondn == null) || (nodeCondn.size() != targetCondn.size())) + return -1; + + for (int i = 0; i < nodeCondn.size(); i++) + if (!nodeCondn.get(i).toStringTree().equals( + targetCondn.get(i).toStringTree())) + return -1; + + return res; + } + + private boolean mergeJoinNodes(QB qb, QBJoinTree parent, QBJoinTree node, + QBJoinTree target) { + if (target == null) + return false; + + int res = findMergePos(node, target); + if (res != -1) { + mergeJoins(qb, parent, node, target, res); + return true; + } + + return mergeJoinNodes(qb, parent, node, target.getJoinSrc()); + } + + private void mergeJoinTree(QB qb) { + QBJoinTree root = qb.getQbJoinTree(); + QBJoinTree parent = null; + while (root != null) { + boolean merged = mergeJoinNodes(qb, parent, root, root.getJoinSrc()); + + if (parent == null) { + if (merged) + root = qb.getQbJoinTree(); + else { + parent = root; + root = root.getJoinSrc(); + } + } else { + parent = parent.getJoinSrc(); + root = parent.getJoinSrc(); + } + } + } + + private Operator insertSelectAllPlanForGroupBy(String dest, Operator input) + throws SemanticException { + OpParseContext inputCtx = logicalPlan.getOpParseContext(input); + RowResolver inputRR = inputCtx.getRR(); + Vector columns = inputRR.getColumnInfos(); + ArrayList colList = new ArrayList(); + ArrayList columnNames = new ArrayList(); + for (int i = 0; i < columns.size(); i++) { + ColumnInfo col = columns.get(i); + colList.add(new exprNodeColumnDesc(col.getType(), col.getInternalName(), + col.getTabAlias(), col.getIsPartitionCol())); + columnNames.add(col.getInternalName()); + } + + Operator output = logicalPlan.newChildOperator( + new selectDesc(colList, columnNames, true), inputRR, input); + output.setColumnExprMap(input.getColumnExprMap()); + + return output; + } + + // Return the common distinct expression + // There should be more than 1 destination, with group bys in all of them. 
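// For example (assumed HiveQL; src, dest1 and dest2 are placeholder tables), a multi-insert such as
//
//   FROM src
//   INSERT OVERWRITE TABLE dest1 SELECT key,   COUNT(DISTINCT value) GROUP BY key
//   INSERT OVERWRITE TABLE dest2 SELECT value, COUNT(DISTINCT value) GROUP BY value;
//
// qualifies: every destination has a group-by, none has a WHERE clause, and all of them share the
// same distinct expression (value), so the rows can be sprayed once by the distinct key and the
// individual group-bys evaluated on the reduce side.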
+ private List getCommonDistinctExprs(QB qb, Operator input) { + RowResolver inputRR = logicalPlan.getRowResolver(input); + QBParseInfo qbp = qb.getParseInfo(); + + TreeSet ks = new TreeSet(); + ks.addAll(qbp.getClauseNames()); + + // Go over all the destination tables + if (ks.size() <= 1) + return null; + + List oldList = null; + List oldASTList = null; + + for (String dest : ks) { + // If a filter is present, common processing is not possible + if (qbp.getWhrForClause(dest) != null) + return null; + + if (qbp.getAggregationExprsForClause(dest).size() == 0 + && getGroupByForClause(qbp, dest).size() == 0) + return null; + + // All distinct expressions must be the same + ASTNode value = qbp.getDistinctFuncExprForClause(dest); + if (value == null) + return null; + + List currDestList = new ArrayList(); + List currASTList = new ArrayList(); + try { + // 0 is function name + for (int i = 1; i < value.getChildCount(); i++) { + ASTNode parameter = (ASTNode) value.getChild(i); + currDestList.add(genExprNodeDesc(parameter, inputRR)); + currASTList.add(parameter); + } + } catch (SemanticException e) { + return null; + } + + if (oldList == null) { + oldList = currDestList; + oldASTList = currASTList; + } + else { + if (oldList.size() != currDestList.size()) + return null; + for (int pos = 0; pos < oldList.size(); pos++) + { + if (!oldList.get(pos).isSame(currDestList.get(pos))) + return null; + } + } + } + + return oldASTList; + } + + private Operator createCommonReduceSink(QB qb, Operator input) + throws SemanticException { + // Go over all the tables and extract the common distinct key + List distExprs = getCommonDistinctExprs(qb, input); + + QBParseInfo qbp = qb.getParseInfo(); + TreeSet ks = new TreeSet(); + ks.addAll(qbp.getClauseNames()); + + // Pass the entire row + RowResolver inputRR = logicalPlan.getRowResolver(input); + RowResolver reduceSinkOutputRowResolver = new RowResolver(); + reduceSinkOutputRowResolver.setIsExprResolver(true); + ArrayList reduceKeys = new ArrayList(); + ArrayList reduceValues = new ArrayList(); + Map colExprMap = new HashMap(); + + // Pre-compute distinct group-by keys and store in reduceKeys + + List outputColumnNames = new ArrayList(); + for (ASTNode distn : distExprs) { + exprNodeDesc distExpr = genExprNodeDesc(distn, inputRR); + reduceKeys.add(distExpr); + String text = distn.toStringTree(); + if (reduceSinkOutputRowResolver.get("", text) == null) { + outputColumnNames.add(getColumnInternalName(reduceKeys.size() - 1)); + String field = Utilities.ReduceField.KEY.toString() + "." + getColumnInternalName(reduceKeys.size() - 1); + ColumnInfo colInfo = new ColumnInfo(field, + reduceKeys.get(reduceKeys.size()-1).getTypeInfo(), "", false); + reduceSinkOutputRowResolver.put("", text, colInfo); + colExprMap.put(colInfo.getInternalName(), distExpr); + } + } + + // Go over all the grouping keys and aggregations + for (String dest : ks) { + + List grpByExprs = getGroupByForClause(qbp, dest); + for (int i = 0; i < grpByExprs.size(); ++i) { + ASTNode grpbyExpr = grpByExprs.get(i); + String text = grpbyExpr.toStringTree(); + + if (reduceSinkOutputRowResolver.get("", text) == null) { + exprNodeDesc grpByExprNode = genExprNodeDesc(grpbyExpr, inputRR); + reduceValues.add(grpByExprNode); + String field = Utilities.ReduceField.VALUE.toString() + "." 
+ getColumnInternalName(reduceValues.size() - 1); + ColumnInfo colInfo = new ColumnInfo(field, reduceValues.get(reduceValues.size()-1).getTypeInfo(), "", false); + reduceSinkOutputRowResolver.put("", text, colInfo); + outputColumnNames.add(getColumnInternalName(reduceValues.size() - 1)); + } + } + + // For each aggregation + HashMap aggregationTrees = qbp.getAggregationExprsForClause(dest); + assert (aggregationTrees != null); + + for (Map.Entry entry : aggregationTrees.entrySet()) { + ASTNode value = entry.getValue(); + + // 0 is the function name + for (int i = 1; i < value.getChildCount(); i++) { + ASTNode paraExpr = (ASTNode)value.getChild(i); + String text = paraExpr.toStringTree(); + + if (reduceSinkOutputRowResolver.get("", text) == null) { + exprNodeDesc paraExprNode = genExprNodeDesc(paraExpr, inputRR); + reduceValues.add(paraExprNode); + String field = Utilities.ReduceField.VALUE.toString() + "." + getColumnInternalName(reduceValues.size() - 1); + ColumnInfo colInfo = new ColumnInfo(field, reduceValues.get(reduceValues.size()-1).getTypeInfo(), "", false); + reduceSinkOutputRowResolver.put("", text, colInfo); + outputColumnNames.add(getColumnInternalName(reduceValues.size() - 1)); + } + } + } + } + + ReduceSinkOperator rsOp = (ReduceSinkOperator) logicalPlan.newChildOperator( + PlanUtils.getReduceSinkDesc(reduceKeys, reduceValues, outputColumnNames, true, -1, reduceKeys.size(), -1), + reduceSinkOutputRowResolver, input); + rsOp.setColumnExprMap(colExprMap); + + return rsOp; + } + + @SuppressWarnings("nls") + private Operator genBodyPlan(QB qb, Operator input) + throws SemanticException { + + QBParseInfo qbp = qb.getParseInfo(); + + TreeSet ks = new TreeSet(); + ks.addAll(qbp.getClauseNames()); + + // For multi-group by with the same distinct, we ignore all user hints currently. It doesnt matter whether he has asked to do + // map-side aggregation or not. Map side aggregation is turned off + boolean optimizeMultiGroupBy = (getCommonDistinctExprs(qb, input) != null); + Operator curr = null; + + // If there are multiple group-bys, map-side aggregation is turned off, there are no filters + // and there is a single distinct, optimize that. Spray initially by the distinct key, + // no computation at the mapper. 
Have multiple group by operators at the reducer - and then + // proceed + if (optimizeMultiGroupBy) { + curr = createCommonReduceSink(qb, input); + + RowResolver currRR = logicalPlan.getRowResolver(curr); + // create a forward operator + input = logicalPlan.newChildOperator(new forwardDesc(), currRR, curr); + + for (String dest : ks) { + curr = input; + curr = genGroupByPlan2MRMultiGroupBy(dest, qb, curr); + curr = genSelectPlan(dest, qb, curr); + Integer limit = qbp.getDestLimit(dest); + if (limit != null) { + curr = genLimitMapRedPlan(dest, qb, curr, limit.intValue(), true); + qb.getParseInfo().setOuterQueryLimit(limit.intValue()); + } + curr = genFileSinkPlan(dest, qb, curr); + } + } else { + // Go over all the destination tables + for (String dest : ks) { + curr = input; + + if (qbp.getWhrForClause(dest) != null) { + curr = genFilterPlan(dest, qb, curr); + } + + if (qbp.getAggregationExprsForClause(dest).size() != 0 + || getGroupByForClause(qbp, dest).size() > 0) { + // insert a select operator here used by the ColumnPruner to reduce the data to shuffle + curr = insertSelectAllPlanForGroupBy(dest, curr); + if (hiveConf.getVar(HiveConf.ConfVars.HIVEMAPSIDEAGGREGATE).equalsIgnoreCase("true")) { + if (hiveConf.getVar(HiveConf.ConfVars.HIVEGROUPBYSKEW).equalsIgnoreCase("false")) { + curr = genGroupByPlanMapAggr1MR(dest, qb, curr); + } else { + curr = genGroupByPlanMapAggr2MR(dest, qb, curr); + } + } else if (hiveConf.getVar(HiveConf.ConfVars.HIVEGROUPBYSKEW).equalsIgnoreCase("true")) { + curr = genGroupByPlan2MR(dest, qb, curr); + } else { + curr = genGroupByPlan1MR(dest, qb, curr); + } + } + + curr = genSelectPlan(dest, qb, curr); + Integer limit = qbp.getDestLimit(dest); + + if (qbp.getClusterByForClause(dest) != null + || qbp.getDistributeByForClause(dest) != null + || qbp.getOrderByForClause(dest) != null + || qbp.getSortByForClause(dest) != null) { + + int numReducers = -1; + + // Use only 1 reducer if order by is present + if (qbp.getOrderByForClause(dest) != null) + numReducers = 1; + + curr = genReduceSinkPlan(dest, qb, curr, numReducers); + } + + if (qbp.getIsSubQ()) { + if (limit != null) { + // In case of order by, only 1 reducer is used, so no need of another shuffle + curr = genLimitMapRedPlan(dest, qb, curr, limit.intValue(), qbp.getOrderByForClause(dest) != null ? 
false : true); + } + } else { + // exact limit can be taken care of by the fetch operator + if (limit != null) { + boolean extraMRStep = true; + + if (qb.getIsQuery() && + qbp.getClusterByForClause(dest) == null && + qbp.getSortByForClause(dest) == null) + extraMRStep = false; + + curr = genLimitMapRedPlan(dest, qb, curr, limit.intValue(), extraMRStep); + qb.getParseInfo().setOuterQueryLimit(limit.intValue()); + } + curr = genFileSinkPlan(dest, qb, curr); + } + + // change curr ops row resolver's tab aliases to query alias if it exists + if(qb.getParseInfo().getAlias() != null) { + RowResolver rr = logicalPlan.getRowResolver(curr); + RowResolver newRR = new RowResolver(); + String alias = qb.getParseInfo().getAlias(); + for(ColumnInfo colInfo: rr.getColumnInfos()) { + String name = colInfo.getInternalName(); + String [] tmp = rr.reverseLookup(name); + newRR.put(alias, tmp[1], colInfo); + } + logicalPlan.setRowResolver(curr, newRR); + } + } + } + + LOG.debug("Created Body Plan for Query Block " + qb.getId()); + return curr; + } + + @SuppressWarnings("nls") + private Operator genUnionPlan(String unionalias, String leftalias, + Operator leftOp, String rightalias, Operator rightOp) + throws SemanticException { + + // Currently, the unions are not merged - each union has only 2 parents. So, a n-way union will lead to (n-1) union operators. + // This can be easily merged into 1 union + RowResolver leftRR = logicalPlan.getRowResolver(leftOp); + RowResolver rightRR = logicalPlan.getRowResolver(rightOp); + HashMap leftmap = leftRR.getFieldMap(leftalias); + HashMap rightmap = rightRR.getFieldMap(rightalias); + // make sure the schemas of both sides are the same + for (Map.Entry lEntry: leftmap.entrySet()) { + String field = lEntry.getKey(); + ColumnInfo lInfo = lEntry.getValue(); + ColumnInfo rInfo = rightmap.get(field); + if (rInfo == null) { + throw new SemanticException("Schema of both sides of union should match. " + + rightalias + " does not have the field " + field); + } + if (lInfo == null) { + throw new SemanticException("Schema of both sides of union should match. 
" + + leftalias + " does not have the field " + field); + } + if (!lInfo.getInternalName().equals(rInfo.getInternalName())) { + throw new SemanticException("Schema of both sides of union should match: " + + field + ":" + lInfo.getInternalName() + " " + rInfo.getInternalName()); + } + if (!lInfo.getType().getTypeName().equals(rInfo.getType().getTypeName())) { + throw new SemanticException("Schema of both sides of union should match: Column " + + field + " is of type " + lInfo.getType().getTypeName() + + " on first table and type " + rInfo.getType().getTypeName() + " on second table"); + } + } + + // construct the forward operator + RowResolver unionoutRR = new RowResolver(); + for (Map.Entry lEntry: leftmap.entrySet()) { + String field = lEntry.getKey(); + ColumnInfo lInfo = lEntry.getValue(); + unionoutRR.put(unionalias, field, lInfo); + } + + // FIXME CWS redundant code + // If one of the children is a union, merge with it + // else create a new one + if ((leftOp instanceof UnionOperator) || (rightOp instanceof UnionOperator)) { + if (leftOp instanceof UnionOperator) { + // make left a child of right + List> child = new ArrayList>(); + child.add(leftOp); + rightOp.setChildOperators(child); + + List> parent = leftOp.getParentOperators(); + parent.add(rightOp); + + unionDesc uDesc = ((UnionOperator)leftOp).getConf(); + uDesc.setNumInputs(uDesc.getNumInputs()+1); + return putOpInsertMap(leftOp, unionoutRR); + } + else { + // make right a child of left + List> child = new ArrayList>(); + child.add(rightOp); + leftOp.setChildOperators(child); + + List> parent = rightOp.getParentOperators(); + parent.add(leftOp); + unionDesc uDesc = ((UnionOperator)rightOp).getConf(); + uDesc.setNumInputs(uDesc.getNumInputs()+1); + + return putOpInsertMap(rightOp, unionoutRR); + } + } + + // Create a new union operator + Operator unionforward = logicalPlan.newChildOperator(new unionDesc(), unionoutRR); + + // set union operator as child of each of leftOp and rightOp + List> child = new ArrayList>(); + child.add(unionforward); + rightOp.setChildOperators(child); + + child = new ArrayList>(); + child.add(unionforward); + leftOp.setChildOperators(child); + + List> parent = new ArrayList>(); + parent.add(leftOp); + parent.add(rightOp); + unionforward.setParentOperators(parent); + + // create operator info list to return + //return putOpInsertMap(unionforward, unionoutRR); + return unionforward; + } + + /** + * Generates the sampling predicate from the TABLESAMPLE clause information. This function uses the + * bucket column list to decide the expression inputs to the predicate hash function in case useBucketCols + * is set to true, otherwise the expression list stored in the TableSample is used. The bucket columns of + * the table are used to generate this predicate in case no expressions are provided on the TABLESAMPLE + * clause and the table has clustering columns defined in it's metadata. 
+ * The predicate created has the following structure: + * + * ((hash(expressions) & Integer.MAX_VALUE) % denominator) == numerator + * + * @param ts TABLESAMPLE clause information + * @param bucketCols The clustering columns of the table + * @param useBucketCols Flag to indicate whether the bucketCols should be used as input to the hash + * function + * @param alias The alias used for the table in the row resolver + * @param rwsch The row resolver used to resolve column references + * @param qbm The metadata information for the query block which is used to resolve unaliased columns + * @param planExpr The plan tree for the expression. If the user specified this, the parse expressions are not used + * @return exprNodeDesc + * @exception SemanticException + */ + private exprNodeDesc genSamplePredicate(TableSample ts, List bucketCols, + boolean useBucketCols, String alias, + RowResolver rwsch, QBMetaData qbm, exprNodeDesc planExpr) + throws SemanticException { + + exprNodeDesc numeratorExpr = new exprNodeConstantDesc( + TypeInfoFactory.intTypeInfo, + Integer.valueOf(ts.getNumerator() - 1)); + + exprNodeDesc denominatorExpr = new exprNodeConstantDesc( + TypeInfoFactory.intTypeInfo, + Integer.valueOf(ts.getDenominator())); + + exprNodeDesc intMaxExpr = new exprNodeConstantDesc( + TypeInfoFactory.intTypeInfo, + Integer.valueOf(Integer.MAX_VALUE)); + + ArrayList args = new ArrayList(); + if (planExpr != null) { + args.add(planExpr); + } else if (useBucketCols) { + for (String col : bucketCols) { + ColumnInfo ci = rwsch.get(alias, col); + // TODO: change type to the one in the table schema + args.add(new exprNodeColumnDesc(ci.getType(), ci.getInternalName(), + ci.getTabAlias(), ci.getIsPartitionCol())); + } + } else { + for(ASTNode expr: ts.getExprs()) { + args.add(genExprNodeDesc(expr, rwsch)); + } + } + + exprNodeDesc equalsExpr = null; + { + exprNodeDesc hashfnExpr = new exprNodeGenericFuncDesc(TypeInfoFactory.intTypeInfo, + new GenericUDFHash(), args); + assert(hashfnExpr != null); + LOG.info("hashfnExpr = " + hashfnExpr); + exprNodeDesc andExpr = TypeCheckProcFactory.DefaultExprProcessor.getFuncExprNodeDesc("&", hashfnExpr, intMaxExpr); + assert(andExpr != null); + LOG.info("andExpr = " + andExpr); + exprNodeDesc modExpr = TypeCheckProcFactory.DefaultExprProcessor.getFuncExprNodeDesc("%", andExpr, denominatorExpr); + assert(modExpr != null); + LOG.info("modExpr = " + modExpr); + LOG.info("numeratorExpr = " + numeratorExpr); + equalsExpr = TypeCheckProcFactory.DefaultExprProcessor.getFuncExprNodeDesc("==", modExpr, numeratorExpr); + LOG.info("equalsExpr = " + equalsExpr); + assert(equalsExpr != null); + } + return equalsExpr; + } + + @SuppressWarnings("nls") + private Operator genTablePlan(String alias, QB qb) throws SemanticException { + + String alias_id = (qb.getId() == null ? 
alias : qb.getId() + ":" + alias); + Table tab = qb.getMetaData().getSrcForAlias(alias); + RowResolver rwsch; + + // is the table already present + Operator top = logicalPlan.getTopOp(alias_id); + + if (top == null) { + rwsch = new RowResolver(); + try { + StructObjectInspector rowObjectInspector = (StructObjectInspector)tab.getDeserializer().getObjectInspector(); + List fields = rowObjectInspector.getAllStructFieldRefs(); + for (int i=0; i tableOp = top; + TableSample ts = qb.getParseInfo().getTabSample(alias); + if (ts != null) { + int num = ts.getNumerator(); + int den = ts.getDenominator(); + ArrayList sampleExprs = ts.getExprs(); + + // TODO: Do the type checking of the expressions + List tabBucketCols = tab.getBucketCols(); + int numBuckets = tab.getNumBuckets(); + + // If there are no sample cols and no bucket cols then throw an error + if (tabBucketCols.size() == 0 && sampleExprs.size() == 0) { + throw new SemanticException(ErrorMsg.NON_BUCKETED_TABLE.getMsg() + " " + tab.getName()); + } + + // check if a predicate is needed + // predicate is needed if either input pruning is not enough + // or if input pruning is not possible + + // check if the sample columns are the same as the table bucket columns + boolean colsEqual = true; + if ( (sampleExprs.size() != tabBucketCols.size()) && (sampleExprs.size() != 0) ) { + colsEqual = false; + } + + for (int i = 0; i < sampleExprs.size() && colsEqual; i++) { + boolean colFound = false; + for (int j = 0; j < tabBucketCols.size() && !colFound; j++) { + if (sampleExprs.get(i).getToken().getType() != HiveParser.TOK_TABLE_OR_COL) { + break; + } + + if (((ASTNode)sampleExprs.get(i).getChild(0)).getText().equalsIgnoreCase(tabBucketCols.get(j))) { + colFound = true; + } + } + colsEqual = (colsEqual && colFound); + } + + // Check if input can be pruned + ts.setInputPruning((sampleExprs == null || sampleExprs.size() == 0 || colsEqual)); + + // check if input pruning is enough + if ((sampleExprs == null || sampleExprs.size() == 0 || colsEqual) + && (num == den || den <= numBuckets && numBuckets % den == 0)) { + // input pruning is enough; no need for filter + LOG.info("No need for sample filter"); + // TODO sample predicate is not needed, but we are adding it anyway since + // input pruning is broken for subqueries. 
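// Worked example for genSamplePredicate above (assumed query; srcbucket is a placeholder for a
// table bucketed on key):
//
//   SELECT s.* FROM srcbucket TABLESAMPLE (BUCKET 1 OUT OF 4 ON key) s;
//
// gives numerator = 1 and denominator = 4, so the sample filter is equivalent to
//   ((hash(key) & Integer.MAX_VALUE) % 4) == 0
// the numerator in the predicate is one less than in the clause because bucket numbering is 1-based.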
will remove this once we move + // compilation of sampling to use the operator tree + exprNodeDesc samplePredicate = genSamplePredicate(ts, tabBucketCols, colsEqual, alias, rwsch, qb.getMetaData(), null); + + tableOp = logicalPlan.newChildOperator(new filterDesc(samplePredicate, true), rwsch, top); + + } else { + // need to add filter + // create tableOp to be filterDesc and set as child to 'top' + LOG.info("Need sample filter"); + exprNodeDesc samplePredicate = genSamplePredicate(ts, tabBucketCols, colsEqual, alias, rwsch, qb.getMetaData(), null); + + tableOp = logicalPlan.newChildOperator(new filterDesc(samplePredicate, true), rwsch, top); + } + } + else { + boolean testMode = hiveConf.getBoolVar(HiveConf.ConfVars.HIVETESTMODE); + if (testMode) { + String tabName = tab.getName(); + + // has the user explicitly asked not to sample this table + String unSampleTblList = hiveConf.getVar(HiveConf.ConfVars.HIVETESTMODENOSAMPLE); + String[] unSampleTbls = unSampleTblList.split(","); + boolean unsample = false; + for (String unSampleTbl : unSampleTbls) + if (tabName.equalsIgnoreCase(unSampleTbl)) + unsample = true; + + if (!unsample) { + int numBuckets = tab.getNumBuckets(); + + // If the input table is bucketed, choose the first bucket + if (numBuckets > 0) { + TableSample tsSample = new TableSample(1, numBuckets); + tsSample.setInputPruning(true); + qb.getParseInfo().setTabSample(alias, tsSample); + LOG.info("No need for sample filter"); + } + // The table is not bucketed, add a dummy filter :: rand() + else { + int freq = hiveConf.getIntVar(HiveConf.ConfVars.HIVETESTMODESAMPLEFREQ); + TableSample tsSample = new TableSample(1, freq); + tsSample.setInputPruning(false); + qb.getParseInfo().setTabSample(alias, tsSample); + LOG.info("Need sample filter"); + exprNodeDesc randFunc = TypeCheckProcFactory.DefaultExprProcessor.getFuncExprNodeDesc("rand", new exprNodeConstantDesc(Integer.valueOf(460476415))); + exprNodeDesc samplePred = genSamplePredicate(tsSample, null, false, alias, rwsch, qb.getMetaData(), randFunc); + + tableOp = logicalPlan.newChildOperator(new filterDesc(samplePred, true), rwsch, top); + } + } + } + } + + LOG.debug("Created Table Plan for " + alias + " " + tableOp.toString()); + + return tableOp; + } + + private Operator genPlan(QBExpr qbexpr) throws SemanticException { + if (qbexpr.getOpcode() == QBExpr.Opcode.NULLOP) { + return genPlan(qbexpr.getQB()); + } + if (qbexpr.getOpcode() == QBExpr.Opcode.UNION) { + Operator qbexpr1Ops = genPlan(qbexpr.getQBExpr1()); + Operator qbexpr2Ops = genPlan(qbexpr.getQBExpr2()); + + return genUnionPlan(qbexpr.getAlias(), qbexpr.getQBExpr1().getAlias(), + qbexpr1Ops, qbexpr.getQBExpr2().getAlias(), qbexpr2Ops); + } + return null; + } + + @SuppressWarnings("nls") + private Operator genPlan(QB qb) throws SemanticException { + + // First generate all the opInfos for the elements in the from clause + HashMap> aliasToOpInfo = new HashMap>(); + + // Recurse over the subqueries to fill the subquery part of the plan + for (String alias : qb.getSubqAliases()) { + QBExpr qbexpr = qb.getSubqForAlias(alias); + aliasToOpInfo.put(alias, genPlan(qbexpr)); + qbexpr.setAlias(alias); + } + + // Recurse over all the source tables + for (String alias : qb.getTabAliases()) { + aliasToOpInfo.put(alias, genTablePlan(alias, qb)); + } + + Operator srcOpInfo = null; + + // process join + if (qb.getParseInfo().getJoinExpr() != null) { + ASTNode joinExpr = qb.getParseInfo().getJoinExpr(); + + if (joinExpr.getToken().getType() == HiveParser.TOK_UNIQUEJOIN) { + QBJoinTree 
joinTree = genUniqueJoinTree(qb, joinExpr); + qb.setQbJoinTree(joinTree); + } else { + QBJoinTree joinTree = genJoinTree(qb, joinExpr); + qb.setQbJoinTree(joinTree); + mergeJoinTree(qb); + } + + // if any filters are present in the join tree, push them on top of the table + pushJoinFilters(qb, qb.getQbJoinTree(), aliasToOpInfo); + srcOpInfo = genJoinPlan(qb, aliasToOpInfo); + } + else + // Now if there are more than 1 sources then we have a join case + // later we can extend this to the union all case as well + srcOpInfo = aliasToOpInfo.values().iterator().next(); + + Operator bodyOpInfo = genBodyPlan(qb, srcOpInfo); + LOG.debug("Created Plan for Query Block " + qb.getId()); + + return bodyOpInfo; + } + + public LogicalPlan genLogicalPlan(QB qb) throws SemanticException { + logicalPlan = new LogicalPlan(hiveConf); + genPlan(qb); + return logicalPlan; + } + +} Index: ql/src/java/org/apache/hadoop/hive/ql/parse/PhysicalPlanGenerator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/PhysicalPlanGenerator.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/PhysicalPlanGenerator.java (revision 0) @@ -0,0 +1,552 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.parse; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.commons.lang.StringUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.common.JavaUtils; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.Order; +import org.apache.hadoop.hive.ql.Context; +import org.apache.hadoop.hive.ql.exec.ConditionalTask; +import org.apache.hadoop.hive.ql.exec.ExecDriver; +import org.apache.hadoop.hive.ql.exec.MapRedTask; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; +import org.apache.hadoop.hive.ql.exec.TableScanOperator; +import org.apache.hadoop.hive.ql.exec.Task; +import org.apache.hadoop.hive.ql.exec.TaskFactory; +import org.apache.hadoop.hive.ql.exec.Utilities; +import org.apache.hadoop.hive.ql.hooks.ReadEntity; +import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils; +import org.apache.hadoop.hive.ql.io.HiveOutputFormat; +import org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat; +import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; +import org.apache.hadoop.hive.ql.lib.Dispatcher; +import org.apache.hadoop.hive.ql.lib.GraphWalker; +import org.apache.hadoop.hive.ql.lib.Node; +import org.apache.hadoop.hive.ql.lib.NodeProcessor; +import org.apache.hadoop.hive.ql.lib.Rule; +import org.apache.hadoop.hive.ql.lib.RuleRegExp; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.metadata.Partition; +import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.optimizer.GenMRFileSink1; +import org.apache.hadoop.hive.ql.optimizer.GenMROperator; +import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext; +import org.apache.hadoop.hive.ql.optimizer.GenMRRedSink1; +import org.apache.hadoop.hive.ql.optimizer.GenMRRedSink2; +import org.apache.hadoop.hive.ql.optimizer.GenMRRedSink3; +import org.apache.hadoop.hive.ql.optimizer.GenMRRedSink4; +import org.apache.hadoop.hive.ql.optimizer.GenMRTableScan1; +import org.apache.hadoop.hive.ql.optimizer.GenMRUnion1; +import org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils; +import org.apache.hadoop.hive.ql.optimizer.MapJoinFactory; +import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner; +import org.apache.hadoop.hive.ql.plan.DDLWork; +import org.apache.hadoop.hive.ql.plan.createTableDesc; +import org.apache.hadoop.hive.ql.plan.fetchWork; +import org.apache.hadoop.hive.ql.plan.loadFileDesc; +import org.apache.hadoop.hive.ql.plan.loadTableDesc; +import org.apache.hadoop.hive.ql.plan.mapredWork; +import org.apache.hadoop.hive.ql.plan.moveWork; +import org.apache.hadoop.hive.ql.plan.partitionDesc; +import org.apache.hadoop.hive.ql.plan.tableDesc; +import org.apache.hadoop.hive.serde2.SerDeUtils; +import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; +import org.apache.hadoop.mapred.TextInputFormat; + + +public class PhysicalPlanGenerator { + QB qb; + LogicalPlan logicalPlan; + PhysicalPlan physicalPlan; + + private final Log LOG; + + Context context; + + public PhysicalPlanGenerator(Context context, LogicalPlan logicalPlan, QB qb) { + 
this.context = context; + this.logicalPlan = logicalPlan; + this.qb = qb; + LOG = LogFactory.getLog(this.getClass().getName()); + } + + + private HiveConf getHiveConf() { + return logicalPlan.getHiveConf(); + } + + private Context getContext() { + return context; + } + + public PhysicalPlan genPlan(PhysicalPlan physicalPlan) throws SemanticException { + this.physicalPlan = physicalPlan; + this.physicalPlan.addInputs(logicalPlan.getInputs()); + this.physicalPlan.addOutputs(logicalPlan.getOutputs()); + + genMapRedTasks(); + return physicalPlan; + } + + public PhysicalPlan genPlan() throws SemanticException { + return genPlan(new PhysicalPlan()); + } + + @SuppressWarnings({ "nls", "deprecation" }) + private void genMapRedTasks() throws SemanticException { + fetchWork fetch = null; + List> mvTask = new ArrayList>(); + Task fetchTask = null; + + QBParseInfo qbParseInfo = qb.getParseInfo(); + + // Does this query need reduce job + if (qb.isSelectStarQuery() + && qbParseInfo.getDestToClusterBy().isEmpty() + && qbParseInfo.getDestToDistributeBy().isEmpty() + && qbParseInfo.getDestToOrderBy().isEmpty() + && qbParseInfo.getDestToSortBy().isEmpty()) { + boolean noMapRed = false; + + Iterator> iter = qb.getMetaData().getAliasToTable().entrySet().iterator(); + Table tab = ((Map.Entry)iter.next()).getValue(); + if (!tab.isPartitioned()) { + if (qbParseInfo.getDestToWhereExpr().isEmpty()) { + fetch = new fetchWork(tab.getPath().toString(), Utilities.getTableDesc(tab), qb.getParseInfo().getOuterQueryLimit()); + noMapRed = true; + physicalPlan.addInput(new ReadEntity(tab)); + } + } + else { + + Collection> topOps = logicalPlan.getTopOps(); + if (topOps.size() == 1) { + TableScanOperator ts = (TableScanOperator)topOps.toArray()[0]; + + // check if the pruner only contains partition columns + if (PartitionPruner.onlyContainsPartnCols(logicalPlan.getTable(ts), logicalPlan.getPartPruner(ts))) { + + PrunedPartitionList partsList = null; + try { + partsList = PartitionPruner.prune(logicalPlan.getTable(ts), + logicalPlan.getPartPruner(ts), + getHiveConf(), + (String)logicalPlan.getTopOpAliases().toArray()[0]); + } catch (HiveException e) { + // Has to use full name to make sure it does not conflict with org.apache.commons.lang.StringUtils + LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e)); + throw new SemanticException(e.getMessage(), e); + } + + // If there is any unknown partition, create a map-reduce job for the filter to prune correctly + if (partsList.getUnknownPartns().size() == 0) { + List listP = new ArrayList(); + List partP = new ArrayList(); + + Set parts = partsList.getConfirmedPartns(); + Iterator iterParts = parts.iterator(); + while (iterParts.hasNext()) { + Partition part = iterParts.next(); + listP.add(part.getPartitionPath().toString()); + try{ + partP.add(Utilities.getPartitionDesc(part)); + } catch (HiveException e) { + throw new SemanticException(e.getMessage(), e); + } + physicalPlan.addInput(new ReadEntity(part)); + } + + fetch = new fetchWork(listP, partP, qb.getParseInfo().getOuterQueryLimit()); + noMapRed = true; + } + } + } + } + + if (noMapRed) { + fetchTask = TaskFactory.get(fetch, getHiveConf()); + physicalPlan.setFetchTask(fetchTask); + + // remove root tasks if any + physicalPlan.clearRootTasks(); + return; + } + } + + List loadFileWork = logicalPlan.getLoadFileWork(); + List loadTableWork = logicalPlan.getLoadTableWork(); + + // In case of a select, use a fetch task instead of a move task + if (qb.getIsQuery()) { + + if ((!loadTableWork.isEmpty()) || 
(loadFileWork.size() != 1)) { + throw new SemanticException(ErrorMsg.GENERIC_ERROR.getMsg()); + } + + loadFileDesc loadFile = loadFileWork.get(0); + String cols = loadFile.getColumns(); + String colTypes = loadFile.getColumnTypes(); + + fetch = new fetchWork(new Path(loadFile.getSourceDir()).toString(), + new tableDesc(LazySimpleSerDe.class, TextInputFormat.class, + IgnoreKeyTextOutputFormat.class, + Utilities.makeProperties( + org.apache.hadoop.hive.serde.Constants.SERIALIZATION_FORMAT, "" + Utilities.ctrlaCode, + org.apache.hadoop.hive.serde.Constants.LIST_COLUMNS, cols, + org.apache.hadoop.hive.serde.Constants.LIST_COLUMN_TYPES, colTypes)), + qb.getParseInfo().getOuterQueryLimit()); + + fetchTask = TaskFactory.get(fetch, getHiveConf()); + physicalPlan.setFetchTask(fetchTask); + } else { + // First we generate the move work as this needs to be made dependent on all + // the tasks that have a file sink operation + for (loadTableDesc ltd : loadTableWork) { + mvTask.add(TaskFactory.get(new moveWork(physicalPlan, ltd, null, false), getHiveConf())); + } + + boolean oneLoadFile = true; + for (loadFileDesc lfd : loadFileWork) { + if ( qb.isCTAS() ) { + assert(oneLoadFile); // should not have more than 1 load file for CTAS + // make the movetask's destination directory the table's destination. + String location = qb.getTableDesc().getLocation(); + if ( location == null ) { + // get the table's default location + location = getHiveConf().getVar(HiveConf.ConfVars.METASTOREWAREHOUSE); + assert(location.length() > 0 ); + if ( location.charAt(location.length()-1) != '/' ) { + location += '/'; + } + location += qb.getTableDesc().getTableName().toLowerCase(); + } + lfd.setTargetDir(location); + oneLoadFile = false; + } + mvTask.add(TaskFactory.get(new moveWork(physicalPlan, null, lfd, false), getHiveConf())); + } + } + + // generate map reduce plans + GenMRProcContext procCtx = new GenMRProcContext(getContext(), logicalPlan, physicalPlan, mvTask); + + // create a walker which walks the tree in a DFS manner while maintaining the operator stack. 
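// For reference, a minimal sketch of the rule-dispatch pattern set up below (the rule name, the
// regexp and the processor/context variables are illustrative placeholders; the classes are the
// ones already used in this file):
//
//   Map<Rule, NodeProcessor> rules = new LinkedHashMap<Rule, NodeProcessor>();
//   // fires when a ReduceSinkOperator is reached on a path rooted at a TableScanOperator
//   rules.put(new RuleRegExp("example", "TS%.*RS%"), someProcessor);
//   Dispatcher d = new DefaultRuleDispatcher(defaultProcessor, rules, procCtx);
//   GraphWalker w = new GenMapRedWalker(d);
//   w.startWalking(topNodes, null);
//
// Each "X%" token names an operator type on the current walk stack, so the rules below let the
// GenMR* and MapJoinFactory processors react to shapes such as table-scan-to-reduce-sink or
// union-above-map-join while the walk carves the operator DAG into map-reduce tasks.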
+ // The dispatcher generates the plan from the operator tree + Map opRules = new LinkedHashMap(); + opRules.put(new RuleRegExp(new String("R1"), "TS%"), new GenMRTableScan1()); + opRules.put(new RuleRegExp(new String("R2"), "TS%.*RS%"), new GenMRRedSink1()); + opRules.put(new RuleRegExp(new String("R3"), "RS%.*RS%"), new GenMRRedSink2()); + opRules.put(new RuleRegExp(new String("R4"), "FS%"), new GenMRFileSink1()); + opRules.put(new RuleRegExp(new String("R5"), "UNION%"), new GenMRUnion1()); + opRules.put(new RuleRegExp(new String("R6"), "UNION%.*RS%"), new GenMRRedSink3()); + opRules.put(new RuleRegExp(new String("R6"), "MAPJOIN%.*RS%"), new GenMRRedSink4()); + opRules.put(new RuleRegExp(new String("R7"), "TS%.*MAPJOIN%"), MapJoinFactory.getTableScanMapJoin()); + opRules.put(new RuleRegExp(new String("R8"), "RS%.*MAPJOIN%"), MapJoinFactory.getReduceSinkMapJoin()); + opRules.put(new RuleRegExp(new String("R9"), "UNION%.*MAPJOIN%"), MapJoinFactory.getUnionMapJoin()); + opRules.put(new RuleRegExp(new String("R10"), "MAPJOIN%.*MAPJOIN%"), MapJoinFactory.getMapJoinMapJoin()); + opRules.put(new RuleRegExp(new String("R11"), "MAPJOIN%SEL%"), MapJoinFactory.getMapJoin()); + + // The dispatcher fires the processor corresponding to the closest matching rule and passes the context along + Dispatcher disp = new DefaultRuleDispatcher(new GenMROperator(), opRules, procCtx); + + GraphWalker ogw = new GenMapRedWalker(disp); + ArrayList topNodes = new ArrayList(); + topNodes.addAll(logicalPlan.getTopOps()); + ogw.startWalking(topNodes, null); + + List> rootTasks = physicalPlan.getRootTasks(); + + // reduce sink does not have any kids - since the plan by now has been broken up into multiple + // tasks, iterate over all tasks. + // For each task, go over all operators recursively + for (Task rootTask: rootTasks) { + breakTaskTree(rootTask); + } + + // For each task, set the key descriptor for the reducer + for (Task rootTask: rootTasks) { + setKeyDescTaskTree(rootTask); + } + + // For each operator, generate the counters if needed + if (HiveConf.getBoolVar(getHiveConf(), HiveConf.ConfVars.HIVEJOBPROGRESS)) { + for (Task rootTask: rootTasks) { + generateCountersTask(rootTask); + } + } + + if ( qb.isCTAS() ) { + // generate a DDL task and make it a dependent task of the leaf + createTableDesc crtTblDesc = qb.getTableDesc(); + + validateCreateTable(crtTblDesc); + + // Clear the output for CTAS since we don't need the output from the mapredWork, the + // DDLWork at the tail of the chain will have the output + physicalPlan.clearOutputs(); + + Task crtTblTask = + TaskFactory.get(new DDLWork(physicalPlan, crtTblDesc), getHiveConf()); + + // find all leaf tasks and make the DDLTask as a dependent task of all of them + HashSet> leaves = new HashSet>(); + getLeafTasks(rootTasks, leaves); + assert(leaves.size() > 0); + for ( Task task: leaves ) { + task.addDependentTask(crtTblTask); + } + } + } + + /** + * Find all leaf tasks of the list of root tasks. + */ + private void getLeafTasks(Collection> rootTasks, + HashSet> leaves) { + + for ( Task root : rootTasks ) { + getLeafTasks(root, leaves); + } + } + + private void getLeafTasks(Task task, + HashSet> leaves) { + if ( task.getChildTasks() == null ) { + if ( ! 
leaves.contains(task) ) { + leaves.add(task); + } + } else { + getLeafTasks(task.getChildTasks(), leaves); + } + } + + + + // loop over all the tasks recursviely + private void generateCountersTask(Task task) { + if ((task instanceof MapRedTask) || (task instanceof ExecDriver)) { + HashMap> opMap = ((mapredWork)task.getWork()).getAliasToWork(); + if (!opMap.isEmpty()) { + for (Operator op: opMap.values()) { + generateCountersOperator(op); + } + } + + Operator reducer = ((mapredWork)task.getWork()).getReducer(); + if (reducer != null) { + LOG.info("Generating counters for operator " + reducer); + generateCountersOperator(reducer); + } + } + else if (task instanceof ConditionalTask) { + List> listTasks = ((ConditionalTask)task).getListTasks(); + for (Task tsk : listTasks) + generateCountersTask(tsk); + } + + // Start the counters from scratch - a hack for hadoop 17. + Operator.resetLastEnumUsed(); + + if (task.getChildTasks() == null) + return; + + for (Task childTask : task.getChildTasks()) + generateCountersTask(childTask); + } + + + private void generateCountersOperator(Operator op) { + op.assignCounterNameToEnum(); + + if (op.getChildOperators() == null) + return; + + for (Operator child: op.getChildOperators()) + generateCountersOperator(child); + } + + // loop over all the tasks recursviely + private void breakTaskTree(Task task) { + + if ((task instanceof MapRedTask) || (task instanceof ExecDriver)) { + HashMap> opMap = ((mapredWork)task.getWork()).getAliasToWork(); + if (!opMap.isEmpty()) + for (Operator op: opMap.values()) { + breakOperatorTree(op); + } + } + else if (task instanceof ConditionalTask) { + List> listTasks = ((ConditionalTask)task).getListTasks(); + for (Task tsk : listTasks) + breakTaskTree(tsk); + } + + if (task.getChildTasks() == null) + return; + + for (Task childTask : task.getChildTasks()) + breakTaskTree(childTask); + } + + // loop over all the operators recursviely + private void breakOperatorTree(Operator topOp) { + if (topOp instanceof ReduceSinkOperator) + topOp.setChildOperators(null); + + if (topOp.getChildOperators() == null) + return; + + for (Operator op: topOp.getChildOperators()) + breakOperatorTree(op); + } + + // loop over all the tasks recursviely + private void setKeyDescTaskTree(Task task) { + + if ((task instanceof MapRedTask) || (task instanceof ExecDriver)) { + mapredWork work = (mapredWork)task.getWork(); + HashMap> opMap = work.getAliasToWork(); + if (!opMap.isEmpty()) + for (Operator op: opMap.values()) + GenMapRedUtils.setKeyAndValueDesc(work, op); + } + else if (task instanceof ConditionalTask) { + List> listTasks = ((ConditionalTask)task).getListTasks(); + for (Task tsk : listTasks) + setKeyDescTaskTree(tsk); + } + + if (task.getChildTasks() == null) + return; + + for (Task childTask : task.getChildTasks()) + setKeyDescTaskTree(childTask); + } + + // TODO CWS duplicated from DMLSemanticAnalyzer + @SuppressWarnings("unchecked") + private void validateCreateTable(createTableDesc crtTblDesc) throws SemanticException { + // no duplicate column names + // currently, it is a simple n*n algorithm - this can be optimized later if need be + // but it should not be a major bottleneck as the number of columns are anyway not so big + + if((crtTblDesc.getCols() == null) || (crtTblDesc.getCols().size() == 0)) { + // for now make sure that serde exists + if(StringUtils.isEmpty(crtTblDesc.getSerName()) || SerDeUtils.isNativeSerDe(crtTblDesc.getSerName())) { + throw new SemanticException(ErrorMsg.INVALID_TBL_DDL_SERDE.getMsg()); + } + return; + } + + try 
{ + Class origin = Class.forName(crtTblDesc.getOutputFormat(), true, JavaUtils.getClassLoader()); + Class replaced = HiveFileFormatUtils.getOutputFormatSubstitute(origin); + if(replaced == null) + throw new SemanticException(ErrorMsg.INVALID_OUTPUT_FORMAT_TYPE.getMsg()); + } catch (ClassNotFoundException e) { + throw new SemanticException(ErrorMsg.INVALID_OUTPUT_FORMAT_TYPE.getMsg()); + } + + Iterator iterCols = crtTblDesc.getCols().iterator(); + List colNames = new ArrayList(); + while (iterCols.hasNext()) { + String colName = iterCols.next().getName(); + Iterator iter = colNames.iterator(); + while (iter.hasNext()) { + String oldColName = iter.next(); + if (colName.equalsIgnoreCase(oldColName)) + throw new SemanticException(ErrorMsg.DUPLICATE_COLUMN_NAMES.getMsg()); + } + colNames.add(colName); + } + + if (crtTblDesc.getBucketCols() != null) + { + // all columns in cluster and sort are valid columns + Iterator bucketCols = crtTblDesc.getBucketCols().iterator(); + while (bucketCols.hasNext()) { + String bucketCol = bucketCols.next(); + boolean found = false; + Iterator colNamesIter = colNames.iterator(); + while (colNamesIter.hasNext()) { + String colName = colNamesIter.next(); + if (bucketCol.equalsIgnoreCase(colName)) { + found = true; + break; + } + } + if (!found) + throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg()); + } + } + + if (crtTblDesc.getSortCols() != null) + { + // all columns in cluster and sort are valid columns + Iterator sortCols = crtTblDesc.getSortCols().iterator(); + while (sortCols.hasNext()) { + String sortCol = sortCols.next().getCol(); + boolean found = false; + Iterator colNamesIter = colNames.iterator(); + while (colNamesIter.hasNext()) { + String colName = colNamesIter.next(); + if (sortCol.equalsIgnoreCase(colName)) { + found = true; + break; + } + } + if (!found) + throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg()); + } + } + + if (crtTblDesc.getPartCols() != null) + { + // there is no overlap between columns and partitioning columns + Iterator partColsIter = crtTblDesc.getPartCols().iterator(); + while (partColsIter.hasNext()) { + String partCol = partColsIter.next().getName(); + Iterator colNamesIter = colNames.iterator(); + while (colNamesIter.hasNext()) { + String colName = ParseUtils.unescapeIdentifier(colNamesIter.next()); + if (partCol.equalsIgnoreCase(colName)) + throw new SemanticException(ErrorMsg.COLUMN_REPEATED_IN_PARTITIONING_COLS.getMsg()); + } + } + } + } + +} Index: ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java (revision 836131) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java (working copy) @@ -83,7 +83,12 @@ case HiveParser.TOK_CREATEFUNCTION: case HiveParser.TOK_DROPFUNCTION: return new FunctionSemanticAnalyzer(conf); - default: return new SemanticAnalyzer(conf); + case HiveParser.TOK_QUERY: + case HiveParser.TOK_CREATETABLE: + return new DMLSemanticAnalyzer(conf); + default: + throw new SemanticException("Unrecognized parser token type " + + tree.getToken().toString()); } } } Index: ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java (revision 836131) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java (working copy) @@ -19,10 +19,6 @@ package 
org.apache.hadoop.hive.ql.parse; import java.util.*; -import java.io.File; -import java.io.IOException; -import java.io.Serializable; -import java.io.UnsupportedEncodingException; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -30,12 +26,9 @@ import org.apache.hadoop.hive.ql.Context; import org.apache.hadoop.hive.ql.metadata.*; import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.ql.exec.Task; import org.apache.hadoop.hive.ql.session.SessionState.LogHelper; import org.apache.hadoop.hive.ql.metadata.Partition; -import org.apache.hadoop.hive.ql.hooks.ReadEntity; -import org.apache.hadoop.hive.ql.hooks.WriteEntity; import org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat; import org.apache.hadoop.hive.ql.io.RCFileInputFormat; import org.apache.hadoop.hive.ql.io.RCFileOutputFormat; @@ -49,26 +42,16 @@ import org.apache.hadoop.mapred.TextInputFormat; public abstract class BaseSemanticAnalyzer { - protected final Hive db; - protected final HiveConf conf; - protected List> rootTasks; - protected Task fetchTask; - protected boolean fetchTaskInit; + private final Hive db; + private final HiveConf hiveConf; + protected final Log LOG; protected final LogHelper console; - protected Context ctx; - protected HashMap idToTableNameMap; + private Context context; - /** - * ReadEntitites that are passed to the hooks. - */ - protected Set inputs; - /** - * List of WriteEntities that are passed to the hooks. - */ - protected Set outputs; - + protected PhysicalPlan physicalPlan; + protected static final String TEXTFILE_INPUT = TextInputFormat.class.getName(); protected static final String TEXTFILE_OUTPUT = IgnoreKeyTextOutputFormat.class.getName(); protected static final String SEQUENCEFILE_INPUT = SequenceFileInputFormat.class.getName(); @@ -79,199 +62,60 @@ public BaseSemanticAnalyzer(HiveConf conf) throws SemanticException { try { - this.conf = conf; + this.hiveConf = conf; db = Hive.get(conf); - rootTasks = new ArrayList>(); LOG = LogFactory.getLog(this.getClass().getName()); console = new LogHelper(LOG); - this.idToTableNameMap = new HashMap(); - inputs = new LinkedHashSet(); - outputs = new LinkedHashSet(); + physicalPlan = new PhysicalPlan(); } catch (Exception e) { throw new SemanticException (e); } } - public HashMap getIdToTableNameMap() { - return idToTableNameMap; - } - - public abstract void analyzeInternal(ASTNode ast) throws SemanticException; public void analyze(ASTNode ast, Context ctx) throws SemanticException { - this.ctx = ctx; + this.context = ctx; analyzeInternal(ast); } public void validate() throws SemanticException { // Implementations may choose to override this } - - public List> getRootTasks() { - return rootTasks; + + public Hive getDB() { + return db; } - - /** - * @return the fetchTask - */ - public Task getFetchTask() { - return fetchTask; + + public HiveConf getHiveConf() { + return hiveConf; } - - /** - * @param fetchTask the fetchTask to set - */ - public void setFetchTask(Task fetchTask) { - this.fetchTask = fetchTask; + + public Context getContext() { + return context; } - - public boolean getFetchTaskInit() { - return fetchTaskInit; + + public void setContext(Context context) { + this.context = context; } - public void setFetchTaskInit(boolean fetchTaskInit) { - this.fetchTaskInit = fetchTaskInit; + public PhysicalPlan getPhysicalPlan() { + return physicalPlan; } + + + // TODO CWS called from Driver.execute() + public Map getIdToTableNameMap() { + return new HashMap(); + } + protected void reset() { - 
rootTasks = new ArrayList>(); + getPhysicalPlan().clearRootTasks(); } - public static String stripQuotes(String val) throws SemanticException { - if ((val.charAt(0) == '\'' && val.charAt(val.length() - 1) == '\'') - || (val.charAt(0) == '\"' && val.charAt(val.length() - 1) == '\"')) { - val = val.substring(1, val.length() - 1); - } - return val; - } - - public static String charSetString(String charSetName, String charSetString) - throws SemanticException { - try - { - // The character set name starts with a _, so strip that - charSetName = charSetName.substring(1); - if (charSetString.charAt(0) == '\'') - return new String(unescapeSQLString(charSetString).getBytes(), charSetName); - else // hex input is also supported - { - assert charSetString.charAt(0) == '0'; - assert charSetString.charAt(1) == 'x'; - charSetString = charSetString.substring(2); - - byte[] bArray = new byte[charSetString.length()/2]; - int j = 0; - for (int i = 0; i < charSetString.length(); i += 2) - { - int val = Character.digit(charSetString.charAt(i), 16) * 16 + Character.digit(charSetString.charAt(i+1), 16); - if (val > 127) - val = val - 256; - bArray[j++] = new Integer(val).byteValue(); - } - - String res = new String(bArray, charSetName); - return res; - } - } catch (UnsupportedEncodingException e) { - throw new SemanticException(e); - } - } - /** - * Remove the encapsulating "`" pair from the identifier. - * We allow users to use "`" to escape identifier for table names, - * column names and aliases, in case that coincide with Hive language - * keywords. - */ - public static String unescapeIdentifier(String val) { - if (val == null) { - return null; - } - if (val.charAt(0) == '`' && val.charAt(val.length() - 1) == '`') { - val = val.substring(1, val.length() - 1); - } - return val; - } - - @SuppressWarnings("nls") - public static String unescapeSQLString(String b) { - - Character enclosure = null; - - // Some of the strings can be passed in as unicode. 
For example, the - // delimiter can be passed in as \002 - So, we first check if the - // string is a unicode number, else go back to the old behavior - StringBuilder sb = new StringBuilder(b.length()); - for (int i=0; i < b.length(); i++) { - - char currentChar = b.charAt(i); - if (enclosure == null) { - if (currentChar == '\'' || b.charAt(i) == '\"') { - enclosure = currentChar; - } - // ignore all other chars outside the enclosure - continue; - } - - if (enclosure.equals(currentChar)) { - enclosure = null; - continue; - } - - if (currentChar == '\\' && (i+4 < b.length())) { - char i1 = b.charAt(i+1); - char i2 = b.charAt(i+2); - char i3 = b.charAt(i+3); - if ((i1 >= '0' && i1 <= '1') && - (i2 >= '0' && i2 <= '7') && - (i3 >= '0' && i3 <= '7')) - { - byte bVal = (byte)((i3 - '0') + ((i2 - '0') * 8 ) + ((i1 - '0') * 8 * 8)); - byte[] bValArr = new byte[1]; - bValArr[0] = bVal; - String tmp = new String(bValArr); - sb.append(tmp); - i += 3; - continue; - } - } - - if (currentChar == '\\' && (i+2 < b.length())) { - char n=b.charAt(i+1); - switch(n) { - case '0': sb.append("\0"); break; - case '\'': sb.append("'"); break; - case '"': sb.append("\""); break; - case 'b': sb.append("\b"); break; - case 'n': sb.append("\n"); break; - case 'r': sb.append("\r"); break; - case 't': sb.append("\t"); break; - case 'Z': sb.append("\u001A"); break; - case '\\': sb.append("\\"); break; - // The following 2 lines are exactly what MySQL does - case '%': sb.append("\\%"); break; - case '_': sb.append("\\_"); break; - default: sb.append(n); - } - i++; - } else { - sb.append(currentChar); - } - } - return sb.toString(); - } - - public Set getInputs() { - return inputs; - } - - public Set getOutputs() { - return outputs; - } - - /** * Get the list of FieldSchema out of the ASTNode. 
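To make the partition handling in the hunk that follows concrete: for a destination written as, say, TABLE t PARTITION (ds = '2009-01-01', hr = '12'), the loop over the partition-spec children strips the quotes from each value and lower-cases each key through ParseUtils, leaving partSpec as the ordered map {ds=2009-01-01, hr=12}; as the surrounding comment notes, the partition object itself is still created later by MoveTask.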
*/ protected List getColumns(ASTNode ast) throws SemanticException @@ -283,14 +127,14 @@ ASTNode child = (ASTNode)ast.getChild(i); // child 0 is the name of the column - col.setName(unescapeIdentifier(child.getChild(0).getText())); + col.setName(ParseUtils.unescapeIdentifier(child.getChild(0).getText())); // child 1 is the type of the column ASTNode typeChild = (ASTNode)(child.getChild(1)); col.setType(getTypeStringFromAST(typeChild)); // child 2 is the optional comment of the column if (child.getChildCount() == 3) - col.setComment(unescapeSQLString(child.getChild(2).getText())); + col.setComment(ParseUtils.unescapeSQLString(child.getChild(2).getText())); colList.add(col); } return colList; @@ -302,7 +146,7 @@ int numCh = ast.getChildCount(); for (int i = 0; i < numCh; i++) { ASTNode child = (ASTNode)ast.getChild(i); - colList.add(unescapeIdentifier(child.getText())); + colList.add(ParseUtils.unescapeIdentifier(child.getText())); } return colList; } @@ -314,9 +158,9 @@ for (int i = 0; i < numCh; i++) { ASTNode child = (ASTNode)ast.getChild(i); if (child.getToken().getType() == HiveParser.TOK_TABSORTCOLNAMEASC) - colList.add(new Order(unescapeIdentifier(child.getChild(0).getText()), 1)); + colList.add(new Order(ParseUtils.unescapeIdentifier(child.getChild(0).getText()), 1)); else - colList.add(new Order(unescapeIdentifier(child.getChild(0).getText()), 0)); + colList.add(new Order(ParseUtils.unescapeIdentifier(child.getChild(0).getText()), 0)); } return colList; } @@ -346,7 +190,7 @@ throw new SemanticException("empty struct not allowed."); for (int i = 0; i < children; i++) { ASTNode child = (ASTNode) typeNode.getChild(i); - typeStr += unescapeIdentifier(child.getChild(0).getText()) + ":"; + typeStr += ParseUtils.unescapeIdentifier(child.getChild(0).getText()) + ":"; typeStr += getTypeStringFromAST((ASTNode) child.getChild(1)); if (i < children - 1) typeStr += ","; @@ -370,7 +214,7 @@ try { // get table metadata - tableName = unescapeIdentifier(ast.getChild(0).getText()); + tableName = ParseUtils.unescapeIdentifier(ast.getChild(0).getText()); boolean testMode = conf.getBoolVar(HiveConf.ConfVars.HIVETESTMODE); if (testMode) tableName = conf.getVar(HiveConf.ConfVars.HIVETESTMODEPREFIX) + tableName; @@ -388,8 +232,8 @@ partSpec = new LinkedHashMap(); for (int i = 0; i < partspec.getChildCount(); ++i) { ASTNode partspec_val = (ASTNode) partspec.getChild(i); - String val = stripQuotes(partspec_val.getChild(1).getText()); - partSpec.put(unescapeIdentifier(partspec_val.getChild(0).getText().toLowerCase()), val); + String val = ParseUtils.stripQuotes(partspec_val.getChild(1).getText()); + partSpec.put(ParseUtils.unescapeIdentifier(partspec_val.getChild(0).getText().toLowerCase()), val); } try { // this doesn't create partition. partition is created in MoveTask @@ -402,10 +246,11 @@ public String toString() { - if(partHandle != null) + if(partHandle != null) { return partHandle.toString(); - else + } else { return tableHandle.toString(); + } } } } Index: ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (revision 836131) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (working copy) @@ -1,5329 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.parse; - -import java.io.Serializable; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Formatter; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.LinkedHashMap; -import java.util.LinkedHashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.TreeSet; -import java.util.Vector; -import java.util.regex.Pattern; -import java.util.regex.PatternSyntaxException; -import java.lang.ClassNotFoundException; - -import org.apache.commons.lang.StringUtils; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.common.FileUtils; -import org.apache.hadoop.hive.common.JavaUtils; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.MetaStoreUtils; -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.ql.exec.ColumnInfo; -import org.apache.hadoop.hive.ql.exec.ConditionalTask; -import org.apache.hadoop.hive.ql.exec.ExecDriver; -import org.apache.hadoop.hive.ql.exec.FunctionRegistry; -import org.apache.hadoop.hive.ql.exec.GroupByOperator; -import org.apache.hadoop.hive.ql.exec.JoinOperator; -import org.apache.hadoop.hive.ql.exec.MapJoinOperator; -import org.apache.hadoop.hive.ql.exec.MapRedTask; -import org.apache.hadoop.hive.ql.exec.Operator; -import org.apache.hadoop.hive.ql.exec.OperatorFactory; -import org.apache.hadoop.hive.ql.exec.RecordReader; -import org.apache.hadoop.hive.ql.exec.RecordWriter; -import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; -import org.apache.hadoop.hive.ql.exec.RowSchema; -import org.apache.hadoop.hive.ql.exec.TableScanOperator; -import org.apache.hadoop.hive.ql.exec.Task; -import org.apache.hadoop.hive.ql.exec.TaskFactory; -import org.apache.hadoop.hive.ql.exec.UnionOperator; -import org.apache.hadoop.hive.ql.exec.UDF; -import org.apache.hadoop.hive.ql.exec.Utilities; -import org.apache.hadoop.hive.ql.io.HiveOutputFormat; -import org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat; -import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker; -import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; -import org.apache.hadoop.hive.ql.lib.Dispatcher; -import org.apache.hadoop.hive.ql.lib.GraphWalker; -import org.apache.hadoop.hive.ql.lib.Node; -import org.apache.hadoop.hive.ql.lib.NodeProcessor; -import org.apache.hadoop.hive.ql.lib.Rule; -import org.apache.hadoop.hive.ql.lib.RuleRegExp; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.metadata.InvalidTableException; -import org.apache.hadoop.hive.ql.metadata.Partition; -import org.apache.hadoop.hive.ql.metadata.Table; -import org.apache.hadoop.hive.ql.optimizer.MapJoinFactory; -import org.apache.hadoop.hive.ql.optimizer.GenMRFileSink1; -import org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils; -import org.apache.hadoop.hive.ql.optimizer.GenMROperator; -import 
org.apache.hadoop.hive.ql.optimizer.GenMRProcContext; -import org.apache.hadoop.hive.ql.optimizer.GenMRRedSink1; -import org.apache.hadoop.hive.ql.optimizer.GenMRRedSink2; -import org.apache.hadoop.hive.ql.optimizer.GenMRTableScan1; -import org.apache.hadoop.hive.ql.optimizer.GenMRUnion1; -import org.apache.hadoop.hive.ql.optimizer.Optimizer; -import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx; -import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext; -import org.apache.hadoop.hive.ql.optimizer.GenMRRedSink3; -import org.apache.hadoop.hive.ql.optimizer.GenMRRedSink4; -import org.apache.hadoop.hive.ql.plan.PlanUtils; -import org.apache.hadoop.hive.ql.plan.aggregationDesc; -import org.apache.hadoop.hive.ql.plan.exprNodeColumnDesc; -import org.apache.hadoop.hive.ql.plan.exprNodeConstantDesc; -import org.apache.hadoop.hive.ql.plan.exprNodeDesc; -import org.apache.hadoop.hive.ql.plan.exprNodeGenericFuncDesc; -import org.apache.hadoop.hive.ql.plan.exprNodeNullDesc; -import org.apache.hadoop.hive.ql.plan.extractDesc; -import org.apache.hadoop.hive.ql.plan.fetchWork; -import org.apache.hadoop.hive.ql.plan.fileSinkDesc; -import org.apache.hadoop.hive.ql.plan.filterDesc; -import org.apache.hadoop.hive.ql.plan.forwardDesc; -import org.apache.hadoop.hive.ql.plan.groupByDesc; -import org.apache.hadoop.hive.ql.plan.joinDesc; -import org.apache.hadoop.hive.ql.plan.limitDesc; -import org.apache.hadoop.hive.ql.plan.loadFileDesc; -import org.apache.hadoop.hive.ql.plan.loadTableDesc; -import org.apache.hadoop.hive.ql.plan.mapredWork; -import org.apache.hadoop.hive.ql.plan.moveWork; -import org.apache.hadoop.hive.ql.plan.partitionDesc; -import org.apache.hadoop.hive.ql.plan.reduceSinkDesc; -import org.apache.hadoop.hive.ql.plan.scriptDesc; -import org.apache.hadoop.hive.ql.plan.selectDesc; -import org.apache.hadoop.hive.ql.plan.tableDesc; -import org.apache.hadoop.hive.ql.plan.tableScanDesc; -import org.apache.hadoop.hive.ql.plan.unionDesc; -import org.apache.hadoop.hive.ql.ppd.PredicatePushDown; -import org.apache.hadoop.hive.ql.session.SessionState; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFHash; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode; -import org.apache.hadoop.hive.serde2.Deserializer; -import org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe; -import org.apache.hadoop.hive.serde2.SerDeException; -import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.StructField; -import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; -import org.apache.hadoop.mapred.InputFormat; -import org.apache.hadoop.mapred.TextInputFormat; -import org.apache.hadoop.hive.serde.Constants; -import org.apache.hadoop.hive.common.JavaUtils; -import org.apache.hadoop.hive.ql.exec.TextRecordReader; -import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner; - -import org.apache.hadoop.hive.ql.hooks.ReadEntity; -import org.apache.hadoop.hive.ql.hooks.WriteEntity; - -import java.util.regex.Pattern; -import java.util.regex.Matcher; -import 
org.apache.hadoop.hive.metastore.api.Order; -import org.apache.hadoop.mapred.SequenceFileInputFormat; -import org.apache.hadoop.mapred.SequenceFileOutputFormat; -import org.apache.hadoop.mapred.TextInputFormat; -import org.apache.hadoop.hive.ql.io.RCFileInputFormat; -import org.apache.hadoop.hive.ql.io.RCFileOutputFormat; -import org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe; -import org.apache.hadoop.hive.ql.plan.DDLWork; -import org.apache.hadoop.hive.ql.plan.createTableDesc; -import org.apache.hadoop.hive.ql.plan.createTableLikeDesc; -import org.apache.hadoop.hive.ql.Context; -import org.apache.hadoop.hive.ql.Driver; -import org.apache.hadoop.hive.serde2.SerDeUtils; -import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils; -import org.apache.hadoop.hive.ql.exec.FileSinkOperator; -import org.apache.hadoop.hive.ql.exec.FetchOperator; -import java.util.Collection; -import org.apache.hadoop.hive.metastore.api.StorageDescriptor; -import org.apache.hadoop.hive.ql.hooks.WriteEntity; - -/** - * Implementation of the semantic analyzer - */ - -public class SemanticAnalyzer extends BaseSemanticAnalyzer { - private HashMap opToPartPruner; - private HashMap aliasToSamplePruner; - private HashMap> topOps; - private HashMap> topSelOps; - private LinkedHashMap, OpParseContext> opParseCtx; - private List loadTableWork; - private List loadFileWork; - private Map joinContext; - private HashMap topToTable; - private QB qb; - private ASTNode ast; - private int destTableId; - private UnionProcContext uCtx; - List listMapJoinOpsNoReducer; - - private static class Phase1Ctx { - String dest; - int nextNum; - } - - public SemanticAnalyzer(HiveConf conf) throws SemanticException { - - super(conf); - - this.opToPartPruner = new HashMap(); - this.aliasToSamplePruner = new HashMap(); - this.topOps = new HashMap>(); - this.topSelOps = new HashMap>(); - this.loadTableWork = new ArrayList(); - this.loadFileWork = new ArrayList(); - opParseCtx = new LinkedHashMap, OpParseContext>(); - joinContext = new HashMap(); - topToTable = new HashMap(); - this.destTableId = 1; - this.uCtx = null; - this.listMapJoinOpsNoReducer = new ArrayList(); - } - - @Override - protected void reset() { - super.reset(); - this.loadTableWork.clear(); - this.loadFileWork.clear(); - this.topOps.clear(); - this.topSelOps.clear(); - this.destTableId = 1; - this.idToTableNameMap.clear(); - qb = null; - ast = null; - uCtx = null; - this.aliasToSamplePruner.clear(); - this.joinContext.clear(); - this.opParseCtx.clear(); - } - - public void init(ParseContext pctx) { - opToPartPruner = pctx.getOpToPartPruner(); - aliasToSamplePruner = pctx.getAliasToSamplePruner(); - topOps = pctx.getTopOps(); - topSelOps = pctx.getTopSelOps(); - opParseCtx = pctx.getOpParseCtx(); - loadTableWork = pctx.getLoadTableWork(); - loadFileWork = pctx.getLoadFileWork(); - joinContext = pctx.getJoinContext(); - ctx = pctx.getContext(); - destTableId = pctx.getDestTableId(); - idToTableNameMap = pctx.getIdToTableNameMap(); - this.uCtx = pctx.getUCtx(); - this.listMapJoinOpsNoReducer = pctx.getListMapJoinOpsNoReducer(); - qb = pctx.getQB(); - } - - public ParseContext getParseContext() { - return new ParseContext(conf, qb, ast, opToPartPruner, aliasToSamplePruner, topOps, - topSelOps, opParseCtx, joinContext, topToTable, loadTableWork, - loadFileWork, ctx, idToTableNameMap, destTableId, uCtx, - listMapJoinOpsNoReducer); - } - - @SuppressWarnings("nls") - public void doPhase1QBExpr(ASTNode ast, QBExpr qbexpr, String id, - String alias) throws SemanticException { - - assert 
(ast.getToken() != null); - switch (ast.getToken().getType()) { - case HiveParser.TOK_QUERY: { - QB qb = new QB(id, alias, true); - doPhase1(ast, qb, initPhase1Ctx()); - qbexpr.setOpcode(QBExpr.Opcode.NULLOP); - qbexpr.setQB(qb); - } - break; - case HiveParser.TOK_UNION: { - qbexpr.setOpcode(QBExpr.Opcode.UNION); - // query 1 - assert (ast.getChild(0) != null); - QBExpr qbexpr1 = new QBExpr(alias + "-subquery1"); - doPhase1QBExpr((ASTNode) ast.getChild(0), qbexpr1, id + "-subquery1", - alias + "-subquery1"); - qbexpr.setQBExpr1(qbexpr1); - - // query 2 - assert (ast.getChild(0) != null); - QBExpr qbexpr2 = new QBExpr(alias + "-subquery2"); - doPhase1QBExpr((ASTNode) ast.getChild(1), qbexpr2, id + "-subquery2", - alias + "-subquery2"); - qbexpr.setQBExpr2(qbexpr2); - } - break; - } - } - - private LinkedHashMap doPhase1GetAggregationsFromSelect( - ASTNode selExpr) { - // Iterate over the selects search for aggregation Trees. - // Use String as keys to eliminate duplicate trees. - LinkedHashMap aggregationTrees = new LinkedHashMap(); - for (int i = 0; i < selExpr.getChildCount(); ++i) { - ASTNode sel = (ASTNode) selExpr.getChild(i).getChild(0); - doPhase1GetAllAggregations(sel, aggregationTrees); - } - return aggregationTrees; - } - - /** - * DFS-scan the expressionTree to find all aggregation subtrees and put them - * in aggregations. - * - * @param expressionTree - * @param aggregations - * the key to the HashTable is the toStringTree() representation of - * the aggregation subtree. - */ - private void doPhase1GetAllAggregations(ASTNode expressionTree, - HashMap aggregations) { - if (expressionTree.getToken().getType() == HiveParser.TOK_FUNCTION - || expressionTree.getToken().getType() == HiveParser.TOK_FUNCTIONDI) { - assert (expressionTree.getChildCount() != 0); - if (expressionTree.getChild(0).getType() == HiveParser.Identifier) { - String functionName = unescapeIdentifier(expressionTree.getChild(0).getText()); - if (FunctionRegistry.getGenericUDAFResolver(functionName) != null) { - aggregations.put(expressionTree.toStringTree(), expressionTree); - return; - } - } - } - for (int i = 0; i < expressionTree.getChildCount(); i++) { - doPhase1GetAllAggregations((ASTNode) expressionTree.getChild(i), - aggregations); - } - } - - private ASTNode doPhase1GetDistinctFuncExpr( - HashMap aggregationTrees) throws SemanticException { - ASTNode expr = null; - for (Map.Entry entry : aggregationTrees.entrySet()) { - ASTNode value = entry.getValue(); - assert (value != null); - if (value.getToken().getType() == HiveParser.TOK_FUNCTIONDI) { - if (expr == null) { - expr = value; - } else { - throw new SemanticException(ErrorMsg.UNSUPPORTED_MULTIPLE_DISTINCTS.getMsg(expr)); - } - } - } - return expr; - } - - private void processTable(QB qb, ASTNode tabref) throws SemanticException { - // For each table reference get the table name - // and the alias (if alias is not present, the table name - // is used as an alias) - boolean tableSamplePresent = false; - int aliasIndex = 0; - if (tabref.getChildCount() == 2) { - // tablename tablesample - // OR - // tablename alias - ASTNode ct = (ASTNode)tabref.getChild(1); - if (ct.getToken().getType() == HiveParser.TOK_TABLESAMPLE) { - tableSamplePresent = true; - } - else { - aliasIndex = 1; - } - } - else if (tabref.getChildCount() == 3) { - // table name table sample alias - aliasIndex = 2; - tableSamplePresent = true; - } - ASTNode tableTree = (ASTNode)(tabref.getChild(0)); - String alias = unescapeIdentifier(tabref.getChild(aliasIndex).getText()); - // If the alias 
is already there then we have a conflict - if (qb.exists(alias)) { - throw new SemanticException(ErrorMsg.AMBIGUOUS_TABLE_ALIAS.getMsg(tabref.getChild(aliasIndex))); - } - if (tableSamplePresent) { - ASTNode sampleClause = (ASTNode)tabref.getChild(1); - ArrayList sampleCols = new ArrayList(); - if (sampleClause.getChildCount() > 2) { - for (int i = 2; i < sampleClause.getChildCount(); i++) { - sampleCols.add((ASTNode)sampleClause.getChild(i)); - } - } - // TODO: For now only support sampling on up to two columns - // Need to change it to list of columns - if (sampleCols.size() > 2) { - throw new SemanticException(ErrorMsg.SAMPLE_RESTRICTION.getMsg(tabref.getChild(0))); - } - qb.getParseInfo().setTabSample(alias, new TableSample( - unescapeIdentifier(sampleClause.getChild(0).getText()), - unescapeIdentifier(sampleClause.getChild(1).getText()), - sampleCols) - ); - } - // Insert this map into the stats - String table_name = unescapeIdentifier(tabref.getChild(0).getText()); - qb.setTabAlias(alias, table_name); - - qb.getParseInfo().setSrcForAlias(alias, tableTree); - } - - private void processSubQuery(QB qb, ASTNode subq) throws SemanticException { - - // This is a subquery and must have an alias - if (subq.getChildCount() != 2) { - throw new SemanticException(ErrorMsg.NO_SUBQUERY_ALIAS.getMsg(subq)); - } - ASTNode subqref = (ASTNode) subq.getChild(0); - String alias = unescapeIdentifier(subq.getChild(1).getText()); - - // Recursively do the first phase of semantic analysis for the subquery - QBExpr qbexpr = new QBExpr(alias); - - doPhase1QBExpr(subqref, qbexpr, qb.getId(), alias); - - // If the alias is already there then we have a conflict - if (qb.exists(alias)) { - throw new SemanticException(ErrorMsg.AMBIGUOUS_TABLE_ALIAS.getMsg(subq.getChild(1))); - } - // Insert this map into the stats - qb.setSubqAlias(alias, qbexpr); - } - - private boolean isJoinToken(ASTNode node) - { - if ((node.getToken().getType() == HiveParser.TOK_JOIN) || - (node.getToken().getType() == HiveParser.TOK_LEFTOUTERJOIN) || - (node.getToken().getType() == HiveParser.TOK_RIGHTOUTERJOIN) || - (node.getToken().getType() == HiveParser.TOK_FULLOUTERJOIN) || - (node.getToken().getType() == HiveParser.TOK_LEFTSEMIJOIN) || - (node.getToken().getType() == HiveParser.TOK_UNIQUEJOIN)) - return true; - - return false; - } - - @SuppressWarnings("nls") - private void processJoin(QB qb, ASTNode join) throws SemanticException { - int numChildren = join.getChildCount(); - if ((numChildren != 2) && (numChildren != 3) - && join.getToken().getType() != HiveParser.TOK_UNIQUEJOIN) - throw new SemanticException("Join with multiple children"); - - for (int num = 0; num < numChildren; num++) { - ASTNode child = (ASTNode) join.getChild(num); - if (child.getToken().getType() == HiveParser.TOK_TABREF) - processTable(qb, child); - else if (child.getToken().getType() == HiveParser.TOK_SUBQUERY) - processSubQuery(qb, child); - else if (isJoinToken(child)) - processJoin(qb, child); - } - } - - @SuppressWarnings({"fallthrough", "nls"}) - public void doPhase1(ASTNode ast, QB qb, Phase1Ctx ctx_1) - throws SemanticException { - - QBParseInfo qbp = qb.getParseInfo(); - boolean skipRecursion = false; - - if (ast.getToken() != null) { - skipRecursion = true; - switch (ast.getToken().getType()) { - case HiveParser.TOK_SELECTDI: - qb.countSelDi(); - // fall through - case HiveParser.TOK_SELECT: - qb.countSel(); - qbp.setSelExprForClause(ctx_1.dest, ast); - - if (((ASTNode)ast.getChild(0)).getToken().getType() == HiveParser.TOK_HINTLIST) - 
qbp.setHints((ASTNode)ast.getChild(0)); - - LinkedHashMap aggregations = doPhase1GetAggregationsFromSelect(ast); - qbp.setAggregationExprsForClause(ctx_1.dest, aggregations); - qbp.setDistinctFuncExprForClause(ctx_1.dest, - doPhase1GetDistinctFuncExpr(aggregations)); - break; - - case HiveParser.TOK_WHERE: - qbp.setWhrExprForClause(ctx_1.dest, ast); - break; - - case HiveParser.TOK_DESTINATION: - ctx_1.dest = "insclause-" + ctx_1.nextNum; - ctx_1.nextNum++; - - // is there a insert in the subquery - if (qbp.getIsSubQ()) { - ASTNode ch = (ASTNode)ast.getChild(0); - if ((ch.getToken().getType() != HiveParser.TOK_DIR) || - (((ASTNode)ch.getChild(0)).getToken().getType() != HiveParser.TOK_TMP_FILE)) - throw new SemanticException(ErrorMsg.NO_INSERT_INSUBQUERY.getMsg(ast)); - } - - qbp.setDestForClause(ctx_1.dest, (ASTNode) ast.getChild(0)); - break; - - case HiveParser.TOK_FROM: - int child_count = ast.getChildCount(); - if (child_count != 1) - throw new SemanticException("Multiple Children " + child_count); - - // Check if this is a subquery - ASTNode frm = (ASTNode) ast.getChild(0); - if (frm.getToken().getType() == HiveParser.TOK_TABREF) - processTable(qb, frm); - else if (frm.getToken().getType() == HiveParser.TOK_SUBQUERY) - processSubQuery(qb, frm); - else if (isJoinToken(frm)) - { - processJoin(qb, frm); - qbp.setJoinExpr(frm); - } - break; - - case HiveParser.TOK_CLUSTERBY: - // Get the clusterby aliases - these are aliased to the entries in the - // select list - qbp.setClusterByExprForClause(ctx_1.dest, ast); - break; - - case HiveParser.TOK_DISTRIBUTEBY: - // Get the distribute by aliases - these are aliased to the entries in the - // select list - qbp.setDistributeByExprForClause(ctx_1.dest, ast); - if (qbp.getClusterByForClause(ctx_1.dest) != null) { - throw new SemanticException(ErrorMsg.CLUSTERBY_DISTRIBUTEBY_CONFLICT.getMsg(ast)); - } - else if (qbp.getOrderByForClause(ctx_1.dest) != null) { - throw new SemanticException(ErrorMsg.ORDERBY_DISTRIBUTEBY_CONFLICT.getMsg(ast)); - } - break; - - case HiveParser.TOK_SORTBY: - // Get the sort by aliases - these are aliased to the entries in the - // select list - qbp.setSortByExprForClause(ctx_1.dest, ast); - if (qbp.getClusterByForClause(ctx_1.dest) != null) { - throw new SemanticException(ErrorMsg.CLUSTERBY_SORTBY_CONFLICT.getMsg(ast)); - } - else if (qbp.getOrderByForClause(ctx_1.dest) != null) { - throw new SemanticException(ErrorMsg.ORDERBY_SORTBY_CONFLICT.getMsg(ast)); - } - - break; - - case HiveParser.TOK_ORDERBY: - // Get the order by aliases - these are aliased to the entries in the - // select list - qbp.setOrderByExprForClause(ctx_1.dest, ast); - if (qbp.getClusterByForClause(ctx_1.dest) != null) { - throw new SemanticException(ErrorMsg.CLUSTERBY_ORDERBY_CONFLICT.getMsg(ast)); - } - break; - - case HiveParser.TOK_GROUPBY: - // Get the groupby aliases - these are aliased to the entries in the - // select list - if (qbp.getSelForClause(ctx_1.dest).getToken().getType() == HiveParser.TOK_SELECTDI) { - throw new SemanticException(ErrorMsg.SELECT_DISTINCT_WITH_GROUPBY.getMsg(ast)); - } - qbp.setGroupByExprForClause(ctx_1.dest, ast); - skipRecursion = true; - break; - - case HiveParser.TOK_LIMIT: - qbp.setDestLimit(ctx_1.dest, new Integer(ast.getChild(0).getText())); - break; - - case HiveParser.TOK_UNION: - // currently, we dont support subq1 union subq2 - the user has to explicitly say: - // select * from (subq1 union subq2) subqalias - if (!qbp.getIsSubQ()) - throw new SemanticException(ErrorMsg.UNION_NOTIN_SUBQ.getMsg()); - - 
default: - skipRecursion = false; - break; - } - } - - if (!skipRecursion) { - // Iterate over the rest of the children - int child_count = ast.getChildCount(); - for (int child_pos = 0; child_pos < child_count; ++child_pos) { - - // Recurse - doPhase1((ASTNode) ast.getChild(child_pos), qb, ctx_1); - } - } - } - - private void genSamplePruners(QBExpr qbexpr) throws SemanticException { - if (qbexpr.getOpcode() == QBExpr.Opcode.NULLOP) { - genSamplePruners(qbexpr.getQB()); - } else { - genSamplePruners(qbexpr.getQBExpr1()); - genSamplePruners(qbexpr.getQBExpr2()); - } - } - - @SuppressWarnings("nls") - private void genSamplePruners(QB qb) throws SemanticException { - // Recursively prune subqueries - for (String alias : qb.getSubqAliases()) { - QBExpr qbexpr = qb.getSubqForAlias(alias); - genSamplePruners(qbexpr); - } - for (String alias : qb.getTabAliases()) { - String alias_id = (qb.getId() == null ? alias : qb.getId() + ":" + alias); - QBParseInfo qbp = qb.getParseInfo(); - TableSample tableSample = qbp.getTabSample(alias_id); - if (tableSample != null) { - SamplePruner pruner = new SamplePruner(alias, tableSample); - this.aliasToSamplePruner.put(alias_id, pruner); - } - } - } - - private void getMetaData(QBExpr qbexpr) throws SemanticException { - if (qbexpr.getOpcode() == QBExpr.Opcode.NULLOP) { - getMetaData(qbexpr.getQB()); - } else { - getMetaData(qbexpr.getQBExpr1()); - getMetaData(qbexpr.getQBExpr2()); - } - } - - @SuppressWarnings("nls") - public void getMetaData(QB qb) throws SemanticException { - try { - - LOG.info("Get metadata for source tables"); - - // Go over the tables and populate the related structures - for (String alias : qb.getTabAliases()) { - String tab_name = qb.getTabNameForAlias(alias); - Table tab = null; - try { - tab = this.db.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, tab_name); - } - catch (InvalidTableException ite) { - throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(qb.getParseInfo().getSrcForAlias(alias))); - } - - if (!InputFormat.class.isAssignableFrom(tab.getInputFormatClass())) - throw new SemanticException(ErrorMsg.INVALID_INPUT_FORMAT_TYPE.getMsg(qb.getParseInfo().getSrcForAlias(alias))); - - qb.getMetaData().setSrcForAlias(alias, tab); - } - - LOG.info("Get metadata for subqueries"); - // Go over the subqueries and getMetaData for these - for (String alias : qb.getSubqAliases()) { - QBExpr qbexpr = qb.getSubqForAlias(alias); - getMetaData(qbexpr); - } - - LOG.info("Get metadata for destination tables"); - // Go over all the destination structures and populate the related - // metadata - QBParseInfo qbp = qb.getParseInfo(); - - for (String name : qbp.getClauseNamesForDest()) { - ASTNode ast = qbp.getDestForClause(name); - switch (ast.getToken().getType()) { - case HiveParser.TOK_TAB: { - tableSpec ts = new tableSpec(this.db, conf, ast); - - if (!HiveOutputFormat.class.isAssignableFrom(ts.tableHandle.getOutputFormatClass())) - throw new SemanticException(ErrorMsg.INVALID_OUTPUT_FORMAT_TYPE.getMsg(ast)); - - if(ts.partSpec == null) { - // This is a table - qb.getMetaData().setDestForAlias(name, ts.tableHandle); - } else { - // This is a partition - qb.getMetaData().setDestForAlias(name, ts.partHandle); - } - break; - } - case HiveParser.TOK_LOCAL_DIR: - case HiveParser.TOK_DIR: - { - // This is a dfs file - String fname = stripQuotes(ast.getChild(0).getText()); - if ((!qb.getParseInfo().getIsSubQ()) && - (((ASTNode)ast.getChild(0)).getToken().getType() == HiveParser.TOK_TMP_FILE)) - { - fname = ctx.getMRTmpFileURI(); - ctx.setResDir(new 
Path(fname)); - - if ( qb.isCTAS() ) { - qb.setIsQuery(false); - } else { - qb.setIsQuery(true); - } - } - qb.getMetaData().setDestForAlias(name, fname, - (ast.getToken().getType() == HiveParser.TOK_DIR)); - break; - } - default: - throw new SemanticException("Unknown Token Type " + ast.getToken().getType()); - } - } - } catch (HiveException e) { - // Has to use full name to make sure it does not conflict with org.apache.commons.lang.StringUtils - LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e)); - throw new SemanticException(e.getMessage(), e); - } - } - - private boolean isPresent(String[] list, String elem) { - for (String s : list) - if (s.equals(elem)) - return true; - - return false; - } - - @SuppressWarnings("nls") - private void parseJoinCondPopulateAlias(QBJoinTree joinTree, - ASTNode condn, Vector leftAliases, Vector rightAliases, - ArrayList fields) - throws SemanticException { - // String[] allAliases = joinTree.getAllAliases(); - switch (condn.getToken().getType()) { - case HiveParser.TOK_TABLE_OR_COL: - String tableOrCol = unescapeIdentifier(condn.getChild(0).getText().toLowerCase()); - if (isPresent(joinTree.getLeftAliases(), tableOrCol)) { - if (!leftAliases.contains(tableOrCol)) - leftAliases.add(tableOrCol); - } else if (isPresent(joinTree.getRightAliases(), tableOrCol)) { - if (!rightAliases.contains(tableOrCol)) - rightAliases.add(tableOrCol); - } else { - // We don't support columns without table prefix in JOIN condition right now. - // We need to pass Metadata here to know which table the column belongs to. - throw new SemanticException(ErrorMsg.INVALID_TABLE_ALIAS.getMsg(condn.getChild(0))); - } - break; - - case HiveParser.Identifier: - // it may be a field name, return the identifier and let the caller decide whether it is or not - if ( fields != null ) { - fields.add(unescapeIdentifier(condn.getToken().getText().toLowerCase())); - } - break; - case HiveParser.Number: - case HiveParser.StringLiteral: - case HiveParser.TOK_CHARSETLITERAL: - case HiveParser.KW_TRUE: - case HiveParser.KW_FALSE: - break; - - case HiveParser.TOK_FUNCTION: - // check all the arguments - for (int i = 1; i < condn.getChildCount(); i++) - parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(i), - leftAliases, rightAliases, null); - break; - - default: - // This is an operator - so check whether it is unary or binary operator - if (condn.getChildCount() == 1) - parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(0), - leftAliases, rightAliases, null); - else if (condn.getChildCount() == 2) { - - ArrayList fields1 = null; - // if it is a dot operator, remember the field name of the rhs of the left semijoin - if (joinTree.getNoSemiJoin() == false && - condn.getToken().getText().equals("." 
)) { - // get the semijoin rhs table name and field name - fields1 = new ArrayList(); - int rhssize = rightAliases.size(); - parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(0), - leftAliases, rightAliases, null); - String rhsAlias = null; - - if ( rightAliases.size() > rhssize ) { // the new table is rhs table - rhsAlias = rightAliases.get(rightAliases.size()-1); - } - - parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(1), - leftAliases, rightAliases, fields1); - if ( rhsAlias != null && fields1.size() > 0 ) { - joinTree.addRHSSemijoinColumns(rhsAlias, condn); - } - } else { - parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(0), - leftAliases, rightAliases, null); - parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(1), - leftAliases, rightAliases, fields1); - } - } else - throw new SemanticException(condn.toStringTree() + " encountered with " - + condn.getChildCount() + " children"); - break; - } - } - - private void populateAliases(Vector leftAliases, - Vector rightAliases, ASTNode condn, QBJoinTree joinTree, - Vector leftSrc) throws SemanticException { - if ((leftAliases.size() != 0) && (rightAliases.size() != 0)) - throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_1.getMsg(condn)); - - if (rightAliases.size() != 0) { - assert rightAliases.size() == 1; - joinTree.getExpressions().get(1).add(condn); - } else if (leftAliases.size() != 0) { - joinTree.getExpressions().get(0).add(condn); - for (String s : leftAliases) - if (!leftSrc.contains(s)) - leftSrc.add(s); - } else - throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_2.getMsg(condn)); - } - - /** - * Parse the join condition. - * If the condition is a join condition, throw an error if it is not an equality. Otherwise, break it into left and - * right expressions and store in the join tree. - * If the condition is a join filter, add it to the filter list of join tree. The join condition can contains conditions - * on both the left and tree trees and filters on either. Currently, we only support equi-joins, so we throw an error - * if the condition involves both subtrees and is not a equality. Also, we only support AND i.e ORs are not supported - * currently as their semantics are not very clear, may lead to data explosion and there is no usecase. 
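A few concrete cases of the rules described above, as parseJoinCondition applies them below: an equality that names one alias from each side, such as a.key = b.key, is split into left and right join expressions; a predicate that touches only one side, such as a.ds = '2009-01-01', is kept as a filter for that side; an OR anywhere in the condition is rejected (INVALID_JOIN_CONDITION_3), as is a non-equality that spans both sides, such as a.key > b.key (INVALID_JOIN_CONDITION_1).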
- * @param joinTree jointree to be populated - * @param joinCond join condition - * @param leftSrc left sources - * @throws SemanticException - */ - private void parseJoinCondition(QBJoinTree joinTree, ASTNode joinCond, Vector leftSrc) - throws SemanticException { - if (joinCond == null) - return; - - switch (joinCond.getToken().getType()) { - case HiveParser.KW_OR: - throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_3.getMsg(joinCond)); - - case HiveParser.KW_AND: - parseJoinCondition(joinTree, (ASTNode) joinCond - .getChild(0), leftSrc); - parseJoinCondition(joinTree, (ASTNode) joinCond - .getChild(1), leftSrc); - break; - - case HiveParser.EQUAL: - ASTNode leftCondn = (ASTNode) joinCond.getChild(0); - Vector leftCondAl1 = new Vector(); - Vector leftCondAl2 = new Vector(); - parseJoinCondPopulateAlias(joinTree, leftCondn, leftCondAl1, leftCondAl2, null); - - ASTNode rightCondn = (ASTNode) joinCond.getChild(1); - Vector rightCondAl1 = new Vector(); - Vector rightCondAl2 = new Vector(); - parseJoinCondPopulateAlias(joinTree, rightCondn, rightCondAl1, rightCondAl2, null); - - // is it a filter or a join condition - if (((leftCondAl1.size() != 0) && (leftCondAl2.size() != 0)) || - ((rightCondAl1.size() != 0) && (rightCondAl2.size() != 0))) - throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_1.getMsg(joinCond)); - - if (leftCondAl1.size() != 0) { - if ((rightCondAl1.size() != 0) || ((rightCondAl1.size() == 0) && (rightCondAl2.size() == 0))) - joinTree.getFilters().get(0).add(joinCond); - else if (rightCondAl2.size() != 0) { - populateAliases(leftCondAl1, leftCondAl2, leftCondn, joinTree, leftSrc); - populateAliases(rightCondAl1, rightCondAl2, rightCondn, joinTree, leftSrc); - } - } - else if (leftCondAl2.size() != 0) { - if ((rightCondAl2.size() != 0) || ((rightCondAl1.size() == 0) && (rightCondAl2.size() == 0))) - joinTree.getFilters().get(1).add(joinCond); - else if (rightCondAl1.size() != 0) { - populateAliases(leftCondAl1, leftCondAl2, leftCondn, joinTree, leftSrc); - populateAliases(rightCondAl1, rightCondAl2, rightCondn, joinTree, leftSrc); - } - } - else if (rightCondAl1.size() != 0) - joinTree.getFilters().get(0).add(joinCond); - else - joinTree.getFilters().get(1).add(joinCond); - - break; - - default: - boolean isFunction = (joinCond.getType() == HiveParser.TOK_FUNCTION); - - // Create all children - int childrenBegin = (isFunction ? 
1 : 0); - ArrayList> leftAlias = new ArrayList>(joinCond.getChildCount() - childrenBegin); - ArrayList> rightAlias = new ArrayList>(joinCond.getChildCount() - childrenBegin); - for (int ci = 0; ci < joinCond.getChildCount() - childrenBegin; ci++) { - Vector left = new Vector(); - Vector right = new Vector(); - leftAlias.add(left); - rightAlias.add(right); - } - - for (int ci=childrenBegin; ci left : leftAlias) { - if (left.size() != 0) { - leftAliasNull = false; - break; - } - } - - boolean rightAliasNull = true; - for (Vector right : rightAlias) { - if (right.size() != 0) { - rightAliasNull = false; - break; - } - } - - if (!leftAliasNull && !rightAliasNull) - throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_1.getMsg(joinCond)); - - if (!leftAliasNull) - joinTree.getFilters().get(0).add(joinCond); - else - joinTree.getFilters().get(1).add(joinCond); - - break; - } - } - - @SuppressWarnings("nls") - public Operator putOpInsertMap(Operator op, RowResolver rr) - { - OpParseContext ctx = new OpParseContext(rr); - opParseCtx.put(op, ctx); - return op; - } - - @SuppressWarnings("nls") - private Operator genFilterPlan(String dest, QB qb, - Operator input) throws SemanticException { - - ASTNode whereExpr = qb.getParseInfo().getWhrForClause(dest); - return genFilterPlan(qb, (ASTNode)whereExpr.getChild(0), input); - } - - /** - * create a filter plan. The condition and the inputs are specified. - * @param qb current query block - * @param condn The condition to be resolved - * @param input the input operator - */ - @SuppressWarnings("nls") - private Operator genFilterPlan(QB qb, ASTNode condn, Operator input) throws SemanticException { - - OpParseContext inputCtx = opParseCtx.get(input); - RowResolver inputRR = inputCtx.getRR(); - Operator output = putOpInsertMap( - OperatorFactory.getAndMakeChild( - new filterDesc(genExprNodeDesc(condn, inputRR), false), - new RowSchema(inputRR.getColumnInfos()), input), inputRR); - - LOG.debug("Created Filter Plan for " + qb.getId() + " row schema: " + inputRR.toString()); - return output; - } - - @SuppressWarnings("nls") - private Integer genColListRegex(String colRegex, String tabAlias, String alias, ASTNode sel, - ArrayList col_list, RowResolver input, Integer pos, - RowResolver output) throws SemanticException { - - // The table alias should exist - if (tabAlias != null && !input.hasTableAlias(tabAlias)) - throw new SemanticException(ErrorMsg.INVALID_TABLE_ALIAS.getMsg(sel)); - - // TODO: Have to put in the support for AS clause - Pattern regex = null; - try { - regex = Pattern.compile(colRegex, Pattern.CASE_INSENSITIVE); - } catch (PatternSyntaxException e) { - throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(sel, e.getMessage())); - } - - int matched = 0; - // This is the tab.* case - // In this case add all the columns to the fieldList - // from the input schema - for(ColumnInfo colInfo: input.getColumnInfos()) { - String name = colInfo.getInternalName(); - String [] tmp = input.reverseLookup(name); - - // Skip the colinfos which are not for this particular alias - if (tabAlias != null && !tmp[0].equalsIgnoreCase(tabAlias)) { - continue; - } - - // Not matching the regex? 
- if (!regex.matcher(tmp[1]).matches()) { - continue; - } - - exprNodeColumnDesc expr = new exprNodeColumnDesc(colInfo.getType(), name, - colInfo.getTabAlias(), - colInfo.getIsPartitionCol()); - col_list.add(expr); - output.put(tmp[0], tmp[1], - new ColumnInfo(getColumnInternalName(pos), colInfo.getType(), - colInfo.getTabAlias(), colInfo.getIsPartitionCol())); - pos = Integer.valueOf(pos.intValue() + 1); - matched ++; - } - if (matched == 0) { - throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(sel)); - } - return pos; - } - - public static String getColumnInternalName(int pos) { - return HiveConf.getColumnInternalName(pos); - } - - - /** - * If the user script command needs any modifications - do it here - */ - private String getFixedCmd(String cmd) { - SessionState ss = SessionState.get(); - if(ss == null) - return cmd; - - // for local mode - replace any references to packaged files by name with - // the reference to the original file path - if(ss.getConf().get("mapred.job.tracker", "local").equals("local")) { - Set files = ss.list_resource(SessionState.ResourceType.FILE, null); - if((files != null) && !files.isEmpty()) { - int end = cmd.indexOf(" "); - String prog = (end == -1) ? cmd : cmd.substring(0, end); - String args = (end == -1) ? "" : cmd.substring(end, cmd.length()); - - for(String oneFile: files) { - Path p = new Path(oneFile); - if(p.getName().equals(prog)) { - cmd = oneFile + args; - break; - } - } - } - } - - return cmd; - } - - private tableDesc getTableDescFromSerDe(ASTNode child, String cols, String colTypes, boolean defaultCols) throws SemanticException { - if (child.getType() == HiveParser.TOK_SERDENAME) { - String serdeName = unescapeSQLString(child.getChild(0).getText()); - Class serdeClass = null; - - try { - serdeClass = (Class)Class.forName(serdeName, true, JavaUtils.getClassLoader()); - } catch (ClassNotFoundException e) { - throw new SemanticException(e); - } - - tableDesc tblDesc = PlanUtils.getTableDesc(serdeClass, Integer.toString(Utilities.tabCode), cols, colTypes, defaultCols, true); - // copy all the properties - if (child.getChildCount() == 2) { - ASTNode prop = (ASTNode)((ASTNode)child.getChild(1)).getChild(0); - for (int propChild = 0; propChild < prop.getChildCount(); propChild++) { - String key = unescapeSQLString(prop.getChild(propChild).getChild(0).getText()); - String value = unescapeSQLString(prop.getChild(propChild).getChild(1).getText()); - tblDesc.getProperties().setProperty(key,value); - } - } - return tblDesc; - } - else if (child.getType() == HiveParser.TOK_SERDEPROPS) { - tableDesc tblDesc = PlanUtils.getDefaultTableDesc(Integer.toString(Utilities.ctrlaCode), cols, colTypes, defaultCols); - int numChildRowFormat = child.getChildCount(); - for (int numC = 0; numC < numChildRowFormat; numC++) - { - ASTNode rowChild = (ASTNode)child.getChild(numC); - switch (rowChild.getToken().getType()) { - case HiveParser.TOK_TABLEROWFORMATFIELD: - String fieldDelim = unescapeSQLString(rowChild.getChild(0).getText()); - tblDesc.getProperties().setProperty(Constants.FIELD_DELIM, fieldDelim); - tblDesc.getProperties().setProperty(Constants.SERIALIZATION_FORMAT, fieldDelim); - - if (rowChild.getChildCount()>=2) { - String fieldEscape = unescapeSQLString(rowChild.getChild(1).getText()); - tblDesc.getProperties().setProperty(Constants.ESCAPE_CHAR, fieldEscape); - } - break; - case HiveParser.TOK_TABLEROWFORMATCOLLITEMS: - tblDesc.getProperties().setProperty(Constants.COLLECTION_DELIM, unescapeSQLString(rowChild.getChild(0).getText())); - break; - case 
HiveParser.TOK_TABLEROWFORMATMAPKEYS: - tblDesc.getProperties().setProperty(Constants.MAPKEY_DELIM, unescapeSQLString(rowChild.getChild(0).getText())); - break; - case HiveParser.TOK_TABLEROWFORMATLINES: - tblDesc.getProperties().setProperty(Constants.LINE_DELIM, unescapeSQLString(rowChild.getChild(0).getText())); - break; - default: assert false; - } - } - - return tblDesc; - } - - // should never come here - return null; - } - - private void failIfColAliasExists(Set nameSet, String name) throws SemanticException { - if(nameSet.contains(name)) - throw new SemanticException(ErrorMsg.COLUMN_ALIAS_ALREADY_EXISTS.getMsg(name)); - nameSet.add(name); - } - - @SuppressWarnings("nls") - private Operator genScriptPlan(ASTNode trfm, QB qb, - Operator input) throws SemanticException { - // If there is no "AS" clause, the output schema will be "key,value" - ArrayList outputCols = new ArrayList(); - int inputSerDeNum = 1, inputRecordWriterNum = 2; - int outputSerDeNum = 4, outputRecordReaderNum = 5; - int outputColsNum = 6; - boolean outputColNames = false, outputColSchemas = false; - int execPos = 3; - boolean defaultOutputCols = false; - - // Go over all the children - if (trfm.getChildCount() > outputColsNum) { - ASTNode outCols = (ASTNode)trfm.getChild(outputColsNum); - if (outCols.getType() == HiveParser.TOK_ALIASLIST) - outputColNames = true; - else if (outCols.getType() == HiveParser.TOK_TABCOLLIST) - outputColSchemas = true; - } - - // If column type is not specified, use a string - if (!outputColNames && !outputColSchemas) { - outputCols.add(new ColumnInfo("key", TypeInfoFactory.stringTypeInfo, null, false)); - outputCols.add(new ColumnInfo("value", TypeInfoFactory.stringTypeInfo, null, false)); - defaultOutputCols = true; - } - else { - ASTNode collist = (ASTNode) trfm.getChild(outputColsNum); - int ccount = collist.getChildCount(); - - Set colAliasNamesDuplicateCheck = new HashSet(); - if (outputColNames) { - for (int i=0; i < ccount; ++i) { - String colAlias = unescapeIdentifier(((ASTNode)collist.getChild(i)).getText()); - failIfColAliasExists(colAliasNamesDuplicateCheck, colAlias); - outputCols.add(new ColumnInfo(colAlias, TypeInfoFactory.stringTypeInfo, null, false)); - } - } - else { - for (int i=0; i < ccount; ++i) { - ASTNode child = (ASTNode) collist.getChild(i); - assert child.getType() == HiveParser.TOK_TABCOL; - String colAlias = unescapeIdentifier(((ASTNode)child.getChild(0)).getText()); - failIfColAliasExists(colAliasNamesDuplicateCheck, colAlias); - outputCols.add(new ColumnInfo(colAlias, - TypeInfoUtils.getTypeInfoFromTypeString(DDLSemanticAnalyzer.getTypeName(((ASTNode)child.getChild(1)).getType())), null, false)); - } - } - } - - RowResolver out_rwsch = new RowResolver(); - StringBuilder columns = new StringBuilder(); - StringBuilder columnTypes = new StringBuilder(); - - for (int i = 0; i < outputCols.size(); ++i) { - if (i != 0) { - columns.append(","); - columnTypes.append(","); - } - - columns.append(outputCols.get(i).getInternalName()); - columnTypes.append(outputCols.get(i).getType().getTypeName()); - - out_rwsch.put( - qb.getParseInfo().getAlias(), - outputCols.get(i).getInternalName(), - outputCols.get(i)); - } - - StringBuilder inpColumns = new StringBuilder(); - StringBuilder inpColumnTypes = new StringBuilder(); - Vector inputSchema = opParseCtx.get(input).getRR().getColumnInfos(); - for (int i = 0; i < inputSchema.size(); ++i) { - if (i != 0) { - inpColumns.append(","); - inpColumnTypes.append(","); - } - - inpColumns.append(inputSchema.get(i).getInternalName()); 
- inpColumnTypes.append(inputSchema.get(i).getType().getTypeName()); - } - - tableDesc outInfo; - tableDesc inInfo; - String defaultSerdeName = conf.getVar(HiveConf.ConfVars.HIVESCRIPTSERDE); - Class serde; - - try { - serde = (Class)Class.forName(defaultSerdeName, true, JavaUtils.getClassLoader()); - } catch (ClassNotFoundException e) { - throw new SemanticException(e); - } - - // Input and Output Serdes - if (trfm.getChild(inputSerDeNum).getChildCount() > 0) - inInfo = getTableDescFromSerDe((ASTNode)(((ASTNode)trfm.getChild(inputSerDeNum))).getChild(0), inpColumns.toString(), inpColumnTypes.toString(), false); - else - inInfo = PlanUtils.getTableDesc(serde, Integer.toString(Utilities.tabCode), inpColumns.toString(), inpColumnTypes.toString(), false, true); - - if (trfm.getChild(outputSerDeNum).getChildCount() > 0) - outInfo = getTableDescFromSerDe((ASTNode)(((ASTNode)trfm.getChild(outputSerDeNum))).getChild(0), columns.toString(), columnTypes.toString(), false); - // This is for backward compatibility. If the user did not specify the output column list, we assume that there are 2 columns: key and value. - // However, if the script outputs: col1, col2, col3 seperated by TAB, the requirement is: key is col and value is (col2 TAB col3) - else - outInfo = PlanUtils.getTableDesc(serde, Integer.toString(Utilities.tabCode), columns.toString(), columnTypes.toString(), defaultOutputCols); - - // Output record readers - Class outRecordReader = getRecordReader((ASTNode)trfm.getChild(outputRecordReaderNum)); - Class inRecordWriter = getRecordWriter((ASTNode)trfm.getChild(inputRecordWriterNum)); - - Operator output = putOpInsertMap(OperatorFactory - .getAndMakeChild( - new scriptDesc(getFixedCmd(stripQuotes(trfm.getChild(execPos).getText())), - inInfo, inRecordWriter, outInfo, outRecordReader), - new RowSchema(out_rwsch.getColumnInfos()), input), out_rwsch); - - return output; - } - - private Class getRecordReader(ASTNode node) throws SemanticException { - String name; - - if (node.getChildCount() == 0) - name = conf.getVar(HiveConf.ConfVars.HIVESCRIPTRECORDREADER); - else - name = unescapeSQLString(node.getChild(0).getText()); - - try { - return (Class)Class.forName(name, true, JavaUtils.getClassLoader()); - } catch (ClassNotFoundException e) { - throw new SemanticException(e); - } - } - - private Class getRecordWriter(ASTNode node) throws SemanticException { - String name; - - if (node.getChildCount() == 0) - name = conf.getVar(HiveConf.ConfVars.HIVESCRIPTRECORDWRITER); - else - name = unescapeSQLString(node.getChild(0).getText()); - - try { - return (Class)Class.forName(name, true, JavaUtils.getClassLoader()); - } catch (ClassNotFoundException e) { - throw new SemanticException(e); - } - } - - /** - * This function is a wrapper of parseInfo.getGroupByForClause which automatically - * translates SELECT DISTINCT a,b,c to SELECT a,b,c GROUP BY a,b,c. - */ - static List getGroupByForClause(QBParseInfo parseInfo, String dest) { - if (parseInfo.getSelForClause(dest).getToken().getType() == HiveParser.TOK_SELECTDI) { - ASTNode selectExprs = parseInfo.getSelForClause(dest); - List result = new ArrayList(selectExprs == null - ? 
0 : selectExprs.getChildCount()); - if (selectExprs != null) { - for (int i = 0; i < selectExprs.getChildCount(); ++i) { - // table.column AS alias - ASTNode grpbyExpr = (ASTNode) selectExprs.getChild(i).getChild(0); - result.add(grpbyExpr); - } - } - return result; - } else { - ASTNode grpByExprs = parseInfo.getGroupByForClause(dest); - List result = new ArrayList(grpByExprs == null - ? 0 : grpByExprs.getChildCount()); - if (grpByExprs != null) { - for (int i = 0; i < grpByExprs.getChildCount(); ++i) { - ASTNode grpbyExpr = (ASTNode) grpByExprs.getChild(i); - result.add(grpbyExpr); - } - } - return result; - } - } - - private static String[] getColAlias(ASTNode selExpr, String defaultName, RowResolver inputRR) { - String colAlias = null; - String tabAlias = null; - String[] colRef = new String[2]; - - if (selExpr.getChildCount() == 2) { - // return zz for "xx + yy AS zz" - colAlias = unescapeIdentifier(selExpr.getChild(1).getText()); - colRef[0] = tabAlias; - colRef[1] = colAlias; - return colRef; - } - - ASTNode root = (ASTNode) selExpr.getChild(0); - if (root.getType() == HiveParser.TOK_TABLE_OR_COL) { - colAlias = root.getChild(0).getText(); - colRef[0] = tabAlias; - colRef[1] = colAlias; - return colRef; - } - - if (root.getType() == HiveParser.DOT) { - ASTNode tab = (ASTNode) root.getChild(0); - if (tab.getType() == HiveParser.TOK_TABLE_OR_COL) { - String t = unescapeIdentifier(tab.getChild(0).getText()); - if (inputRR.hasTableAlias(t)) { - tabAlias = t; - } - } - - // Return zz for "xx.zz" and "xx.yy.zz" - ASTNode col = (ASTNode) root.getChild(1); - if (col.getType() == HiveParser.Identifier) { - colAlias = unescapeIdentifier(col.getText()); - } - } - - if(colAlias == null) { - // Return defaultName if selExpr is not a simple xx.yy.zz - colAlias = defaultName; - } - - colRef[0] = tabAlias; - colRef[1] = colAlias; - return colRef; - } - - /** - * Returns whether the pattern is a regex expression (instead of a normal string). - * Normal string is a string with all alphabets/digits and "_". - */ - private static boolean isRegex(String pattern) { - for(int i=0; i col_list = new ArrayList(); - RowResolver out_rwsch = new RowResolver(); - ASTNode trfm = null; - String alias = qb.getParseInfo().getAlias(); - Integer pos = Integer.valueOf(0); - RowResolver inputRR = opParseCtx.get(input).getRR(); - // SELECT * or SELECT TRANSFORM(*) - boolean selectStar = false; - int posn = 0; - boolean hintPresent = (selExprList.getChild(0).getType() == HiveParser.TOK_HINTLIST); - if (hintPresent) { - posn++; - } - - boolean isInTransform = (selExprList.getChild(posn).getChild(0).getType() - == HiveParser.TOK_TRANSFORM); - if (isInTransform) { - trfm = (ASTNode) selExprList.getChild(posn).getChild(0); - } - - // The list of expressions after SELECT or SELECT TRANSFORM. - ASTNode exprList = (isInTransform ? (ASTNode) trfm.getChild(0) : selExprList); - - LOG.debug("genSelectPlan: input = " + inputRR.toString()); - // Iterate over all expression (either after SELECT, or in SELECT TRANSFORM) - for (int i = posn; i < exprList.getChildCount(); ++i) { - - // child can be EXPR AS ALIAS, or EXPR. 
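For the select-expression branches below: a plain * (or tab.*) goes through genColListRegex with the pattern .*, and a back-quoted name that is not a simple identifier, for example `key.*`, is likewise treated as a case-insensitive regular expression over the input columns rather than as a single column; that treatment is skipped when an AS alias is present or when the row resolver is the expression resolver used for GROUP BY.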
- ASTNode child = (ASTNode) exprList.getChild(i); - boolean hasAsClause = (!isInTransform) && (child.getChildCount() == 2); - // The real expression - ASTNode expr; - String tabAlias; - String colAlias; - - if (isInTransform) { - tabAlias = null; - colAlias = "_C" + i; - expr = child; - } else { - String[] colRef = getColAlias(child, "_C" + i, inputRR); - tabAlias = colRef[0]; - colAlias = colRef[1]; - // Get rid of TOK_SELEXPR - expr = (ASTNode)child.getChild(0); - } - - if (expr.getType() == HiveParser.TOK_ALLCOLREF) { - pos = genColListRegex(".*", - expr.getChildCount() == 0 ? null : unescapeIdentifier(expr.getChild(0).getText().toLowerCase()), - alias, expr, col_list, inputRR, pos, out_rwsch); - selectStar = true; - } else if (expr.getType() == HiveParser.TOK_TABLE_OR_COL - && !hasAsClause - && !inputRR.getIsExprResolver() - && isRegex(unescapeIdentifier(expr.getChild(0).getText()))) { - // In case the expression is a regex COL. - // This can only happen without AS clause - // We don't allow this for ExprResolver - the Group By case - pos = genColListRegex(unescapeIdentifier(expr.getChild(0).getText()), - null, alias, expr, col_list, inputRR, pos, out_rwsch); - } else if (expr.getType() == HiveParser.DOT - && expr.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL - && inputRR.hasTableAlias(unescapeIdentifier(expr.getChild(0).getChild(0).getText().toLowerCase())) - && !hasAsClause - && !inputRR.getIsExprResolver() - && isRegex(unescapeIdentifier(expr.getChild(1).getText()))) { - // In case the expression is TABLE.COL (col can be regex). - // This can only happen without AS clause - // We don't allow this for ExprResolver - the Group By case - pos = genColListRegex(unescapeIdentifier(expr.getChild(1).getText()), - unescapeIdentifier(expr.getChild(0).getChild(0).getText().toLowerCase()), - alias, expr, col_list, inputRR, pos, out_rwsch); - } else { - // Case when this is an expression - exprNodeDesc exp = genExprNodeDesc(expr, inputRR); - col_list.add(exp); - if (!StringUtils.isEmpty(alias) && - (out_rwsch.get(null, colAlias) != null)) { - throw new SemanticException(ErrorMsg.AMBIGUOUS_COLUMN.getMsg(expr.getChild(1))); - } - out_rwsch.put(tabAlias, colAlias, - new ColumnInfo(getColumnInternalName(pos), - exp.getTypeInfo(), tabAlias, false)); - pos = Integer.valueOf(pos.intValue() + 1); - } - } - selectStar = selectStar && exprList.getChildCount() == posn + 1; - - ArrayList columnNames = new ArrayList(); - Map colExprMap = new HashMap(); - for (int i=0; i convertedParameters; - GenericUDAFEvaluator genericUDAFEvaluator; - TypeInfo returnType; - } - - /** - * Convert exprNodeDesc array to Typeinfo array. - */ - static ArrayList getTypeInfo(ArrayList exprs) { - ArrayList result = new ArrayList(); - for(exprNodeDesc expr: exprs) { - result.add(expr.getTypeInfo()); - } - return result; - } - - /** - * Convert exprNodeDesc array to Typeinfo array. - */ - static ObjectInspector[] getStandardObjectInspector(ArrayList exprs) { - ObjectInspector[] result = new ObjectInspector[exprs.size()]; - for (int i=0; i aggParameters, - ASTNode aggTree) throws SemanticException { - ArrayList originalParameterTypeInfos = getTypeInfo(aggParameters); - GenericUDAFEvaluator result = FunctionRegistry.getGenericUDAFEvaluator( - aggName, originalParameterTypeInfos); - if (null == result) { - String reason = "Looking for UDAF Evaluator\"" + aggName + "\" with parameters " - + originalParameterTypeInfos; - throw new SemanticException(ErrorMsg.INVALID_FUNCTION_SIGNATURE. 
- getMsg((ASTNode)aggTree.getChild(0), reason)); - } - return result; - } - - /** - * Returns the GenericUDAFInfo struct for the aggregation. - * @param aggName The name of the UDAF. - * @param aggParameters The exprNodeDesc of the original parameters - * @param aggTree The ASTNode node of the UDAF in the query. - * @return GenericUDAFInfo - * @throws SemanticException when the UDAF is not found or has problems. - */ - static GenericUDAFInfo getGenericUDAFInfo(GenericUDAFEvaluator evaluator, - GenericUDAFEvaluator.Mode emode, ArrayList aggParameters) - throws SemanticException { - - GenericUDAFInfo r = new GenericUDAFInfo(); - - // set r.genericUDAFEvaluator - r.genericUDAFEvaluator = evaluator; - - // set r.returnType - ObjectInspector returnOI = null; - try { - ObjectInspector[] aggObjectInspectors = - getStandardObjectInspector(getTypeInfo(aggParameters)); - returnOI = r.genericUDAFEvaluator.init(emode, aggObjectInspectors); - r.returnType = TypeInfoUtils.getTypeInfoFromObjectInspector(returnOI); - } catch (HiveException e) { - throw new SemanticException(e); - } - // set r.convertedParameters - // TODO: type conversion - r.convertedParameters = aggParameters; - - return r; - } - - private static GenericUDAFEvaluator.Mode groupByDescModeToUDAFMode(groupByDesc.Mode mode, boolean isDistinct) { - switch (mode) { - case COMPLETE: return GenericUDAFEvaluator.Mode.COMPLETE; - case PARTIAL1: return GenericUDAFEvaluator.Mode.PARTIAL1; - case PARTIAL2: return GenericUDAFEvaluator.Mode.PARTIAL2; - case PARTIALS: return isDistinct ? GenericUDAFEvaluator.Mode.PARTIAL1 : GenericUDAFEvaluator.Mode.PARTIAL2; - case FINAL: return GenericUDAFEvaluator.Mode.FINAL; - case HASH: return GenericUDAFEvaluator.Mode.PARTIAL1; - case MERGEPARTIAL: return isDistinct ? GenericUDAFEvaluator.Mode.COMPLETE : GenericUDAFEvaluator.Mode.FINAL; - default: - throw new RuntimeException("internal error in groupByDescModeToUDAFMode"); - } - } - /** - * Generate the GroupByOperator for the Query Block (parseInfo.getXXX(dest)). - * The new GroupByOperator will be a child of the reduceSinkOperatorInfo. - * - * @param mode The mode of the aggregation (PARTIAL1 or COMPLETE) - * @param genericUDAFEvaluators If not null, this function will store the mapping - * from Aggregation StringTree to the genericUDAFEvaluator in this parameter, - * so it can be used in the next-stage GroupBy aggregations. 
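groupByDescModeToUDAFMode above is the bridge between the plan-level group-by mode and the evaluator mode handed to GenericUDAFEvaluator.init. The same table restated as a self-contained sketch; the local enums stand in for the real Hive classes:

    public class UdafModeMappingSketch {
        enum GroupByMode { COMPLETE, PARTIAL1, PARTIAL2, PARTIALS, FINAL, HASH, MERGEPARTIAL }
        enum UdafMode { COMPLETE, PARTIAL1, PARTIAL2, FINAL }

        static UdafMode toUdafMode(GroupByMode mode, boolean isDistinct) {
            switch (mode) {
                case COMPLETE:     return UdafMode.COMPLETE;
                case PARTIAL1:     return UdafMode.PARTIAL1;
                case PARTIAL2:     return UdafMode.PARTIAL2;
                // distinct aggregates get no client-side partials, so PARTIALS degrades to PARTIAL1
                case PARTIALS:     return isDistinct ? UdafMode.PARTIAL1 : UdafMode.PARTIAL2;
                case FINAL:        return UdafMode.FINAL;
                case HASH:         return UdafMode.PARTIAL1;
                // with a distinct the reducer re-iterates raw rows, otherwise it merges partials
                case MERGEPARTIAL: return isDistinct ? UdafMode.COMPLETE : UdafMode.FINAL;
                default: throw new IllegalStateException("unreachable");
            }
        }

        public static void main(String[] args) {
            System.out.println(toUdafMode(GroupByMode.MERGEPARTIAL, false)); // FINAL
            System.out.println(toUdafMode(GroupByMode.MERGEPARTIAL, true));  // COMPLETE
        }
    }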
- * @return the new GroupByOperator - */ - @SuppressWarnings("nls") - private Operator genGroupByPlanGroupByOperator( - QBParseInfo parseInfo, String dest, Operator reduceSinkOperatorInfo, - groupByDesc.Mode mode, Map genericUDAFEvaluators) - throws SemanticException { - RowResolver groupByInputRowResolver = opParseCtx.get(reduceSinkOperatorInfo).getRR(); - RowResolver groupByOutputRowResolver = new RowResolver(); - groupByOutputRowResolver.setIsExprResolver(true); - ArrayList groupByKeys = new ArrayList(); - ArrayList aggregations = new ArrayList(); - ArrayList outputColumnNames = new ArrayList(); - Map colExprMap = new HashMap(); - List grpByExprs = getGroupByForClause(parseInfo, dest); - for (int i = 0; i < grpByExprs.size(); ++i) { - ASTNode grpbyExpr = grpByExprs.get(i); - String text = grpbyExpr.toStringTree(); - ColumnInfo exprInfo = groupByInputRowResolver.get("",text); - - if (exprInfo == null) { - throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(grpbyExpr)); - } - - groupByKeys.add(new exprNodeColumnDesc(exprInfo.getType(), - exprInfo.getInternalName(), "", false)); - String field = getColumnInternalName(i); - outputColumnNames.add(field); - groupByOutputRowResolver.put("",grpbyExpr.toStringTree(), - new ColumnInfo(field, exprInfo.getType(), null, false)); - colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1)); - } - // For each aggregation - HashMap aggregationTrees = parseInfo - .getAggregationExprsForClause(dest); - assert (aggregationTrees != null); - for (Map.Entry entry : aggregationTrees.entrySet()) { - ASTNode value = entry.getValue(); - - // This is the GenericUDAF name - String aggName = value.getChild(0).getText(); - - // Convert children to aggParameters - ArrayList aggParameters = new ArrayList(); - // 0 is the function name - for (int i = 1; i < value.getChildCount(); i++) { - String text = value.getChild(i).toStringTree(); - ASTNode paraExpr = (ASTNode)value.getChild(i); - ColumnInfo paraExprInfo = groupByInputRowResolver.get("",text); - if (paraExprInfo == null) { - throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(paraExpr)); - } - - String paraExpression = paraExprInfo.getInternalName(); - assert(paraExpression != null); - aggParameters.add(new exprNodeColumnDesc(paraExprInfo.getType(), - paraExprInfo.getInternalName(), - paraExprInfo.getTabAlias(), - paraExprInfo.getIsPartitionCol())); - } - - boolean isDistinct = value.getType() == HiveParser.TOK_FUNCTIONDI; - Mode amode = groupByDescModeToUDAFMode(mode, isDistinct); - GenericUDAFEvaluator genericUDAFEvaluator = getGenericUDAFEvaluator(aggName, aggParameters, value); - assert(genericUDAFEvaluator != null); - GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode, aggParameters); - aggregations.add(new aggregationDesc(aggName.toLowerCase(), udaf.genericUDAFEvaluator, udaf.convertedParameters, - isDistinct, amode)); - String field = getColumnInternalName(groupByKeys.size() + aggregations.size() -1); - outputColumnNames.add(field); - groupByOutputRowResolver.put("",value.toStringTree(), - new ColumnInfo(field, - udaf.returnType, "", false)); - // Save the evaluator so that it can be used by the next-stage GroupByOperators - if (genericUDAFEvaluators != null) { - genericUDAFEvaluators.put(entry.getKey(), genericUDAFEvaluator); - } - } - - Operator op = - putOpInsertMap(OperatorFactory.getAndMakeChild(new groupByDesc(mode, outputColumnNames, groupByKeys, aggregations, false), - new RowSchema(groupByOutputRowResolver.getColumnInfos()), - reduceSinkOperatorInfo), - 
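genGroupByPlanGroupByOperator above lays out its output schema as grouping keys first, then one column per aggregation, each named via getColumnInternalName. A small sketch of that layout; the "_col" prefix is an assumption for the demo, not read from this diff:

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;

    public class GroupByColumnLayoutSketch {
        static String columnInternalName(int pos) {
            return "_col" + pos;   // assumed internal-name convention
        }

        public static void main(String[] args) {
            List<String> groupKeys = Arrays.asList("key", "ds");
            List<String> aggregations = Arrays.asList("count(value)", "sum(value)");

            List<String> outputColumnNames = new ArrayList<>();
            for (int i = 0; i < groupKeys.size(); i++) {
                outputColumnNames.add(columnInternalName(i));                    // _col0, _col1
            }
            for (int i = 0; i < aggregations.size(); i++) {
                outputColumnNames.add(columnInternalName(groupKeys.size() + i)); // _col2, _col3
            }
            System.out.println(outputColumnNames); // [_col0, _col1, _col2, _col3]
        }
    }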
groupByOutputRowResolver - ); - op.setColumnExprMap(colExprMap); - return op; - } - - /** - * Generate the GroupByOperator for the Query Block (parseInfo.getXXX(dest)). - * The new GroupByOperator will be a child of the reduceSinkOperatorInfo. - * - * @param mode The mode of the aggregation (MERGEPARTIAL, PARTIAL2) - * @param genericUDAFEvaluators The mapping from Aggregation StringTree to the - * genericUDAFEvaluator. - * @param distPartAggr partial aggregation for distincts - * @return the new GroupByOperator - */ - @SuppressWarnings("nls") - private Operator genGroupByPlanGroupByOperator1( - QBParseInfo parseInfo, String dest, Operator reduceSinkOperatorInfo, - groupByDesc.Mode mode, Map genericUDAFEvaluators, boolean distPartAgg) - throws SemanticException { - ArrayList outputColumnNames = new ArrayList(); - RowResolver groupByInputRowResolver = opParseCtx.get(reduceSinkOperatorInfo).getRR(); - RowResolver groupByOutputRowResolver = new RowResolver(); - groupByOutputRowResolver.setIsExprResolver(true); - ArrayList groupByKeys = new ArrayList(); - ArrayList aggregations = new ArrayList(); - List grpByExprs = getGroupByForClause(parseInfo, dest); - Map colExprMap = new HashMap(); - for (int i = 0; i < grpByExprs.size(); ++i) { - ASTNode grpbyExpr = grpByExprs.get(i); - String text = grpbyExpr.toStringTree(); - ColumnInfo exprInfo = groupByInputRowResolver.get("",text); - - if (exprInfo == null) { - throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(grpbyExpr)); - } - - groupByKeys.add(new exprNodeColumnDesc(exprInfo.getType(), - exprInfo.getInternalName(), - exprInfo.getTabAlias(), - exprInfo.getIsPartitionCol())); - String field = getColumnInternalName(i); - outputColumnNames.add(field); - groupByOutputRowResolver.put("",grpbyExpr.toStringTree(), - new ColumnInfo(field, exprInfo.getType(), "", false)); - colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1)); - } - - HashMap aggregationTrees = parseInfo - .getAggregationExprsForClause(dest); - for (Map.Entry entry : aggregationTrees.entrySet()) { - ASTNode value = entry.getValue(); - String aggName = value.getChild(0).getText(); - ArrayList aggParameters = new ArrayList(); - - // If the function is distinct, partial aggregartion has not been done on the client side. - // If distPartAgg is set, the client is letting us know that partial aggregation has not been done. - // For eg: select a, count(b+c), count(distinct d+e) group by a - // For count(b+c), if partial aggregation has been performed, then we directly look for count(b+c). - // Otherwise, we look for b+c. 
- // For distincts, partial aggregation is never performed on the client side, so always look for the parameters: d+e - boolean partialAggDone = !(distPartAgg || (value.getToken().getType() == HiveParser.TOK_FUNCTIONDI)); - if (!partialAggDone) { - // 0 is the function name - for (int i = 1; i < value.getChildCount(); i++) { - String text = value.getChild(i).toStringTree(); - ASTNode paraExpr = (ASTNode)value.getChild(i); - ColumnInfo paraExprInfo = groupByInputRowResolver.get("",text); - if (paraExprInfo == null) { - throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(paraExpr)); - } - - String paraExpression = paraExprInfo.getInternalName(); - assert(paraExpression != null); - aggParameters.add(new exprNodeColumnDesc(paraExprInfo.getType(), - paraExprInfo.getInternalName(), - paraExprInfo.getTabAlias(), - paraExprInfo.getIsPartitionCol())); - } - } - else { - String text = entry.getKey(); - ColumnInfo paraExprInfo = groupByInputRowResolver.get("",text); - if (paraExprInfo == null) { - throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(value)); - } - String paraExpression = paraExprInfo.getInternalName(); - assert(paraExpression != null); - aggParameters.add(new exprNodeColumnDesc(paraExprInfo.getType(), paraExpression, - paraExprInfo.getTabAlias(), - paraExprInfo.getIsPartitionCol())); - } - boolean isDistinct = (value.getType() == HiveParser.TOK_FUNCTIONDI); - Mode amode = groupByDescModeToUDAFMode(mode, isDistinct); - GenericUDAFEvaluator genericUDAFEvaluator = null; - // For distincts, partial aggregations have not been done - if (distPartAgg) { - genericUDAFEvaluator = getGenericUDAFEvaluator(aggName, aggParameters, value); - assert(genericUDAFEvaluator != null); - genericUDAFEvaluators.put(entry.getKey(), genericUDAFEvaluator); - } - else { - genericUDAFEvaluator = genericUDAFEvaluators.get(entry.getKey()); - assert(genericUDAFEvaluator != null); - } - - GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode, aggParameters); - aggregations.add(new aggregationDesc(aggName.toLowerCase(), udaf.genericUDAFEvaluator, udaf.convertedParameters, - (mode != groupByDesc.Mode.FINAL && isDistinct), amode)); - String field = getColumnInternalName(groupByKeys.size() + aggregations.size() - 1); - outputColumnNames.add(field); - groupByOutputRowResolver.put("", value.toStringTree(), - new ColumnInfo(field, - udaf.returnType, "", false)); - } - - Operator op = putOpInsertMap( - OperatorFactory.getAndMakeChild(new groupByDesc(mode, outputColumnNames, groupByKeys, aggregations, distPartAgg), - new RowSchema(groupByOutputRowResolver.getColumnInfos()), - reduceSinkOperatorInfo), - groupByOutputRowResolver); - op.setColumnExprMap(colExprMap); - return op; - } - - /** - * Generate the map-side GroupByOperator for the Query Block (qb.getParseInfo().getXXX(dest)). - * The new GroupByOperator will be a child of the inputOperatorInfo. - * - * @param mode The mode of the aggregation (HASH) - * @param genericUDAFEvaluators If not null, this function will store the mapping - * from Aggregation StringTree to the genericUDAFEvaluator in this parameter, - * so it can be used in the next-stage GroupBy aggregations. 
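The partialAggDone test above decides whether the reducer-side group-by resolves whole aggregate expressions or their raw parameters; the comment's example query is SELECT a, count(b+c), count(DISTINCT d+e) ... GROUP BY a. A standalone restatement of that decision:

    public class PartialAggLookupSketch {
        // Which string the reducer-side group-by looks up in the input row resolver.
        static String lookupKey(String aggCall, String rawParam,
                                boolean isDistinct, boolean distPartAgg) {
            boolean partialAggDone = !(distPartAgg || isDistinct);
            return partialAggDone ? aggCall : rawParam;
        }

        public static void main(String[] args) {
            System.out.println(lookupKey("count(b+c)", "b+c", false, false)); // count(b+c)
            System.out.println(lookupKey("count(d+e)", "d+e", true,  false)); // d+e
        }
    }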
- * @return the new GroupByOperator - */ - @SuppressWarnings("nls") - private Operator genGroupByPlanMapGroupByOperator(QB qb, String dest, Operator inputOperatorInfo, - groupByDesc.Mode mode, Map genericUDAFEvaluators) throws SemanticException { - - RowResolver groupByInputRowResolver = opParseCtx.get(inputOperatorInfo).getRR(); - QBParseInfo parseInfo = qb.getParseInfo(); - RowResolver groupByOutputRowResolver = new RowResolver(); - groupByOutputRowResolver.setIsExprResolver(true); - ArrayList groupByKeys = new ArrayList(); - ArrayList outputColumnNames = new ArrayList(); - ArrayList aggregations = new ArrayList(); - Map colExprMap = new HashMap(); - List grpByExprs = getGroupByForClause(parseInfo, dest); - for (int i = 0; i < grpByExprs.size(); ++i) { - ASTNode grpbyExpr = grpByExprs.get(i); - exprNodeDesc grpByExprNode = genExprNodeDesc(grpbyExpr, groupByInputRowResolver); - - groupByKeys.add(grpByExprNode); - String field = getColumnInternalName(i); - outputColumnNames.add(field); - groupByOutputRowResolver.put("",grpbyExpr.toStringTree(), - new ColumnInfo(field, grpByExprNode.getTypeInfo(), "", false)); - colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1)); - } - - // If there is a distinctFuncExp, add all parameters to the reduceKeys. - if (parseInfo.getDistinctFuncExprForClause(dest) != null) { - ASTNode value = parseInfo.getDistinctFuncExprForClause(dest); - int numDistn=0; - // 0 is function name - for (int i = 1; i < value.getChildCount(); i++) { - ASTNode parameter = (ASTNode) value.getChild(i); - String text = parameter.toStringTree(); - if (groupByOutputRowResolver.get("",text) == null) { - exprNodeDesc distExprNode = genExprNodeDesc(parameter, groupByInputRowResolver); - groupByKeys.add(distExprNode); - numDistn++; - String field = getColumnInternalName(grpByExprs.size() + numDistn -1); - outputColumnNames.add(field); - groupByOutputRowResolver.put("", text, new ColumnInfo(field, distExprNode.getTypeInfo(), "", false)); - colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1)); - } - } - } - - // For each aggregation - HashMap aggregationTrees = parseInfo - .getAggregationExprsForClause(dest); - assert (aggregationTrees != null); - - for (Map.Entry entry : aggregationTrees.entrySet()) { - ASTNode value = entry.getValue(); - String aggName = value.getChild(0).getText(); - ArrayList aggParameters = new ArrayList(); - ArrayList> aggClasses = new ArrayList>(); - // 0 is the function name - for (int i = 1; i < value.getChildCount(); i++) { - ASTNode paraExpr = (ASTNode)value.getChild(i); - exprNodeDesc paraExprNode = genExprNodeDesc(paraExpr, groupByInputRowResolver); - - aggParameters.add(paraExprNode); - } - - boolean isDistinct = value.getType() == HiveParser.TOK_FUNCTIONDI; - Mode amode = groupByDescModeToUDAFMode(mode, isDistinct); - - GenericUDAFEvaluator genericUDAFEvaluator = getGenericUDAFEvaluator(aggName, aggParameters, value); - assert(genericUDAFEvaluator != null); - GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode, aggParameters); - aggregations.add(new aggregationDesc(aggName.toLowerCase(), udaf.genericUDAFEvaluator, udaf.convertedParameters, - isDistinct, amode)); - String field = getColumnInternalName(groupByKeys.size() + aggregations.size() -1); - outputColumnNames.add(field); - groupByOutputRowResolver.put("",value.toStringTree(), - new ColumnInfo(field, - udaf.returnType, "", false)); - // Save the evaluator so that it can be used by the next-stage GroupByOperators - if (genericUDAFEvaluators != null) { - 
genericUDAFEvaluators.put(entry.getKey(), genericUDAFEvaluator); - } - } - - Operator op = putOpInsertMap( - OperatorFactory.getAndMakeChild(new groupByDesc(mode, outputColumnNames, groupByKeys, aggregations, false), - new RowSchema(groupByOutputRowResolver.getColumnInfos()), - inputOperatorInfo), - groupByOutputRowResolver); - op.setColumnExprMap(colExprMap); - return op; - } - - - /** - * Generate the ReduceSinkOperator for the Group By Query Block (qb.getPartInfo().getXXX(dest)). - * The new ReduceSinkOperator will be a child of inputOperatorInfo. - * - * It will put all Group By keys and the distinct field (if any) in the map-reduce sort key, - * and all other fields in the map-reduce value. - * - * @param numPartitionFields the number of fields for map-reduce partitioning. - * This is usually the number of fields in the Group By keys. - * @return the new ReduceSinkOperator. - * @throws SemanticException - */ - @SuppressWarnings("nls") - private Operator genGroupByPlanReduceSinkOperator(QB qb, - String dest, Operator inputOperatorInfo, int numPartitionFields, int numReducers, boolean mapAggrDone) throws SemanticException { - - RowResolver reduceSinkInputRowResolver = opParseCtx.get(inputOperatorInfo).getRR(); - QBParseInfo parseInfo = qb.getParseInfo(); - RowResolver reduceSinkOutputRowResolver = new RowResolver(); - reduceSinkOutputRowResolver.setIsExprResolver(true); - Map colExprMap = new HashMap(); - ArrayList reduceKeys = new ArrayList(); - // Pre-compute group-by keys and store in reduceKeys - - List outputColumnNames = new ArrayList(); - List grpByExprs = getGroupByForClause(parseInfo, dest); - for (int i = 0; i < grpByExprs.size(); ++i) { - ASTNode grpbyExpr = grpByExprs.get(i); - exprNodeDesc inputExpr = genExprNodeDesc(grpbyExpr, reduceSinkInputRowResolver); - reduceKeys.add(inputExpr); - String text = grpbyExpr.toStringTree(); - if (reduceSinkOutputRowResolver.get("", text) == null) { - outputColumnNames.add(getColumnInternalName(reduceKeys.size() - 1)); - String field = Utilities.ReduceField.KEY.toString() + "." + getColumnInternalName(reduceKeys.size() - 1); - ColumnInfo colInfo = new ColumnInfo(field, - reduceKeys.get(reduceKeys.size()-1).getTypeInfo(), null, false); - reduceSinkOutputRowResolver.put("", text, colInfo); - colExprMap.put(colInfo.getInternalName(), inputExpr); - } else { - throw new SemanticException(ErrorMsg.DUPLICATE_GROUPBY_KEY.getMsg(grpbyExpr)); - } - } - - // If there is a distinctFuncExp, add all parameters to the reduceKeys. - if (parseInfo.getDistinctFuncExprForClause(dest) != null) { - ASTNode value = parseInfo.getDistinctFuncExprForClause(dest); - // 0 is function name - for (int i = 1; i < value.getChildCount(); i++) { - ASTNode parameter = (ASTNode) value.getChild(i); - String text = parameter.toStringTree(); - if (reduceSinkOutputRowResolver.get("",text) == null) { - reduceKeys.add(genExprNodeDesc(parameter, reduceSinkInputRowResolver)); - outputColumnNames.add(getColumnInternalName(reduceKeys.size() - 1)); - String field = Utilities.ReduceField.KEY.toString() + "." 
+ getColumnInternalName(reduceKeys.size() - 1); - ColumnInfo colInfo = new ColumnInfo(field, - reduceKeys.get(reduceKeys.size()-1).getTypeInfo(), null, false); - reduceSinkOutputRowResolver.put("", text, colInfo); - colExprMap.put(colInfo.getInternalName(), reduceKeys.get(reduceKeys.size()-1)); - } - } - } - - ArrayList reduceValues = new ArrayList(); - HashMap aggregationTrees = parseInfo.getAggregationExprsForClause(dest); - - if (!mapAggrDone) { - // Put parameters to aggregations in reduceValues - for (Map.Entry entry : aggregationTrees.entrySet()) { - ASTNode value = entry.getValue(); - // 0 is function name - for (int i = 1; i < value.getChildCount(); i++) { - ASTNode parameter = (ASTNode) value.getChild(i); - String text = parameter.toStringTree(); - if (reduceSinkOutputRowResolver.get("",text) == null) { - reduceValues.add(genExprNodeDesc(parameter, reduceSinkInputRowResolver)); - outputColumnNames.add(getColumnInternalName(reduceValues.size() - 1)); - String field = Utilities.ReduceField.VALUE.toString() + "." + getColumnInternalName(reduceValues.size() - 1); - reduceSinkOutputRowResolver.put("", text, - new ColumnInfo(field, - reduceValues.get(reduceValues.size()-1).getTypeInfo(), - null, false)); - } - } - } - } - else - { - // Put partial aggregation results in reduceValues - int inputField = reduceKeys.size(); - - for (Map.Entry entry : aggregationTrees.entrySet()) { - - TypeInfo type = reduceSinkInputRowResolver.getColumnInfos().get(inputField).getType(); - reduceValues.add(new exprNodeColumnDesc(type, getColumnInternalName(inputField), - "", false)); - inputField++; - outputColumnNames.add(getColumnInternalName(reduceValues.size() - 1)); - String field = Utilities.ReduceField.VALUE.toString() + "." + getColumnInternalName(reduceValues.size() - 1); - reduceSinkOutputRowResolver.put("", ((ASTNode)entry.getValue()).toStringTree(), - new ColumnInfo(field, - type, null, false)); - } - } - - ReduceSinkOperator rsOp = (ReduceSinkOperator) putOpInsertMap( - OperatorFactory.getAndMakeChild(PlanUtils.getReduceSinkDesc(reduceKeys, reduceValues, outputColumnNames, true, -1, numPartitionFields, - numReducers), - new RowSchema(reduceSinkOutputRowResolver.getColumnInfos()), - inputOperatorInfo), - reduceSinkOutputRowResolver - ); - rsOp.setColumnExprMap(colExprMap); - return rsOp; - } - - /** - * Generate the second ReduceSinkOperator for the Group By Plan (parseInfo.getXXX(dest)). - * The new ReduceSinkOperator will be a child of groupByOperatorInfo. - * - * The second ReduceSinkOperator will put the group by keys in the map-reduce sort - * key, and put the partial aggregation results in the map-reduce value. - * - * @param numPartitionFields the number of fields in the map-reduce partition key. - * This should always be the same as the number of Group By keys. We should be - * able to remove this parameter since in this phase there is no distinct any more. - * @return the new ReduceSinkOperator. 
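genGroupByPlanReduceSinkOperator above puts grouping keys and DISTINCT parameters into the map-reduce KEY and aggregation parameters (or map-side partial results) into the VALUE, prefixing field names with KEY. and VALUE.. A toy sketch of the resulting field names; the "_col" suffix convention is assumed for the demo:

    import java.util.ArrayList;
    import java.util.List;

    public class GroupByReduceSinkLayoutSketch {
        public static void main(String[] args) {
            String[] groupByExprs = {"key"};
            String[] distinctParams = {"value"};   // from count(DISTINCT value), if present
            String[] aggParams = {"value"};        // parameters of the non-distinct aggregations

            List<String> keyFields = new ArrayList<>();
            List<String> valueFields = new ArrayList<>();
            int k = 0, v = 0;
            for (String e : groupByExprs)   keyFields.add("KEY._col" + (k++) + " <- " + e);
            for (String e : distinctParams) keyFields.add("KEY._col" + (k++) + " <- " + e);
            for (String e : aggParams)      valueFields.add("VALUE._col" + (v++) + " <- " + e);

            System.out.println(keyFields);   // [KEY._col0 <- key, KEY._col1 <- value]
            System.out.println(valueFields); // [VALUE._col0 <- value]
        }
    }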
- * @throws SemanticException - */ - @SuppressWarnings("nls") - private Operator genGroupByPlanReduceSinkOperator2MR( - QBParseInfo parseInfo, String dest, Operator groupByOperatorInfo, int numPartitionFields, int numReducers) - throws SemanticException { - RowResolver reduceSinkInputRowResolver2 = opParseCtx.get(groupByOperatorInfo).getRR(); - RowResolver reduceSinkOutputRowResolver2 = new RowResolver(); - reduceSinkOutputRowResolver2.setIsExprResolver(true); - Map colExprMap = new HashMap(); - ArrayList reduceKeys = new ArrayList(); - ArrayList outputColumnNames = new ArrayList(); - // Get group-by keys and store in reduceKeys - List grpByExprs = getGroupByForClause(parseInfo, dest); - for (int i = 0; i < grpByExprs.size(); ++i) { - ASTNode grpbyExpr = grpByExprs.get(i); - String field = getColumnInternalName(i); - outputColumnNames.add(field); - TypeInfo typeInfo = reduceSinkInputRowResolver2.get("", grpbyExpr.toStringTree()).getType(); - exprNodeColumnDesc inputExpr = new exprNodeColumnDesc(typeInfo, field, "", false); - reduceKeys.add(inputExpr); - ColumnInfo colInfo = new ColumnInfo(Utilities.ReduceField.KEY.toString() + "." + field, - typeInfo, "", false); - reduceSinkOutputRowResolver2.put("", grpbyExpr.toStringTree(), - colInfo); - colExprMap.put(colInfo.getInternalName(), inputExpr); - } - // Get partial aggregation results and store in reduceValues - ArrayList reduceValues = new ArrayList(); - int inputField = reduceKeys.size(); - HashMap aggregationTrees = parseInfo - .getAggregationExprsForClause(dest); - for (Map.Entry entry : aggregationTrees.entrySet()) { - String field = getColumnInternalName(inputField); - ASTNode t = entry.getValue(); - TypeInfo typeInfo = reduceSinkInputRowResolver2.get("", t.toStringTree()).getType(); - reduceValues.add(new exprNodeColumnDesc(typeInfo, field, "", false)); - inputField++; - String col = getColumnInternalName(reduceValues.size()-1); - outputColumnNames.add(col); - reduceSinkOutputRowResolver2.put("", t.toStringTree(), - new ColumnInfo(Utilities.ReduceField.VALUE.toString() + "." + col, - typeInfo, "", false)); - } - - ReduceSinkOperator rsOp = (ReduceSinkOperator) putOpInsertMap( - OperatorFactory.getAndMakeChild(PlanUtils.getReduceSinkDesc(reduceKeys, reduceValues, outputColumnNames, true, -1, - numPartitionFields, numReducers), - new RowSchema(reduceSinkOutputRowResolver2.getColumnInfos()), - groupByOperatorInfo), - reduceSinkOutputRowResolver2 - ); - - rsOp.setColumnExprMap(colExprMap); - return rsOp; - } - - /** - * Generate the second GroupByOperator for the Group By Plan (parseInfo.getXXX(dest)). - * The new GroupByOperator will do the second aggregation based on the partial aggregation - * results. - * - * @param mode the mode of aggregation (FINAL) - * @param genericUDAFEvaluators The mapping from Aggregation StringTree to the - * genericUDAFEvaluator. 
- * @return the new GroupByOperator - * @throws SemanticException - */ - @SuppressWarnings("nls") - private Operator genGroupByPlanGroupByOperator2MR( - QBParseInfo parseInfo, String dest, Operator reduceSinkOperatorInfo2, - groupByDesc.Mode mode, Map genericUDAFEvaluators) - throws SemanticException { - RowResolver groupByInputRowResolver2 = opParseCtx.get(reduceSinkOperatorInfo2).getRR(); - RowResolver groupByOutputRowResolver2 = new RowResolver(); - groupByOutputRowResolver2.setIsExprResolver(true); - ArrayList groupByKeys = new ArrayList(); - ArrayList aggregations = new ArrayList(); - Map colExprMap = new HashMap(); - List grpByExprs = getGroupByForClause(parseInfo, dest); - ArrayList outputColumnNames = new ArrayList(); - for (int i = 0; i < grpByExprs.size(); ++i) { - ASTNode grpbyExpr = grpByExprs.get(i); - String text = grpbyExpr.toStringTree(); - ColumnInfo exprInfo = groupByInputRowResolver2.get("",text); - if (exprInfo == null) { - throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(grpbyExpr)); - } - - String expression = exprInfo.getInternalName(); - groupByKeys.add(new exprNodeColumnDesc(exprInfo.getType(), expression, - exprInfo.getTabAlias(), - exprInfo.getIsPartitionCol())); - String field = getColumnInternalName(i); - outputColumnNames.add(field); - groupByOutputRowResolver2.put("",grpbyExpr.toStringTree(), - new ColumnInfo(field, exprInfo.getType(), "", false)); - colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1)); - } - HashMap aggregationTrees = parseInfo - .getAggregationExprsForClause(dest); - for (Map.Entry entry : aggregationTrees.entrySet()) { - ArrayList aggParameters = new ArrayList(); - ASTNode value = entry.getValue(); - String text = entry.getKey(); - ColumnInfo paraExprInfo = groupByInputRowResolver2.get("",text); - if (paraExprInfo == null) { - throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(value)); - } - String paraExpression = paraExprInfo.getInternalName(); - assert(paraExpression != null); - aggParameters.add(new exprNodeColumnDesc(paraExprInfo.getType(), paraExpression, - paraExprInfo.getTabAlias(), - paraExprInfo.getIsPartitionCol())); - - String aggName = value.getChild(0).getText(); - - boolean isDistinct = value.getType() == HiveParser.TOK_FUNCTIONDI; - Mode amode = groupByDescModeToUDAFMode(mode, isDistinct); - GenericUDAFEvaluator genericUDAFEvaluator = genericUDAFEvaluators.get(entry.getKey()); - assert(genericUDAFEvaluator != null); - GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode, aggParameters); - aggregations.add(new aggregationDesc(aggName.toLowerCase(), udaf.genericUDAFEvaluator, udaf.convertedParameters, - (mode != groupByDesc.Mode.FINAL && value.getToken().getType() == HiveParser.TOK_FUNCTIONDI), - amode)); - String field = getColumnInternalName(groupByKeys.size() + aggregations.size() - 1); - outputColumnNames.add(field); - groupByOutputRowResolver2.put("", value.toStringTree(), - new ColumnInfo(field, - udaf.returnType, "", false)); - } - - Operator op = putOpInsertMap( - OperatorFactory.getAndMakeChild(new groupByDesc(mode, outputColumnNames, groupByKeys, aggregations, false), - new RowSchema(groupByOutputRowResolver2.getColumnInfos()), - reduceSinkOperatorInfo2), - groupByOutputRowResolver2 - ); - op.setColumnExprMap(colExprMap); - return op; - } - - /** - * Generate a Group-By plan using a single map-reduce job (3 operators will be - * inserted): - * - * ReduceSink ( keys = (K1_EXP, K2_EXP, DISTINCT_EXP), values = (A1_EXP, - * A2_EXP) ) SortGroupBy (keys = (KEY.0,KEY.1), 
aggregations = - * (count_distinct(KEY.2), sum(VALUE.0), count(VALUE.1))) Select (final - * selects) - * - * @param dest - * @param qb - * @param input - * @return - * @throws SemanticException - * - * Generate a Group-By plan using 1 map-reduce job. - * Spray by the group by key, and sort by the distinct key (if any), and - * compute aggregates * - * The agggregation evaluation functions are as follows: - * Partitioning Key: - * grouping key - * - * Sorting Key: - * grouping key if no DISTINCT - * grouping + distinct key if DISTINCT - * - * Reducer: iterate/merge - * (mode = COMPLETE) - **/ - @SuppressWarnings({ "unused", "nls" }) - private Operator genGroupByPlan1MR(String dest, QB qb, - Operator input) throws SemanticException { - - QBParseInfo parseInfo = qb.getParseInfo(); - - int numReducers = -1; - List grpByExprs = getGroupByForClause(parseInfo, dest); - if (grpByExprs.isEmpty()) - numReducers = 1; - - // ////// 1. Generate ReduceSinkOperator - Operator reduceSinkOperatorInfo = genGroupByPlanReduceSinkOperator( - qb, dest, input, grpByExprs.size(), numReducers, false); - - // ////// 2. Generate GroupbyOperator - Operator groupByOperatorInfo = genGroupByPlanGroupByOperator(parseInfo, - dest, reduceSinkOperatorInfo, groupByDesc.Mode.COMPLETE, null); - - return groupByOperatorInfo; - } - - static ArrayList getUDAFEvaluators(ArrayList aggs) { - ArrayList result = new ArrayList(); - for (int i=0; i genericUDAFEvaluators = - new LinkedHashMap(); - - QBParseInfo parseInfo = qb.getParseInfo(); - - // ////// 2. Generate GroupbyOperator - Operator groupByOperatorInfo = - genGroupByPlanGroupByOperator1(parseInfo, dest, input, groupByDesc.Mode.HASH, genericUDAFEvaluators, true); - - int numReducers = -1; - List grpByExprs = getGroupByForClause(parseInfo, dest); - - // ////// 3. Generate ReduceSinkOperator2 - Operator reduceSinkOperatorInfo2 = genGroupByPlanReduceSinkOperator2MR( - parseInfo, dest, groupByOperatorInfo, grpByExprs.size(), numReducers); - - // ////// 4. Generate GroupbyOperator2 - Operator groupByOperatorInfo2 = - genGroupByPlanGroupByOperator2MR(parseInfo, dest, reduceSinkOperatorInfo2, groupByDesc.Mode.FINAL, genericUDAFEvaluators); - - return groupByOperatorInfo2; - } - - /** - * Generate a Group-By plan using a 2 map-reduce jobs (5 operators will be - * inserted): - * - * ReduceSink ( keys = (K1_EXP, K2_EXP, DISTINCT_EXP), values = (A1_EXP, - * A2_EXP) ) NOTE: If DISTINCT_EXP is null, partition by rand() SortGroupBy - * (keys = (KEY.0,KEY.1), aggregations = (count_distinct(KEY.2), sum(VALUE.0), - * count(VALUE.1))) ReduceSink ( keys = (0,1), values=(2,3,4)) SortGroupBy - * (keys = (KEY.0,KEY.1), aggregations = (sum(VALUE.0), sum(VALUE.1), - * sum(VALUE.2))) Select (final selects) - * - * @param dest - * @param qb - * @param input - * @return - * @throws SemanticException - * - * Generate a Group-By plan using a 2 map-reduce jobs. - * Spray by the grouping key and distinct key (or a random number, if no distinct is - * present) in hope of getting a uniform distribution, and compute partial aggregates - * grouped by the reduction key (grouping key + distinct key). - * Evaluate partial aggregates first, and spray by the grouping key to compute actual - * aggregates in the second phase. 
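genGroupByPlan1MR/2MR above fall back to a single reducer whenever there are no grouping keys, since every row must meet in one place; otherwise the reducer count is left at -1 for the framework to choose. A one-line sketch of that rule:

    public class GroupByReducerCountSketch {
        static int numReducers(int numGroupByExprs) {
            return numGroupByExprs == 0 ? 1 : -1;   // -1: let Hive/MapReduce decide
        }

        public static void main(String[] args) {
            System.out.println(numReducers(0)); //  1, e.g. SELECT count(1) FROM t
            System.out.println(numReducers(2)); // -1, e.g. GROUP BY key, ds
        }
    }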
- * The agggregation evaluation functions are as follows: - * Partitioning Key: - * random() if no DISTINCT - * grouping + distinct key if DISTINCT - * - * Sorting Key: - * grouping key if no DISTINCT - * grouping + distinct key if DISTINCT - * - * Reducer: iterate/terminatePartial - * (mode = PARTIAL1) - * - * STAGE 2 - * - * Partitioning Key: - * grouping key - * - * Sorting Key: - * grouping key if no DISTINCT - * grouping + distinct key if DISTINCT - * - * Reducer: merge/terminate - * (mode = FINAL) - */ - @SuppressWarnings("nls") - private Operator genGroupByPlan2MR(String dest, QB qb, - Operator input) throws SemanticException { - - QBParseInfo parseInfo = qb.getParseInfo(); - - // ////// 1. Generate ReduceSinkOperator - // There is a special case when we want the rows to be randomly distributed to - // reducers for load balancing problem. That happens when there is no DISTINCT - // operator. We set the numPartitionColumns to -1 for this purpose. This is - // captured by WritableComparableHiveObject.hashCode() function. - Operator reduceSinkOperatorInfo = genGroupByPlanReduceSinkOperator( - qb, dest, input, (parseInfo.getDistinctFuncExprForClause(dest) == null ? -1 - : Integer.MAX_VALUE), -1, false); - - // ////// 2. Generate GroupbyOperator - Map genericUDAFEvaluators = - new LinkedHashMap(); - GroupByOperator groupByOperatorInfo = (GroupByOperator)genGroupByPlanGroupByOperator(parseInfo, - dest, reduceSinkOperatorInfo, groupByDesc.Mode.PARTIAL1, genericUDAFEvaluators); - - int numReducers = -1; - List grpByExprs = getGroupByForClause(parseInfo, dest); - if (grpByExprs.isEmpty()) - numReducers = 1; - - // ////// 3. Generate ReduceSinkOperator2 - Operator reduceSinkOperatorInfo2 = genGroupByPlanReduceSinkOperator2MR( - parseInfo, dest, groupByOperatorInfo, grpByExprs.size(), numReducers); - - // ////// 4. Generate GroupbyOperator2 - Operator groupByOperatorInfo2 = - genGroupByPlanGroupByOperator2MR(parseInfo, dest, reduceSinkOperatorInfo2, - groupByDesc.Mode.FINAL, genericUDAFEvaluators); - - return groupByOperatorInfo2; - } - - private boolean optimizeMapAggrGroupBy(String dest, QB qb) { - List grpByExprs = getGroupByForClause(qb.getParseInfo(), dest); - if ((grpByExprs != null) && !grpByExprs.isEmpty()) - return false; - - if (qb.getParseInfo().getDistinctFuncExprForClause(dest) != null) - return false; - - return true; - } - - /** - * Generate a Group-By plan using 1 map-reduce job. - * First perform a map-side partial aggregation (to reduce the amount of data), at this - * point of time, we may turn off map-side partial aggregation based on its performance. 
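optimizeMapAggrGroupBy above lets the two-job map-aggregation plan collapse to a single job only when the destination has neither grouping keys nor a DISTINCT aggregate (the comment's example is SELECT count(1) FROM T WHERE t.ds = ...). Restated standalone:

    public class MapAggrShortcutSketch {
        static boolean singleStagePossible(int numGroupByExprs, boolean hasDistinct) {
            if (numGroupByExprs > 0) return false;
            if (hasDistinct) return false;
            return true;
        }

        public static void main(String[] args) {
            System.out.println(singleStagePossible(0, false)); // true  -> one MR job
            System.out.println(singleStagePossible(1, false)); // false -> full two-stage plan
        }
    }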
- * Then spray by the group by key, and sort by the distinct key (if any), and - * compute aggregates based on actual aggregates - * - * The agggregation evaluation functions are as follows: - * Mapper: iterate/terminatePartial - * (mode = HASH) - * - * Partitioning Key: - * grouping key - * - * Sorting Key: - * grouping key if no DISTINCT - * grouping + distinct key if DISTINCT - * - * Reducer: iterate/terminate if DISTINCT - * merge/terminate if NO DISTINCT - * (mode = MERGEPARTIAL) - */ - @SuppressWarnings("nls") - private Operator genGroupByPlanMapAggr1MR(String dest, QB qb, - Operator inputOperatorInfo) throws SemanticException { - - QBParseInfo parseInfo = qb.getParseInfo(); - - // ////// Generate GroupbyOperator for a map-side partial aggregation - Map genericUDAFEvaluators = - new LinkedHashMap(); - GroupByOperator groupByOperatorInfo = (GroupByOperator)genGroupByPlanMapGroupByOperator(qb, - dest, inputOperatorInfo, groupByDesc.Mode.HASH, genericUDAFEvaluators); - - int numReducers = -1; - - // Optimize the scenario when there are no grouping keys - only 1 reducer is needed - List grpByExprs = getGroupByForClause(parseInfo, dest); - if (grpByExprs.isEmpty()) - numReducers = 1; - - // ////// Generate ReduceSink Operator - Operator reduceSinkOperatorInfo = - genGroupByPlanReduceSinkOperator(qb, dest, groupByOperatorInfo, - grpByExprs.size(), numReducers, true); - - // This is a 1-stage map-reduce processing of the groupby. Tha map-side aggregates was just used to - // reduce output data. In case of distincts, partial results are not used, and so iterate is again - // invoked on the reducer. In case of non-distincts, partial results are used, and merge is invoked - // on the reducer. - return genGroupByPlanGroupByOperator1(parseInfo, dest, - reduceSinkOperatorInfo, groupByDesc.Mode.MERGEPARTIAL, - genericUDAFEvaluators, false); - } - - /** - * Generate a Group-By plan using a 2 map-reduce jobs. - * However, only 1 group-by plan is generated if the query involves no grouping key and - * no distincts. In that case, the plan is same as generated by genGroupByPlanMapAggr1MR. - * Otherwise, the following plan is generated: - * First perform a map side partial aggregation (to reduce the amount of data). Then - * spray by the grouping key and distinct key (or a random number, if no distinct is - * present) in hope of getting a uniform distribution, and compute partial aggregates - * grouped by the reduction key (grouping key + distinct key). - * Evaluate partial aggregates first, and spray by the grouping key to compute actual - * aggregates in the second phase. 
- * The agggregation evaluation functions are as follows: - * Mapper: iterate/terminatePartial - * (mode = HASH) - * - * Partitioning Key: - * random() if no DISTINCT - * grouping + distinct key if DISTINCT - * - * Sorting Key: - * grouping key if no DISTINCT - * grouping + distinct key if DISTINCT - * - * Reducer: iterate/terminatePartial if DISTINCT - * merge/terminatePartial if NO DISTINCT - * (mode = MERGEPARTIAL) - * - * STAGE 2 - * - * Partitioining Key: - * grouping key - * - * Sorting Key: - * grouping key if no DISTINCT - * grouping + distinct key if DISTINCT - * - * Reducer: merge/terminate - * (mode = FINAL) - */ - @SuppressWarnings("nls") - private Operator genGroupByPlanMapAggr2MR(String dest, QB qb, - Operator inputOperatorInfo) throws SemanticException { - - QBParseInfo parseInfo = qb.getParseInfo(); - - // ////// Generate GroupbyOperator for a map-side partial aggregation - Map genericUDAFEvaluators = - new LinkedHashMap(); - GroupByOperator groupByOperatorInfo = (GroupByOperator)genGroupByPlanMapGroupByOperator(qb, - dest, inputOperatorInfo, groupByDesc.Mode.HASH, genericUDAFEvaluators); - - // Optimize the scenario when there are no grouping keys and no distinct - 2 map-reduce jobs are not needed - // For eg: select count(1) from T where t.ds = .... - if (!optimizeMapAggrGroupBy(dest, qb)) { - - // ////// Generate ReduceSink Operator - Operator reduceSinkOperatorInfo = - genGroupByPlanReduceSinkOperator(qb, dest, groupByOperatorInfo, - (parseInfo.getDistinctFuncExprForClause(dest) == null ? -1 - : Integer.MAX_VALUE), -1, true); - - // ////// Generate GroupbyOperator for a partial aggregation - Operator groupByOperatorInfo2 = genGroupByPlanGroupByOperator1(parseInfo, - dest, reduceSinkOperatorInfo, groupByDesc.Mode.PARTIALS, - genericUDAFEvaluators, false); - - int numReducers = -1; - List grpByExprs = getGroupByForClause(parseInfo, dest); - if (grpByExprs.isEmpty()) - numReducers = 1; - - // ////// Generate ReduceSinkOperator2 - Operator reduceSinkOperatorInfo2 = genGroupByPlanReduceSinkOperator2MR(parseInfo, dest, groupByOperatorInfo2, - grpByExprs.size(), numReducers); - - // ////// Generate GroupbyOperator3 - return genGroupByPlanGroupByOperator2MR(parseInfo, dest, reduceSinkOperatorInfo2, groupByDesc.Mode.FINAL, genericUDAFEvaluators); - } - else { - // ////// Generate ReduceSink Operator - Operator reduceSinkOperatorInfo = - genGroupByPlanReduceSinkOperator(qb, dest, groupByOperatorInfo, getGroupByForClause(parseInfo, dest).size(), 1, true); - - return genGroupByPlanGroupByOperator2MR(parseInfo, dest, reduceSinkOperatorInfo, groupByDesc.Mode.FINAL, genericUDAFEvaluators); - } - } - - @SuppressWarnings("nls") - private Operator genConversionOps(String dest, QB qb, - Operator input) throws SemanticException { - - Integer dest_type = qb.getMetaData().getDestTypeForAlias(dest); - Table dest_tab = null; - switch (dest_type.intValue()) { - case QBMetaData.DEST_TABLE: - { - dest_tab = qb.getMetaData().getDestTableForAlias(dest); - break; - } - case QBMetaData.DEST_PARTITION: - { - dest_tab = qb.getMetaData().getDestPartitionForAlias(dest).getTable(); - break; - } - default: - { - return input; - } - } - - return input; - } - - @SuppressWarnings("nls") - private Operator genFileSinkPlan(String dest, QB qb, - Operator input) throws SemanticException { - - RowResolver inputRR = opParseCtx.get(input).getRR(); - QBMetaData qbm = qb.getMetaData(); - Integer dest_type = qbm.getDestTypeForAlias(dest); - - Table dest_tab; // destination table if any - String queryTmpdir; // the 
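The two map-side-aggregation variants above differ mainly in whether a second shuffle on the grouping key is needed. A sketch, not Hive code, spelling out the two operator chains described in the javadoc:

    import java.util.Arrays;
    import java.util.List;

    public class MapAggrPlanShapesSketch {
        public static void main(String[] args) {
            // genGroupByPlanMapAggr1MR: hash aggregate on the map side, merge in one reduce.
            List<String> oneJob = Arrays.asList(
                "GBY(HASH, map side)",
                "RS(keys = group keys [+ distinct params])",
                "GBY(MERGEPARTIAL)");

            // genGroupByPlanMapAggr2MR, general case: spray by group+distinct key (or random),
            // take partials, then shuffle again on the group key alone for the FINAL merge.
            List<String> twoJobs = Arrays.asList(
                "GBY(HASH, map side)",
                "RS(group + distinct key, or random)",
                "GBY(PARTIALS)",
                "RS2(group key)",
                "GBY(FINAL)");

            System.out.println(oneJob);
            System.out.println(twoJobs);
        }
    }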
intermediate destination directory - Path dest_path; // the final destination directory - tableDesc table_desc = null; - int currentTableId = 0; - boolean isLocal = false; - - switch (dest_type.intValue()) { - case QBMetaData.DEST_TABLE: { - - dest_tab = qbm.getDestTableForAlias(dest); - //check for partition - List parts = dest_tab.getTTable().getPartitionKeys(); - if(parts != null && parts.size() > 0) { - throw new SemanticException(ErrorMsg.NEED_PARTITION_ERROR.getMsg()); - } - dest_path = dest_tab.getPath(); - queryTmpdir = ctx.getExternalTmpFileURI(dest_path.toUri()); - table_desc = Utilities.getTableDesc(dest_tab); - - this.idToTableNameMap.put( String.valueOf(this.destTableId), dest_tab.getName()); - currentTableId = this.destTableId; - this.destTableId ++; - - // Create the work for moving the table - this.loadTableWork.add - (new loadTableDesc(queryTmpdir, - ctx.getExternalTmpFileURI(dest_path.toUri()), - table_desc, - new HashMap())); - outputs.add(new WriteEntity(dest_tab)); - break; - } - case QBMetaData.DEST_PARTITION: { - - Partition dest_part = qbm.getDestPartitionForAlias(dest); - dest_tab = dest_part.getTable(); - dest_path = dest_part.getPath()[0]; - queryTmpdir = ctx.getExternalTmpFileURI(dest_path.toUri()); - table_desc = Utilities.getTableDesc(dest_tab); - - this.idToTableNameMap.put(String.valueOf(this.destTableId), dest_tab.getName()); - currentTableId = this.destTableId; - this.destTableId ++; - - this.loadTableWork.add - (new loadTableDesc(queryTmpdir, - ctx.getExternalTmpFileURI(dest_path.toUri()), - table_desc, dest_part.getSpec())); - outputs.add(new WriteEntity(dest_part)); - break; - } - case QBMetaData.DEST_LOCAL_FILE: - isLocal = true; - // fall through - case QBMetaData.DEST_DFS_FILE: { - dest_path = new Path(qbm.getDestFileForAlias(dest)); - String destStr = dest_path.toString(); - - if (isLocal) { - // for local directory - we always write to map-red intermediate - // store and then copy to local fs - queryTmpdir = ctx.getMRTmpFileURI(); - } else { - // otherwise write to the file system implied by the directory - // no copy is required. we may want to revisit this policy in future - - try { - Path qPath = FileUtils.makeQualified(dest_path, conf); - queryTmpdir = ctx.getExternalTmpFileURI(qPath.toUri()); - } catch (Exception e) { - throw new SemanticException("Error creating temporary folder on: " - + dest_path, e); - } - } - String cols = new String(); - String colTypes = new String(); - Vector colInfos = inputRR.getColumnInfos(); - - // CTAS case: the file output format and serde are defined by the create table command - // rather than taking the default value - List field_schemas = null; - createTableDesc tblDesc = qb.getTableDesc(); - if ( tblDesc != null ) - field_schemas = new ArrayList(); - - boolean first = true; - for (ColumnInfo colInfo:colInfos) { - String[] nm = inputRR.reverseLookup(colInfo.getInternalName()); - - if ( nm[1] != null ) { // non-null column alias - colInfo.setAlias(nm[1]); - } - - if ( field_schemas != null ) { - FieldSchema col = new FieldSchema(); - if ( nm[1] != null ) { - col.setName(colInfo.getAlias()); - } else { - col.setName(colInfo.getInternalName()); - } - col.setType(colInfo.getType().getTypeName()); - field_schemas.add(col); - } - - if (!first) { - cols = cols.concat(","); - colTypes = colTypes.concat(":"); - } - - first = false; - cols = cols.concat(colInfo.getInternalName()); - - // Replace VOID type with string when the output is a temp table or local files. 
- // A VOID type can be generated under the query: - // - // select NULL from tt; - // or - // insert overwrite local directory "abc" select NULL from tt; - // - // where there is no column type to which the NULL value should be converted. - // - String tName = colInfo.getType().getTypeName(); - if ( tName.equals(Constants.VOID_TYPE_NAME) ) - colTypes = colTypes.concat(Constants.STRING_TYPE_NAME); - else - colTypes = colTypes.concat(tName); - } - - // update the create table descriptor with the resulting schema. - if ( tblDesc != null ) - tblDesc.setCols(field_schemas); - - if (!ctx.isMRTmpFileURI(destStr)) { - this.idToTableNameMap.put( String.valueOf(this.destTableId), destStr); - currentTableId = this.destTableId; - this.destTableId ++; - } - - boolean isDfsDir = (dest_type.intValue() == QBMetaData.DEST_DFS_FILE); - this.loadFileWork.add(new loadFileDesc(queryTmpdir, destStr, - isDfsDir, cols, colTypes)); - - if ( tblDesc == null ) { - table_desc = PlanUtils.getDefaultTableDesc(Integer.toString(Utilities.ctrlaCode), - cols, colTypes, false); - } else { - table_desc = PlanUtils.getTableDesc(tblDesc, cols, colTypes); - } - - outputs.add(new WriteEntity(destStr, !isDfsDir)); - break; - } - default: - throw new SemanticException("Unknown destination type: " + dest_type); - } - - input = genConversionSelectOperator(dest, qb, input, table_desc); - inputRR = opParseCtx.get(input).getRR(); - - Vector vecCol = new Vector(); - - try { - StructObjectInspector rowObjectInspector = (StructObjectInspector)table_desc.getDeserializer().getObjectInspector(); - List fields = rowObjectInspector.getAllStructFieldRefs(); - for (int i=0; i tableFields = oi.getAllStructFieldRefs(); - Vector rowFields = opParseCtx.get(input).getRR().getColumnInfos(); - if (tableFields.size() != rowFields.size()) { - String reason = "Table " + dest + " has " + tableFields.size() + " columns but query has " - + rowFields.size() + " columns."; - throw new SemanticException(ErrorMsg.TARGET_TABLE_COLUMN_MISMATCH.getMsg( - qb.getParseInfo().getDestForClause(dest), reason)); - } - - // Check column types - boolean converted = false; - int columnNumber = tableFields.size(); - ArrayList expressions = new ArrayList(columnNumber); - // MetadataTypedColumnsetSerDe does not need type conversions because it does - // the conversion to String by itself. 
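The file-sink path above serializes the output schema as a comma-separated column list and a colon-separated type list, rewriting a VOID type (e.g. from SELECT NULL) to string because there is no destination column type to convert it to. A compact sketch of that string building:

    import java.util.LinkedHashMap;
    import java.util.Map;
    import java.util.StringJoiner;

    public class FileSinkSchemaStringsSketch {
        public static void main(String[] args) {
            Map<String, String> schema = new LinkedHashMap<>();
            schema.put("_col0", "int");
            schema.put("_col1", "void");   // SELECT NULL yields a VOID-typed column

            StringJoiner cols = new StringJoiner(",");
            StringJoiner colTypes = new StringJoiner(":");
            for (Map.Entry<String, String> e : schema.entrySet()) {
                cols.add(e.getKey());
                colTypes.add("void".equals(e.getValue()) ? "string" : e.getValue());
            }
            System.out.println(cols);      // _col0,_col1
            System.out.println(colTypes);  // int:string
        }
    }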
- boolean isMetaDataSerDe = table_desc.getDeserializerClass().equals(MetadataTypedColumnsetSerDe.class); - boolean isLazySimpleSerDe = table_desc.getDeserializerClass().equals(LazySimpleSerDe.class); - if (!isMetaDataSerDe) { - for (int i=0; i colName = new ArrayList(); - for (int i=0; i partitionCols = new ArrayList(); - if (partitionExprs != null) { - int ccount = partitionExprs.getChildCount(); - for(int i=0; i sortCols = new ArrayList(); - StringBuilder order = new StringBuilder(); - if (sortExprs != null) { - int ccount = sortExprs.getChildCount(); - for(int i=0; i colExprMap = new HashMap(); - ArrayList valueCols = new ArrayList(); - for(ColumnInfo colInfo: inputRR.getColumnInfos()) { - valueCols.add(new exprNodeColumnDesc(colInfo.getType(), colInfo.getInternalName(), - colInfo.getTabAlias(), colInfo.getIsPartitionCol())); - colExprMap.put(colInfo.getInternalName(), valueCols.get(valueCols.size() - 1)); - } - - ArrayList outputColumns = new ArrayList(); - for (int i = 0; i < valueCols.size(); i++) - outputColumns.add(getColumnInternalName(i)); - Operator interim = putOpInsertMap( - OperatorFactory.getAndMakeChild( - PlanUtils.getReduceSinkDesc(sortCols, valueCols, outputColumns, false, -1, partitionCols, order.toString(), - numReducers), - new RowSchema(inputRR.getColumnInfos()), - input), inputRR); - interim.setColumnExprMap(colExprMap); - - // Add the extract operator to get the value fields - RowResolver out_rwsch = new RowResolver(); - RowResolver interim_rwsch = inputRR; - Integer pos = Integer.valueOf(0); - for(ColumnInfo colInfo: interim_rwsch.getColumnInfos()) { - String [] info = interim_rwsch.reverseLookup(colInfo.getInternalName()); - out_rwsch.put(info[0], info[1], - new ColumnInfo(getColumnInternalName(pos), colInfo.getType(), info[0], false)); - pos = Integer.valueOf(pos.intValue() + 1); - } - - Operator output = putOpInsertMap( - OperatorFactory.getAndMakeChild( - new extractDesc(new exprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, - Utilities.ReduceField.VALUE.toString(), - "", false)), - new RowSchema(out_rwsch.getColumnInfos()), - interim), out_rwsch); - - LOG.debug("Created ReduceSink Plan for clause: " + dest + " row schema: " - + out_rwsch.toString()); - return output; - } - - private Operator genJoinOperatorChildren(QBJoinTree join, Operator left, Operator[] right, - HashSet omitOpts) - throws SemanticException { - - RowResolver outputRS = new RowResolver(); - ArrayList outputColumnNames = new ArrayList(); - // all children are base classes - Operator[] rightOps = new Operator[right.length]; - int outputPos = 0; - - Map reversedExprs = new HashMap(); - HashMap> exprMap = new HashMap>(); - Map colExprMap = new HashMap(); - HashMap> posToAliasMap = new HashMap>(); - - for ( int pos = 0; pos < right.length; ++pos ) { - - Operator input = right[pos]; - if (input == null) - input = left; - - ArrayList keyDesc = new ArrayList(); - Byte tag = Byte.valueOf((byte)(((reduceSinkDesc)(input.getConf())).getTag())); - - // check whether this input operator produces output - if ( omitOpts == null || !omitOpts.contains(pos) ) { - // prepare output descriptors for the input opt - RowResolver inputRS = opParseCtx.get(input).getRR(); - Iterator keysIter = inputRS.getTableNames().iterator(); - Set aliases = posToAliasMap.get(pos); - if(aliases == null) { - aliases = new HashSet(); - posToAliasMap.put(pos, aliases); - } - while (keysIter.hasNext()) { - String key = keysIter.next(); - aliases.add(key); - HashMap map = inputRS.getFieldMap(key); - Iterator fNamesIter = 
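The CLUSTER/DISTRIBUTE/SORT BY reduce sink above accumulates one character per sort key into an "order" string alongside the sort columns. A toy sketch; encoding ascending as '+' and descending as '-' is an assumption here, not something visible in this hunk:

    public class SortOrderStringSketch {
        public static void main(String[] args) {
            String[] sortKeys = {"key", "value"};
            boolean[] ascending = {true, false};   // key ASC, value DESC

            StringBuilder order = new StringBuilder();
            for (int i = 0; i < sortKeys.length; i++) {
                order.append(ascending[i] ? '+' : '-');  // assumed encoding
            }
            System.out.println(order);  // "+-"
        }
    }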
map.keySet().iterator(); - while (fNamesIter.hasNext()) { - String field = fNamesIter.next(); - ColumnInfo valueInfo = inputRS.get(key, field); - keyDesc.add(new exprNodeColumnDesc(valueInfo.getType(), - valueInfo.getInternalName(), - valueInfo.getTabAlias(), - valueInfo.getIsPartitionCol())); - - if (outputRS.get(key, field) == null) { - String colName = getColumnInternalName(outputPos); - outputPos++; - outputColumnNames.add(colName); - colExprMap.put(colName, keyDesc.get(keyDesc.size() - 1)); - outputRS.put(key, field, new ColumnInfo(colName, - valueInfo.getType(), key, false)); - reversedExprs.put(colName, tag); - } - } - } - } - exprMap.put(tag, keyDesc); - rightOps[pos] = input; - } - - org.apache.hadoop.hive.ql.plan.joinCond[] joinCondns = new org.apache.hadoop.hive.ql.plan.joinCond[join.getJoinCond().length]; - for (int i = 0; i < join.getJoinCond().length; i++) { - joinCond condn = join.getJoinCond()[i]; - joinCondns[i] = new org.apache.hadoop.hive.ql.plan.joinCond(condn); - } - - joinDesc desc = new joinDesc(exprMap, outputColumnNames, joinCondns); - desc.setReversedExprs(reversedExprs); - JoinOperator joinOp = (JoinOperator) OperatorFactory.getAndMakeChild(desc, - new RowSchema(outputRS.getColumnInfos()), rightOps); - joinOp.setColumnExprMap(colExprMap); - joinOp.setPosToAliasMap(posToAliasMap); - return putOpInsertMap(joinOp, outputRS); - } - - @SuppressWarnings("nls") - private Operator genJoinReduceSinkChild(QB qb, QBJoinTree joinTree, - Operator child, String srcName, int pos) throws SemanticException { - RowResolver inputRS = opParseCtx.get(child).getRR(); - RowResolver outputRS = new RowResolver(); - ArrayList outputColumns = new ArrayList(); - ArrayList reduceKeys = new ArrayList(); - - // Compute join keys and store in reduceKeys - Vector exprs = joinTree.getExpressions().get(pos); - for (int i = 0; i < exprs.size(); i++) { - ASTNode expr = exprs.get(i); - reduceKeys.add(genExprNodeDesc(expr, inputRS)); - } - - // Walk over the input row resolver and copy in the output - ArrayList reduceValues = new ArrayList(); - Iterator tblNamesIter = inputRS.getTableNames().iterator(); - Map colExprMap = new HashMap(); - while (tblNamesIter.hasNext()) - { - String src = tblNamesIter.next(); - HashMap fMap = inputRS.getFieldMap(src); - for (Map.Entry entry : fMap.entrySet()) { - String field = entry.getKey(); - ColumnInfo valueInfo = entry.getValue(); - exprNodeColumnDesc inputExpr = new exprNodeColumnDesc(valueInfo.getType(), - valueInfo.getInternalName(), - valueInfo.getTabAlias(), - valueInfo.getIsPartitionCol()); - reduceValues.add(inputExpr); - if (outputRS.get(src, field) == null) { - String col = getColumnInternalName(reduceValues.size() - 1); - outputColumns.add(col); - ColumnInfo newColInfo = new ColumnInfo(Utilities.ReduceField.VALUE.toString() + "." 
+ - col, - valueInfo.getType(), src, false); - colExprMap.put(newColInfo.getInternalName(), inputExpr); - outputRS.put(src, field, newColInfo); - } - } - } - - int numReds = -1; - - // Use only 1 reducer in case of cartesian product - if (reduceKeys.size() == 0) { - numReds = 1; - - // Cartesian product is not supported in strict mode - if (conf.getVar(HiveConf.ConfVars.HIVEMAPREDMODE).equalsIgnoreCase("strict")) - throw new SemanticException(ErrorMsg.NO_CARTESIAN_PRODUCT.getMsg()); - } - - ReduceSinkOperator rsOp = (ReduceSinkOperator)putOpInsertMap( - OperatorFactory.getAndMakeChild( - PlanUtils.getReduceSinkDesc(reduceKeys, reduceValues, outputColumns, false, joinTree.getNextTag(), reduceKeys.size(), numReds), - new RowSchema(outputRS.getColumnInfos()), - child), outputRS); - rsOp.setColumnExprMap(colExprMap); - return rsOp; - } - - private Operator genJoinOperator(QB qb, QBJoinTree joinTree, - HashMap map) throws SemanticException { - QBJoinTree leftChild = joinTree.getJoinSrc(); - Operator joinSrcOp = null; - if (leftChild != null) - { - Operator joinOp = genJoinOperator(qb, leftChild, map); - Vector filter = joinTree.getFilters().get(0); - for (ASTNode cond: filter) - joinOp = genFilterPlan(qb, cond, joinOp); - - joinSrcOp = genJoinReduceSinkChild(qb, joinTree, joinOp, null, 0); - } - - Operator[] srcOps = new Operator[joinTree.getBaseSrc().length]; - - HashSet omitOpts = null; // set of input to the join that should be omitted by the output - int pos = 0; - for (String src : joinTree.getBaseSrc()) { - if (src != null) { - Operator srcOp = map.get(src); - - // for left-semi join, generate an additional selection & group-by operator before ReduceSink - ArrayList fields = joinTree.getRHSSemijoinColumns(src); - if ( fields != null ) { - // the RHS table columns should be not be output from the join - if ( omitOpts == null ) { - omitOpts = new HashSet(); - } - omitOpts.add(pos); - - // generate a selection operator for group-by keys only - srcOp = insertSelectForSemijoin(fields, srcOp); - - // generate a groupby operator (HASH mode) for a map-side partial aggregation for semijoin - srcOp = genMapGroupByForSemijoin(qb, fields, srcOp, groupByDesc.Mode.HASH); - } - - // generate a ReduceSink operator for the join - srcOps[pos] = genJoinReduceSinkChild(qb, joinTree, srcOp, src, pos); - pos++; - } else { - assert pos == 0; - srcOps[pos++] = null; - } - } - - // Type checking and implicit type conversion for join keys - genJoinOperatorTypeCheck(joinSrcOp, srcOps); - - JoinOperator joinOp = (JoinOperator)genJoinOperatorChildren(joinTree, joinSrcOp, srcOps, omitOpts); - joinContext.put(joinOp, joinTree); - return joinOp; - } - - /** - * Construct a selection operator for semijoin that filter out all fields other than the group by keys. - * - * @param fields list of fields need to be output - * @param input input operator - * @return the selection operator. 
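genJoinReduceSinkChild above treats a join with no equi-join keys as a cartesian product: it forces a single reducer and rejects the query outright under hive.mapred.mode=strict. Restated as a standalone guard:

    public class CartesianProductGuardSketch {
        static int pickReducerCount(int numJoinKeys, String hiveMapredMode) {
            if (numJoinKeys > 0) return -1;        // normal case: framework decides
            if ("strict".equalsIgnoreCase(hiveMapredMode)) {
                throw new IllegalStateException("cartesian product not allowed in strict mode");
            }
            return 1;                              // cartesian product: one reducer
        }

        public static void main(String[] args) {
            System.out.println(pickReducerCount(2, "nonstrict")); // -1
            System.out.println(pickReducerCount(0, "nonstrict")); //  1
        }
    }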
- * @throws SemanticException - */ - private Operator insertSelectForSemijoin(ArrayList fields, Operator input) - throws SemanticException { - - RowResolver inputRR = opParseCtx.get(input).getRR(); - ArrayList colList = new ArrayList(); - ArrayList columnNames = new ArrayList(); - - // construct the list of columns that need to be projected - for (ASTNode field: fields) { - exprNodeColumnDesc exprNode = (exprNodeColumnDesc) genExprNodeDesc(field, inputRR); - colList.add(exprNode); - columnNames.add(exprNode.getColumn()); - } - - // create selection operator - Operator output = putOpInsertMap( - OperatorFactory.getAndMakeChild( - new selectDesc(colList, columnNames, false), - new RowSchema(inputRR.getColumnInfos()), - input), - inputRR); - - output.setColumnExprMap(input.getColumnExprMap()); - return output; - } - - private Operator genMapGroupByForSemijoin(QB qb, - ArrayList fields, // the ASTNode of the join key "tab.col" - Operator inputOperatorInfo, - groupByDesc.Mode mode) - throws SemanticException { - - RowResolver groupByInputRowResolver = opParseCtx.get(inputOperatorInfo).getRR(); - RowResolver groupByOutputRowResolver = new RowResolver(); - ArrayList groupByKeys = new ArrayList(); - ArrayList outputColumnNames = new ArrayList(); - ArrayList aggregations = new ArrayList(); - Map colExprMap = new HashMap(); - QBParseInfo parseInfo = qb.getParseInfo(); - - groupByOutputRowResolver.setIsExprResolver(true); // join keys should only be columns but not be expressions - - for (int i = 0; i < fields.size(); ++i) { - // get the group by keys to ColumnInfo - ASTNode colName = fields.get(i); - exprNodeDesc grpByExprNode = genExprNodeDesc(colName, groupByInputRowResolver); - groupByKeys.add(grpByExprNode); - - // generate output column names - String field = getColumnInternalName(i); - outputColumnNames.add(field); - ColumnInfo colInfo2 = new ColumnInfo(field, grpByExprNode.getTypeInfo(), "", false); - groupByOutputRowResolver.put("", colName.toStringTree(), colInfo2); - - // establish mapping from the output column to the input column - colExprMap.put(field, grpByExprNode); - } - - // Generate group-by operator - Operator op = putOpInsertMap( - OperatorFactory.getAndMakeChild( - new groupByDesc(mode, outputColumnNames, groupByKeys, aggregations, false), - new RowSchema(groupByOutputRowResolver.getColumnInfos()), - inputOperatorInfo), - groupByOutputRowResolver); - - op.setColumnExprMap(colExprMap); - return op; - } - - private Operator genReduceSinkForSemijoin(QB qb, - ArrayList fields, // semijoin key for the rhs table - Operator inputOperatorInfo) - throws SemanticException { - - RowResolver reduceSinkInputRowResolver = opParseCtx.get(inputOperatorInfo).getRR(); - QBParseInfo parseInfo = qb.getParseInfo(); - RowResolver reduceSinkOutputRowResolver = new RowResolver(); - Map colExprMap = new HashMap(); - ArrayList reduceKeys = new ArrayList(); - List outputColumnNames = new ArrayList(); - - reduceSinkOutputRowResolver.setIsExprResolver(true); - - // Pre-compute group-by keys and store in reduceKeys - for (int i = 0; i < fields.size(); ++i) { - // based on the input row resolver, resolve the column names and construct expression node descriptors - ASTNode colName = fields.get(i); - exprNodeDesc inputExpr = genExprNodeDesc(colName, reduceSinkInputRowResolver); - - reduceKeys.add(inputExpr); - - // create new ColumnInfos for the groupby columns and put them into the output row resolver - if (reduceSinkOutputRowResolver.get("", colName.toStringTree()) == null) { - 
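For a LEFT SEMI JOIN, the code above inserts a projection of the semijoin keys and a map-side HASH group-by (with no aggregations) in front of the right-hand side's ReduceSink, so only distinct keys are shuffled and the RHS contributes no output columns. A sketch, not Hive code, listing that pipeline:

    import java.util.Arrays;
    import java.util.List;

    public class LeftSemiJoinRhsPipelineSketch {
        public static void main(String[] args) {
            // Operators added on the semijoin's right-hand side, in order:
            List<String> rhsPipeline = Arrays.asList(
                "SEL  - project only the semijoin key columns",
                "GBY  - HASH mode, keys only, no aggregations (map-side dedup)",
                "RS   - shuffle the deduplicated keys; rhs position recorded in omitOpts");
            rhsPipeline.forEach(System.out::println);
        }
    }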
outputColumnNames.add(getColumnInternalName(reduceKeys.size() - 1)); - String field = Utilities.ReduceField.KEY.toString() + "." + getColumnInternalName(reduceKeys.size() - 1); - ColumnInfo colInfo1 = new ColumnInfo(field, - reduceKeys.get(reduceKeys.size()-1).getTypeInfo(), - null, false); - reduceSinkOutputRowResolver.put("", colName.toStringTree(), colInfo1); - colExprMap.put(colInfo1.getInternalName(), inputExpr); - } else { - throw new SemanticException(ErrorMsg.DUPLICATE_GROUPBY_KEY.getMsg()); - } - } - - // SEMIJOIN HAS NO AGGREGATIONS, and we don't really use reduce values, so leave it as an empty list - ArrayList reduceValues = new ArrayList(); - int numPartitionFields = fields.size(); - - // finally generate the ReduceSink operator - ReduceSinkOperator rsOp = (ReduceSinkOperator) putOpInsertMap( - OperatorFactory.getAndMakeChild(PlanUtils.getReduceSinkDesc(reduceKeys, reduceValues, outputColumnNames, true, -1, numPartitionFields, -1), - new RowSchema(reduceSinkOutputRowResolver.getColumnInfos()), - inputOperatorInfo), - reduceSinkOutputRowResolver); - rsOp.setColumnExprMap(colExprMap); - - return rsOp; - } - - private void genJoinOperatorTypeCheck(Operator left, Operator[] right) throws SemanticException { - // keys[i] -> ArrayList for the i-th join operator key list - ArrayList> keys = new ArrayList>(); - int keyLength = 0; - for (int i=0; i map) - throws SemanticException { - QBJoinTree joinTree = qb.getQbJoinTree(); - Operator joinOp = genJoinOperator(qb, joinTree, map); - return joinOp; - } - - /** - * Extract the filters from the join condition and push them on top of the source operators. This procedure - * traverses the query tree recursively, - */ - private void pushJoinFilters(QB qb, QBJoinTree joinTree, HashMap map) throws SemanticException { - Vector> filters = joinTree.getFilters(); - if (joinTree.getJoinSrc() != null) - pushJoinFilters(qb, joinTree.getJoinSrc(), map); - - int pos = 0; - for (String src : joinTree.getBaseSrc()) { - if (src != null) { - Operator srcOp = map.get(src); - Vector filter = filters.get(pos); - for (ASTNode cond: filter) - srcOp = genFilterPlan(qb, cond, srcOp); - map.put(src, srcOp); - } - pos++; - } - } - - private List getMapSideJoinTables(QB qb) { - List cols = new ArrayList(); - ASTNode hints = qb.getParseInfo().getHints(); - for (int pos = 0; pos < hints.getChildCount(); pos++) { - ASTNode hint = (ASTNode)hints.getChild(pos); - if (((ASTNode)hint.getChild(0)).getToken().getType() == HiveParser.TOK_MAPJOIN) { - ASTNode hintTblNames = (ASTNode)hint.getChild(1); - int numCh = hintTblNames.getChildCount(); - for (int tblPos = 0; tblPos < numCh; tblPos++) { - String tblName = ((ASTNode)hintTblNames.getChild(tblPos)).getText().toLowerCase(); - if (!cols.contains(tblName)) - cols.add(tblName); - } - } - } - - return cols; - } - - private QBJoinTree genUniqueJoinTree(QB qb, ASTNode joinParseTree) - throws SemanticException { - QBJoinTree joinTree = new QBJoinTree(); - joinTree.setNoOuterJoin(false); - - joinTree.setExpressions(new Vector>()); - joinTree.setFilters(new Vector>()); - - // Create joinTree structures to fill them up later - Vector rightAliases = new Vector(); - Vector leftAliases = new Vector(); - Vector baseSrc = new Vector(); - Vector preserved = new Vector(); - - boolean lastPreserved = false; - int cols = -1; - - for(int i = 0; i < joinParseTree.getChildCount(); i++) { - ASTNode child = (ASTNode) joinParseTree.getChild(i); - - switch(child.getToken().getType()) { - case HiveParser.TOK_TABREF: - // Handle a table - populate 
aliases appropriately: - // leftAliases should contain the first table, rightAliases should - // contain all other tables and baseSrc should contain all tables - - String table_name = unescapeIdentifier(child.getChild(0).getText()); - String alias = child.getChildCount() == 1 ? table_name : - unescapeIdentifier(child.getChild(child.getChildCount()-1).getText().toLowerCase()); - - if (i == 0) { - leftAliases.add(alias); - joinTree.setLeftAlias(alias); - } else { - rightAliases.add(alias); - } - baseSrc.add(alias); - - preserved.add(lastPreserved); - lastPreserved = false; - break; - - case HiveParser.TOK_EXPLIST: - if (cols == -1 && child.getChildCount() != 0) { - cols = child.getChildCount(); - } else if(child.getChildCount() != cols) { - throw new SemanticException("Tables with different or invalid " + - "number of keys in UNIQUEJOIN"); - } - - Vector expressions = new Vector(); - Vector filt = new Vector(); - - for (Node exp: child.getChildren()) { - expressions.add((ASTNode)exp); - } - - joinTree.getExpressions().add(expressions); - joinTree.getFilters().add(filt); - break; - - case HiveParser.KW_PRESERVE: - lastPreserved = true; - break; - - case HiveParser.TOK_SUBQUERY: - throw new SemanticException("Subqueries are not supported in UNIQUEJOIN"); - - default: - throw new SemanticException("Unexpected UNIQUEJOIN structure"); - } - } - - joinTree.setBaseSrc(baseSrc.toArray(new String[0])); - joinTree.setLeftAliases(leftAliases.toArray(new String[0])); - joinTree.setRightAliases(rightAliases.toArray(new String[0])); - - joinCond[] condn = new joinCond[preserved.size()]; - for (int i = 0; i < condn.length; i++) { - condn[i] = new joinCond(preserved.get(i)); - } - joinTree.setJoinCond(condn); - - if (qb.getParseInfo().getHints() != null) { - parseStreamTables(joinTree, qb); - } - - return joinTree; - } - - private QBJoinTree genJoinTree(QB qb, ASTNode joinParseTree) - throws SemanticException { - QBJoinTree joinTree = new QBJoinTree(); - joinCond[] condn = new joinCond[1]; - - switch (joinParseTree.getToken().getType() ) { - case HiveParser.TOK_LEFTOUTERJOIN: - joinTree.setNoOuterJoin(false); - condn[0] = new joinCond(0, 1, joinType.LEFTOUTER); - break; - case HiveParser.TOK_RIGHTOUTERJOIN: - joinTree.setNoOuterJoin(false); - condn[0] = new joinCond(0, 1, joinType.RIGHTOUTER); - break; - case HiveParser.TOK_FULLOUTERJOIN: - joinTree.setNoOuterJoin(false); - condn[0] = new joinCond(0, 1, joinType.FULLOUTER); - break; - case HiveParser.TOK_LEFTSEMIJOIN: - joinTree.setNoSemiJoin(false); - condn[0] = new joinCond(0, 1, joinType.LEFTSEMI); - break; - default: - condn[0] = new joinCond(0, 1, joinType.INNER); - joinTree.setNoOuterJoin(true); - break; - } - - joinTree.setJoinCond(condn); - - ASTNode left = (ASTNode) joinParseTree.getChild(0); - ASTNode right = (ASTNode) joinParseTree.getChild(1); - - if ((left.getToken().getType() == HiveParser.TOK_TABREF) - || (left.getToken().getType() == HiveParser.TOK_SUBQUERY)) { - String table_name = unescapeIdentifier(left.getChild(0).getText()); - String alias = left.getChildCount() == 1 ? 
table_name : - unescapeIdentifier(left.getChild(left.getChildCount()-1).getText().toLowerCase()); - joinTree.setLeftAlias(alias); - String[] leftAliases = new String[1]; - leftAliases[0] = alias; - joinTree.setLeftAliases(leftAliases); - String[] children = new String[2]; - children[0] = alias; - joinTree.setBaseSrc(children); - } - else if (isJoinToken(left)) { - QBJoinTree leftTree = genJoinTree(qb, left); - joinTree.setJoinSrc(leftTree); - String[] leftChildAliases = leftTree.getLeftAliases(); - String leftAliases[] = new String[leftChildAliases.length + 1]; - for (int i = 0; i < leftChildAliases.length; i++) - leftAliases[i] = leftChildAliases[i]; - leftAliases[leftChildAliases.length] = leftTree.getRightAliases()[0]; - joinTree.setLeftAliases(leftAliases); - } else - assert (false); - - if ((right.getToken().getType() == HiveParser.TOK_TABREF) - || (right.getToken().getType() == HiveParser.TOK_SUBQUERY)) { - String table_name = unescapeIdentifier(right.getChild(0).getText()); - String alias = right.getChildCount() == 1 ? table_name : - unescapeIdentifier(right.getChild(right.getChildCount()-1).getText().toLowerCase()); - String[] rightAliases = new String[1]; - rightAliases[0] = alias; - joinTree.setRightAliases(rightAliases); - String[] children = joinTree.getBaseSrc(); - if (children == null) - children = new String[2]; - children[1] = alias; - joinTree.setBaseSrc(children); - // remember rhs table for semijoin - if (joinTree.getNoSemiJoin() == false) { - joinTree.addRHSSemijoin(alias); - } - } else - assert false; - - Vector> expressions = new Vector>(); - expressions.add(new Vector()); - expressions.add(new Vector()); - joinTree.setExpressions(expressions); - - Vector> filters = new Vector>(); - filters.add(new Vector()); - filters.add(new Vector()); - joinTree.setFilters(filters); - - ASTNode joinCond = (ASTNode) joinParseTree.getChild(2); - Vector leftSrc = new Vector(); - parseJoinCondition(joinTree, joinCond, leftSrc); - if (leftSrc.size() == 1) - joinTree.setLeftAlias(leftSrc.get(0)); - - // check the hints to see if the user has specified a map-side join. 
This will be removed later on, once the cost-based - // infrastructure is in place - if (qb.getParseInfo().getHints() != null) { - List mapSideTables = getMapSideJoinTables(qb); - List mapAliases = joinTree.getMapAliases(); - - for (String mapTbl : mapSideTables) { - boolean mapTable = false; - for (String leftAlias : joinTree.getLeftAliases()) { - if (mapTbl.equalsIgnoreCase(leftAlias)) - mapTable = true; - } - for (String rightAlias : joinTree.getRightAliases()) { - if (mapTbl.equalsIgnoreCase(rightAlias)) - mapTable = true; - } - - if (mapTable) { - if (mapAliases == null) { - mapAliases = new ArrayList(); - } - mapAliases.add(mapTbl); - joinTree.setMapSideJoin(true); - } - } - - joinTree.setMapAliases(mapAliases); - - parseStreamTables(joinTree, qb); - } - - return joinTree; - } - - private void parseStreamTables(QBJoinTree joinTree, QB qb) { - List streamAliases = joinTree.getStreamAliases(); - - for (Node hintNode: qb.getParseInfo().getHints().getChildren()) { - ASTNode hint = (ASTNode)hintNode; - if (hint.getChild(0).getType() == HiveParser.TOK_STREAMTABLE) { - for (int i = 0; i < hint.getChild(1).getChildCount(); i++) { - if (streamAliases == null) { - streamAliases = new ArrayList(); - } - streamAliases.add(hint.getChild(1).getChild(i).getText()); - } - } - } - - joinTree.setStreamAliases(streamAliases); - } - - private void mergeJoins(QB qb, QBJoinTree parent, QBJoinTree node, - QBJoinTree target, int pos) { - String[] nodeRightAliases = node.getRightAliases(); - String[] trgtRightAliases = target.getRightAliases(); - String[] rightAliases = new String[nodeRightAliases.length - + trgtRightAliases.length]; - - for (int i = 0; i < trgtRightAliases.length; i++) - rightAliases[i] = trgtRightAliases[i]; - for (int i = 0; i < nodeRightAliases.length; i++) - rightAliases[i + trgtRightAliases.length] = nodeRightAliases[i]; - target.setRightAliases(rightAliases); - - String[] nodeBaseSrc = node.getBaseSrc(); - String[] trgtBaseSrc = target.getBaseSrc(); - String[] baseSrc = new String[nodeBaseSrc.length + trgtBaseSrc.length - 1]; - - for (int i = 0; i < trgtBaseSrc.length; i++) - baseSrc[i] = trgtBaseSrc[i]; - for (int i = 1; i < nodeBaseSrc.length; i++) - baseSrc[i + trgtBaseSrc.length - 1] = nodeBaseSrc[i]; - target.setBaseSrc(baseSrc); - - Vector> expr = target.getExpressions(); - for (int i = 0; i < nodeRightAliases.length; i++) - expr.add(node.getExpressions().get(i + 1)); - - Vector> filter = target.getFilters(); - for (int i = 0; i < nodeRightAliases.length; i++) - filter.add(node.getFilters().get(i + 1)); - - if (node.getFilters().get(0).size() != 0) { - Vector filterPos = filter.get(pos); - filterPos.addAll(node.getFilters().get(0)); - } - - if (qb.getQbJoinTree() == node) - qb.setQbJoinTree(node.getJoinSrc()); - else - parent.setJoinSrc(node.getJoinSrc()); - - if (node.getNoOuterJoin() && target.getNoOuterJoin()) - target.setNoOuterJoin(true); - else - target.setNoOuterJoin(false); - - if (node.getNoSemiJoin() && target.getNoSemiJoin()) - target.setNoSemiJoin(true); - else - target.setNoSemiJoin(false); - - target.mergeRHSSemijoin(node); - - joinCond[] nodeCondns = node.getJoinCond(); - int nodeCondnsSize = nodeCondns.length; - joinCond[] targetCondns = target.getJoinCond(); - int targetCondnsSize = targetCondns.length; - joinCond[] newCondns = new joinCond[nodeCondnsSize + targetCondnsSize]; - for (int i = 0; i < targetCondnsSize; i++) - newCondns[i] = targetCondns[i]; - - for (int i = 0; i < nodeCondnsSize; i++) - { - joinCond nodeCondn = nodeCondns[i]; - if 
(nodeCondn.getLeft() == 0) - nodeCondn.setLeft(pos); - else - nodeCondn.setLeft(nodeCondn.getLeft() + targetCondnsSize); - nodeCondn.setRight(nodeCondn.getRight() + targetCondnsSize); - newCondns[targetCondnsSize + i] = nodeCondn; - } - - target.setJoinCond(newCondns); - if (target.isMapSideJoin()) { - assert node.isMapSideJoin(); - List mapAliases = target.getMapAliases(); - for (String mapTbl : node.getMapAliases()) - if (!mapAliases.contains(mapTbl)) - mapAliases.add(mapTbl); - target.setMapAliases(mapAliases); - } - } - - private int findMergePos(QBJoinTree node, QBJoinTree target) { - int res = -1; - String leftAlias = node.getLeftAlias(); - if (leftAlias == null) - return -1; - - Vector nodeCondn = node.getExpressions().get(0); - Vector targetCondn = null; - - if (leftAlias.equals(target.getLeftAlias())) - { - targetCondn = target.getExpressions().get(0); - res = 0; - } - else - for (int i = 0; i < target.getRightAliases().length; i++) { - if (leftAlias.equals(target.getRightAliases()[i])) { - targetCondn = target.getExpressions().get(i + 1); - res = i + 1; - break; - } - } - - if ((targetCondn == null) || (nodeCondn.size() != targetCondn.size())) - return -1; - - for (int i = 0; i < nodeCondn.size(); i++) - if (!nodeCondn.get(i).toStringTree().equals( - targetCondn.get(i).toStringTree())) - return -1; - - return res; - } - - private boolean mergeJoinNodes(QB qb, QBJoinTree parent, QBJoinTree node, - QBJoinTree target) { - if (target == null) - return false; - - int res = findMergePos(node, target); - if (res != -1) { - mergeJoins(qb, parent, node, target, res); - return true; - } - - return mergeJoinNodes(qb, parent, node, target.getJoinSrc()); - } - - private void mergeJoinTree(QB qb) { - QBJoinTree root = qb.getQbJoinTree(); - QBJoinTree parent = null; - while (root != null) { - boolean merged = mergeJoinNodes(qb, parent, root, root.getJoinSrc()); - - if (parent == null) { - if (merged) - root = qb.getQbJoinTree(); - else { - parent = root; - root = root.getJoinSrc(); - } - } else { - parent = parent.getJoinSrc(); - root = parent.getJoinSrc(); - } - } - } - - private Operator insertSelectAllPlanForGroupBy(String dest, Operator input) - throws SemanticException { - OpParseContext inputCtx = opParseCtx.get(input); - RowResolver inputRR = inputCtx.getRR(); - Vector columns = inputRR.getColumnInfos(); - ArrayList colList = new ArrayList(); - ArrayList columnNames = new ArrayList(); - for (int i = 0; i < columns.size(); i++) { - ColumnInfo col = columns.get(i); - colList.add(new exprNodeColumnDesc(col.getType(), col.getInternalName(), - col.getTabAlias(), col.getIsPartitionCol())); - columnNames.add(col.getInternalName()); - } - Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild( - new selectDesc(colList, columnNames, true), new RowSchema(inputRR.getColumnInfos()), input), inputRR); - output.setColumnExprMap(input.getColumnExprMap()); - return output; - } - - // Return the common distinct expression - // There should be more than 1 destination, with group bys in all of them. 
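The merge test in findMergePos above reduces to: the child join's left alias must match one of the target's aliases, and the two joins' key-expression lists must be identical (compared by their printed string trees). A minimal sketch of that equality check, with expressions modeled as strings (illustrative, not Hive's QBJoinTree API):

    import java.util.List;

    public class JoinMergeCheck {
      // True when both joins use exactly the same key expressions in the same order,
      // which is what allows collapsing them into one multi-way join.
      static boolean sameJoinKeys(List<String> nodeKeys, List<String> targetKeys) {
        if (nodeKeys.size() != targetKeys.size()) {
          return false;
        }
        for (int i = 0; i < nodeKeys.size(); i++) {
          if (!nodeKeys.get(i).equals(targetKeys.get(i))) {
            return false;
          }
        }
        return true;
      }

      public static void main(String[] args) {
        System.out.println(sameJoinKeys(List.of("(. a key)"), List.of("(. a key)"))); // true  -> mergeable
        System.out.println(sameJoinKeys(List.of("(. a key)"), List.of("(. b key)"))); // false -> keep separate joins
      }
    }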
- private List getCommonDistinctExprs(QB qb, Operator input) { - RowResolver inputRR = opParseCtx.get(input).getRR(); - QBParseInfo qbp = qb.getParseInfo(); - - TreeSet ks = new TreeSet(); - ks.addAll(qbp.getClauseNames()); - - // Go over all the destination tables - if (ks.size() <= 1) - return null; - - List oldList = null; - List oldASTList = null; - - for (String dest : ks) { - Operator curr = input; - - // If a filter is present, common processing is not possible - if (qbp.getWhrForClause(dest) != null) - return null; - - if (qbp.getAggregationExprsForClause(dest).size() == 0 - && getGroupByForClause(qbp, dest).size() == 0) - return null; - - // All distinct expressions must be the same - ASTNode value = qbp.getDistinctFuncExprForClause(dest); - if (value == null) - return null; - - List currDestList = new ArrayList(); - List currASTList = new ArrayList(); - try { - // 0 is function name - for (int i = 1; i < value.getChildCount(); i++) { - ASTNode parameter = (ASTNode) value.getChild(i); - currDestList.add(genExprNodeDesc(parameter, inputRR)); - currASTList.add(parameter); - } - } catch (SemanticException e) { - return null; - } - - if (oldList == null) { - oldList = currDestList; - oldASTList = currASTList; - } - else { - if (oldList.size() != currDestList.size()) - return null; - for (int pos = 0; pos < oldList.size(); pos++) - { - if (!oldList.get(pos).isSame(currDestList.get(pos))) - return null; - } - } - } - - return oldASTList; - } - - private Operator createCommonReduceSink(QB qb, Operator input) throws SemanticException { - // Go over all the tables and extract the common distinct key - List distExprs = getCommonDistinctExprs(qb, input); - - QBParseInfo qbp = qb.getParseInfo(); - TreeSet ks = new TreeSet(); - ks.addAll(qbp.getClauseNames()); - - // Pass the entire row - RowResolver inputRR = opParseCtx.get(input).getRR(); - RowResolver reduceSinkOutputRowResolver = new RowResolver(); - reduceSinkOutputRowResolver.setIsExprResolver(true); - ArrayList reduceKeys = new ArrayList(); - ArrayList reduceValues = new ArrayList(); - Map colExprMap = new HashMap(); - - // Pre-compute distinct group-by keys and store in reduceKeys - - List outputColumnNames = new ArrayList(); - for (ASTNode distn : distExprs) { - exprNodeDesc distExpr = genExprNodeDesc(distn, inputRR); - reduceKeys.add(distExpr); - String text = distn.toStringTree(); - if (reduceSinkOutputRowResolver.get("", text) == null) { - outputColumnNames.add(getColumnInternalName(reduceKeys.size() - 1)); - String field = Utilities.ReduceField.KEY.toString() + "." + getColumnInternalName(reduceKeys.size() - 1); - ColumnInfo colInfo = new ColumnInfo(field, - reduceKeys.get(reduceKeys.size()-1).getTypeInfo(), "", false); - reduceSinkOutputRowResolver.put("", text, colInfo); - colExprMap.put(colInfo.getInternalName(), distExpr); - } - } - - // Go over all the grouping keys and aggregations - for (String dest : ks) { - - List grpByExprs = getGroupByForClause(qbp, dest); - for (int i = 0; i < grpByExprs.size(); ++i) { - ASTNode grpbyExpr = grpByExprs.get(i); - String text = grpbyExpr.toStringTree(); - - if (reduceSinkOutputRowResolver.get("", text) == null) { - exprNodeDesc grpByExprNode = genExprNodeDesc(grpbyExpr, inputRR); - reduceValues.add(grpByExprNode); - String field = Utilities.ReduceField.VALUE.toString() + "." 
+ getColumnInternalName(reduceValues.size() - 1); - ColumnInfo colInfo = new ColumnInfo(field, reduceValues.get(reduceValues.size()-1).getTypeInfo(), "", false); - reduceSinkOutputRowResolver.put("", text, colInfo); - outputColumnNames.add(getColumnInternalName(reduceValues.size() - 1)); - } - } - - // For each aggregation - HashMap aggregationTrees = qbp.getAggregationExprsForClause(dest); - assert (aggregationTrees != null); - - for (Map.Entry entry : aggregationTrees.entrySet()) { - ASTNode value = entry.getValue(); - String aggName = value.getChild(0).getText(); - - // 0 is the function name - for (int i = 1; i < value.getChildCount(); i++) { - ASTNode paraExpr = (ASTNode)value.getChild(i); - String text = paraExpr.toStringTree(); - - if (reduceSinkOutputRowResolver.get("", text) == null) { - exprNodeDesc paraExprNode = genExprNodeDesc(paraExpr, inputRR); - reduceValues.add(paraExprNode); - String field = Utilities.ReduceField.VALUE.toString() + "." + getColumnInternalName(reduceValues.size() - 1); - ColumnInfo colInfo = new ColumnInfo(field, reduceValues.get(reduceValues.size()-1).getTypeInfo(), "", false); - reduceSinkOutputRowResolver.put("", text, colInfo); - outputColumnNames.add(getColumnInternalName(reduceValues.size() - 1)); - } - } - } - } - - ReduceSinkOperator rsOp = (ReduceSinkOperator) putOpInsertMap( - OperatorFactory.getAndMakeChild(PlanUtils.getReduceSinkDesc(reduceKeys, reduceValues, outputColumnNames, true, -1, reduceKeys.size(), -1), - new RowSchema(reduceSinkOutputRowResolver.getColumnInfos()), input), - reduceSinkOutputRowResolver); - - rsOp.setColumnExprMap(colExprMap); - return rsOp; - } - - @SuppressWarnings("nls") - private Operator genBodyPlan(QB qb, Operator input) - throws SemanticException { - - QBParseInfo qbp = qb.getParseInfo(); - - TreeSet ks = new TreeSet(); - ks.addAll(qbp.getClauseNames()); - - // For multi-group by with the same distinct, we ignore all user hints currently. It doesnt matter whether he has asked to do - // map-side aggregation or not. Map side aggregation is turned off - boolean optimizeMultiGroupBy = (getCommonDistinctExprs(qb, input) != null); - Operator curr = null; - - // If there are multiple group-bys, map-side aggregation is turned off, there are no filters - // and there is a single distinct, optimize that. Spray initially by the distinct key, - // no computation at the mapper. 
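The common-distinct detection above only fires when every insert destination aggregates, has no per-destination filter, and uses the very same DISTINCT argument list. A minimal sketch of that eligibility check, with destinations modeled as plain strings (illustrative only; Hive works on QBParseInfo and AST nodes):

    import java.util.*;

    public class CommonDistinctCheck {
      // Returns the shared distinct-argument list, or null when the rewrite does not apply.
      static List<String> commonDistinctArgs(Map<String, List<String>> destToDistinctArgs,
                                             Set<String> destsWithFilter) {
        if (destToDistinctArgs.size() <= 1) {
          return null;                     // nothing to share across a single destination
        }
        List<String> common = null;
        for (Map.Entry<String, List<String>> e : destToDistinctArgs.entrySet()) {
          if (destsWithFilter.contains(e.getKey()) || e.getValue() == null) {
            return null;                   // a WHERE clause or a missing DISTINCT disables the rewrite
          }
          if (common == null) {
            common = e.getValue();
          } else if (!common.equals(e.getValue())) {
            return null;                   // distinct arguments differ between destinations
          }
        }
        return common;
      }

      public static void main(String[] args) {
        Map<String, List<String>> dests = Map.of(
            "insclause-0", List.of("(. t c1)"),
            "insclause-1", List.of("(. t c1)"));
        System.out.println(commonDistinctArgs(dests, Set.of())); // [(. t c1)] -> optimization applies
      }
    }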
Have multiple group by operators at the reducer - and then - // proceed - if (optimizeMultiGroupBy) { - curr = createCommonReduceSink(qb, input); - - RowResolver currRR = opParseCtx.get(curr).getRR(); - // create a forward operator - input = putOpInsertMap(OperatorFactory.getAndMakeChild(new forwardDesc(), - new RowSchema(currRR.getColumnInfos()), curr), currRR); - - for (String dest : ks) { - curr = input; - curr = genGroupByPlan2MRMultiGroupBy(dest, qb, curr); - curr = genSelectPlan(dest, qb, curr); - Integer limit = qbp.getDestLimit(dest); - if (limit != null) { - curr = genLimitMapRedPlan(dest, qb, curr, limit.intValue(), true); - qb.getParseInfo().setOuterQueryLimit(limit.intValue()); - } - curr = genFileSinkPlan(dest, qb, curr); - } - } - else { - // Go over all the destination tables - for (String dest : ks) { - curr = input; - - if (qbp.getWhrForClause(dest) != null) { - curr = genFilterPlan(dest, qb, curr); - } - - if (qbp.getAggregationExprsForClause(dest).size() != 0 - || getGroupByForClause(qbp, dest).size() > 0) - { - // insert a select operator here used by the ColumnPruner to reduce the data to shuffle - curr = insertSelectAllPlanForGroupBy(dest, curr); - if (conf.getVar(HiveConf.ConfVars.HIVEMAPSIDEAGGREGATE).equalsIgnoreCase("true")) { - if (conf.getVar(HiveConf.ConfVars.HIVEGROUPBYSKEW).equalsIgnoreCase("false")) - curr = genGroupByPlanMapAggr1MR(dest, qb, curr); - else - curr = genGroupByPlanMapAggr2MR(dest, qb, curr); - } - else if (conf.getVar(HiveConf.ConfVars.HIVEGROUPBYSKEW).equalsIgnoreCase("true")) - curr = genGroupByPlan2MR(dest, qb, curr); - else - curr = genGroupByPlan1MR(dest, qb, curr); - } - - curr = genSelectPlan(dest, qb, curr); - Integer limit = qbp.getDestLimit(dest); - - if (qbp.getClusterByForClause(dest) != null - || qbp.getDistributeByForClause(dest) != null - || qbp.getOrderByForClause(dest) != null - || qbp.getSortByForClause(dest) != null) { - - int numReducers = -1; - - // Use only 1 reducer if order by is present - if (qbp.getOrderByForClause(dest) != null) - numReducers = 1; - - curr = genReduceSinkPlan(dest, qb, curr, numReducers); - } - - if (qbp.getIsSubQ()) { - if (limit != null) { - // In case of order by, only 1 reducer is used, so no need of another shuffle - curr = genLimitMapRedPlan(dest, qb, curr, limit.intValue(), qbp.getOrderByForClause(dest) != null ? 
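The branch below picks one of four group-by strategies from hive.map.aggr and hive.groupby.skewindata. A compact restatement of that decision (the enum names are invented labels for the genGroupByPlan* variants, not Hive identifiers):

    public class GroupByPlanChooser {
      enum Plan { MAP_AGGR_1MR, MAP_AGGR_2MR, SKEW_2MR, PLAIN_1MR }

      static Plan choose(boolean mapSideAggr, boolean skewInData) {
        if (mapSideAggr) {
          return skewInData ? Plan.MAP_AGGR_2MR : Plan.MAP_AGGR_1MR;
        }
        return skewInData ? Plan.SKEW_2MR : Plan.PLAIN_1MR;
      }

      public static void main(String[] args) {
        System.out.println(choose(true, false));  // MAP_AGGR_1MR: one job with map-side hash aggregation
        System.out.println(choose(false, true));  // SKEW_2MR: two jobs to spread skewed group-by keys
      }
    }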
false : true); - } - } else { - curr = genConversionOps(dest, qb, curr); - // exact limit can be taken care of by the fetch operator - if (limit != null) { - boolean extraMRStep = true; - - if (qb.getIsQuery() && - qbp.getClusterByForClause(dest) == null && - qbp.getSortByForClause(dest) == null) - extraMRStep = false; - - curr = genLimitMapRedPlan(dest, qb, curr, limit.intValue(), extraMRStep); - qb.getParseInfo().setOuterQueryLimit(limit.intValue()); - } - curr = genFileSinkPlan(dest, qb, curr); - } - - // change curr ops row resolver's tab aliases to query alias if it exists - if(qb.getParseInfo().getAlias() != null) { - RowResolver rr = opParseCtx.get(curr).getRR(); - RowResolver newRR = new RowResolver(); - String alias = qb.getParseInfo().getAlias(); - for(ColumnInfo colInfo: rr.getColumnInfos()) { - String name = colInfo.getInternalName(); - String [] tmp = rr.reverseLookup(name); - newRR.put(alias, tmp[1], colInfo); - } - opParseCtx.get(curr).setRR(newRR); - } - } - } - - LOG.debug("Created Body Plan for Query Block " + qb.getId()); - return curr; - } - - @SuppressWarnings("nls") - private Operator genUnionPlan(String unionalias, String leftalias, - Operator leftOp, String rightalias, Operator rightOp) - throws SemanticException { - - // Currently, the unions are not merged - each union has only 2 parents. So, a n-way union will lead to (n-1) union operators. - // This can be easily merged into 1 union - RowResolver leftRR = opParseCtx.get(leftOp).getRR(); - RowResolver rightRR = opParseCtx.get(rightOp).getRR(); - HashMap leftmap = leftRR.getFieldMap(leftalias); - HashMap rightmap = rightRR.getFieldMap(rightalias); - // make sure the schemas of both sides are the same - for (Map.Entry lEntry: leftmap.entrySet()) { - String field = lEntry.getKey(); - ColumnInfo lInfo = lEntry.getValue(); - ColumnInfo rInfo = rightmap.get(field); - if (rInfo == null) { - throw new SemanticException("Schema of both sides of union should match. " - + rightalias + " does not have the field " + field); - } - if (lInfo == null) { - throw new SemanticException("Schema of both sides of union should match. 
" - + leftalias + " does not have the field " + field); - } - if (!lInfo.getInternalName().equals(rInfo.getInternalName())) { - throw new SemanticException("Schema of both sides of union should match: " - + field + ":" + lInfo.getInternalName() + " " + rInfo.getInternalName()); - } - if (!lInfo.getType().getTypeName().equals(rInfo.getType().getTypeName())) { - throw new SemanticException("Schema of both sides of union should match: Column " - + field + " is of type " + lInfo.getType().getTypeName() + - " on first table and type " + rInfo.getType().getTypeName() + " on second table"); - } - } - - // construct the forward operator - RowResolver unionoutRR = new RowResolver(); - for (Map.Entry lEntry: leftmap.entrySet()) { - String field = lEntry.getKey(); - ColumnInfo lInfo = lEntry.getValue(); - unionoutRR.put(unionalias, field, lInfo); - } - - // If one of the children is a union, merge with it - // else create a new one - if ((leftOp instanceof UnionOperator) || (rightOp instanceof UnionOperator)) - { - if (leftOp instanceof UnionOperator) { - // make left a child of right - List> child = new ArrayList>(); - child.add(leftOp); - rightOp.setChildOperators(child); - - List> parent = leftOp.getParentOperators(); - parent.add(rightOp); - - unionDesc uDesc = ((UnionOperator)leftOp).getConf(); - uDesc.setNumInputs(uDesc.getNumInputs()+1); - return putOpInsertMap(leftOp, unionoutRR); - } - else { - // make right a child of left - List> child = new ArrayList>(); - child.add(rightOp); - leftOp.setChildOperators(child); - - List> parent = rightOp.getParentOperators(); - parent.add(leftOp); - unionDesc uDesc = ((UnionOperator)rightOp).getConf(); - uDesc.setNumInputs(uDesc.getNumInputs()+1); - - return putOpInsertMap(rightOp, unionoutRR); - } - } - - // Create a new union operator - Operator unionforward = - OperatorFactory.getAndMakeChild(new unionDesc(), new RowSchema(unionoutRR.getColumnInfos())); - - // set union operator as child of each of leftOp and rightOp - List> child = new ArrayList>(); - child.add(unionforward); - rightOp.setChildOperators(child); - - child = new ArrayList>(); - child.add(unionforward); - leftOp.setChildOperators(child); - - List> parent = new ArrayList>(); - parent.add(leftOp); - parent.add(rightOp); - unionforward.setParentOperators(parent); - - // create operator info list to return - return putOpInsertMap(unionforward, unionoutRR); - } - - /** - * Generates the sampling predicate from the TABLESAMPLE clause information. This function uses the - * bucket column list to decide the expression inputs to the predicate hash function in case useBucketCols - * is set to true, otherwise the expression list stored in the TableSample is used. The bucket columns of - * the table are used to generate this predicate in case no expressions are provided on the TABLESAMPLE - * clause and the table has clustering columns defined in it's metadata. - * The predicate created has the following structure: - * - * ((hash(expressions) & Integer.MAX_VALUE) % denominator) == numerator - * - * @param ts TABLESAMPLE clause information - * @param bucketCols The clustering columns of the table - * @param useBucketCols Flag to indicate whether the bucketCols should be used as input to the hash - * function - * @param alias The alias used for the table in the row resolver - * @param rwsch The row resolver used to resolve column references - * @param qbm The metadata information for the query block which is used to resolve unaliased columns - * @param planExpr The plan tree for the expression. 
If the user specified this, the parse expressions are not used - * @return exprNodeDesc - * @exception SemanticException - */ - private exprNodeDesc genSamplePredicate(TableSample ts, List bucketCols, - boolean useBucketCols, String alias, - RowResolver rwsch, QBMetaData qbm, exprNodeDesc planExpr) - throws SemanticException { - - exprNodeDesc numeratorExpr = new exprNodeConstantDesc( - TypeInfoFactory.intTypeInfo, - Integer.valueOf(ts.getNumerator() - 1)); - - exprNodeDesc denominatorExpr = new exprNodeConstantDesc( - TypeInfoFactory.intTypeInfo, - Integer.valueOf(ts.getDenominator())); - - exprNodeDesc intMaxExpr = new exprNodeConstantDesc( - TypeInfoFactory.intTypeInfo, - Integer.valueOf(Integer.MAX_VALUE)); - - ArrayList args = new ArrayList(); - if (planExpr != null) - args.add(planExpr); - else if (useBucketCols) { - for (String col : bucketCols) { - ColumnInfo ci = rwsch.get(alias, col); - // TODO: change type to the one in the table schema - args.add(new exprNodeColumnDesc(ci.getType(), ci.getInternalName(), - ci.getTabAlias(), ci.getIsPartitionCol())); - } - } - else { - for(ASTNode expr: ts.getExprs()) { - args.add(genExprNodeDesc(expr, rwsch)); - } - } - - exprNodeDesc equalsExpr = null; - { - exprNodeDesc hashfnExpr = new exprNodeGenericFuncDesc(TypeInfoFactory.intTypeInfo, - new GenericUDFHash(), args); - assert(hashfnExpr != null); - LOG.info("hashfnExpr = " + hashfnExpr); - exprNodeDesc andExpr = TypeCheckProcFactory.DefaultExprProcessor.getFuncExprNodeDesc("&", hashfnExpr, intMaxExpr); - assert(andExpr != null); - LOG.info("andExpr = " + andExpr); - exprNodeDesc modExpr = TypeCheckProcFactory.DefaultExprProcessor.getFuncExprNodeDesc("%", andExpr, denominatorExpr); - assert(modExpr != null); - LOG.info("modExpr = " + modExpr); - LOG.info("numeratorExpr = " + numeratorExpr); - equalsExpr = TypeCheckProcFactory.DefaultExprProcessor.getFuncExprNodeDesc("==", modExpr, numeratorExpr); - LOG.info("equalsExpr = " + equalsExpr); - assert(equalsExpr != null); - } - return equalsExpr; - } - - @SuppressWarnings("nls") - private Operator genTablePlan(String alias, QB qb) throws SemanticException { - - String alias_id = (qb.getId() == null ? 
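The predicate assembled by genSamplePredicate keeps a row when ((hash(expressions) & Integer.MAX_VALUE) % denominator) == numerator - 1. A minimal arithmetic sketch of that test (Objects.hash stands in for the real hash UDF; not Hive code):

    import java.util.Objects;

    public class TableSampleSketch {
      static boolean inSample(int numerator, int denominator, Object... sampleCols) {
        int hash = Objects.hash(sampleCols);
        // Mask to make the hash non-negative, then take the bucket for this row.
        return ((hash & Integer.MAX_VALUE) % denominator) == numerator - 1;
      }

      public static void main(String[] args) {
        // TABLESAMPLE(BUCKET 1 OUT OF 4 ON userid): roughly a quarter of distinct hashes pass.
        System.out.println(inSample(1, 4, 12345));
        System.out.println(inSample(1, 4, 12346));
      }
    }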
alias : qb.getId() + ":" + alias); - Table tab = qb.getMetaData().getSrcForAlias(alias); - RowResolver rwsch; - - // is the table already present - Operator top = this.topOps.get(alias_id); - Operator dummySel = this.topSelOps.get(alias_id); - if (dummySel != null) - top = dummySel; - - if (top == null) { - rwsch = new RowResolver(); - try { - StructObjectInspector rowObjectInspector = (StructObjectInspector)tab.getDeserializer().getObjectInspector(); - List fields = rowObjectInspector.getAllStructFieldRefs(); - for (int i=0; i tableOp = top; - TableSample ts = qb.getParseInfo().getTabSample(alias); - if (ts != null) { - int num = ts.getNumerator(); - int den = ts.getDenominator(); - ArrayList sampleExprs = ts.getExprs(); - - // TODO: Do the type checking of the expressions - List tabBucketCols = tab.getBucketCols(); - int numBuckets = tab.getNumBuckets(); - - // If there are no sample cols and no bucket cols then throw an error - if (tabBucketCols.size() == 0 && sampleExprs.size() == 0) { - throw new SemanticException(ErrorMsg.NON_BUCKETED_TABLE.getMsg() + " " + tab.getName()); - } - - // check if a predicate is needed - // predicate is needed if either input pruning is not enough - // or if input pruning is not possible - - // check if the sample columns are the same as the table bucket columns - boolean colsEqual = true; - if ( (sampleExprs.size() != tabBucketCols.size()) && (sampleExprs.size() != 0) ) { - colsEqual = false; - } - - for (int i = 0; i < sampleExprs.size() && colsEqual; i++) { - boolean colFound = false; - for (int j = 0; j < tabBucketCols.size() && !colFound; j++) { - if (sampleExprs.get(i).getToken().getType() != HiveParser.TOK_TABLE_OR_COL) { - break; - } - - if (((ASTNode)sampleExprs.get(i).getChild(0)).getText().equalsIgnoreCase(tabBucketCols.get(j))) { - colFound = true; - } - } - colsEqual = (colsEqual && colFound); - } - - // Check if input can be pruned - ts.setInputPruning((sampleExprs == null || sampleExprs.size() == 0 || colsEqual)); - - // check if input pruning is enough - if ((sampleExprs == null || sampleExprs.size() == 0 || colsEqual) - && (num == den || den <= numBuckets && numBuckets % den == 0)) { - // input pruning is enough; no need for filter - LOG.info("No need for sample filter"); - // TODO sample predicate is not needed, but we are adding it anyway since - // input pruning is broken for subqueries. 
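The check above decides whether a TABLESAMPLE can be answered by input pruning alone (reading a subset of bucket files) or whether a filter operator is still needed on top of the scan. A minimal restatement of that condition (illustrative helper, not Hive code):

    public class SamplePruning {
      // True when pruning alone satisfies the sample: the sample columns are the bucketing
      // columns (or none were given) and the denominator maps cleanly onto the bucket count.
      static boolean pruningAloneSuffices(boolean sampleColsMatchBucketCols, int numSampleExprs,
                                          int numerator, int denominator, int numBuckets) {
        boolean pruningPossible = numSampleExprs == 0 || sampleColsMatchBucketCols;
        boolean pruningSufficient = numerator == denominator
            || (denominator <= numBuckets && numBuckets % denominator == 0);
        return pruningPossible && pruningSufficient;
      }

      public static void main(String[] args) {
        // 32 buckets, BUCKET 1 OUT OF 8 on the bucketing columns: read 4 of the 32 files, no filter.
        System.out.println(pruningAloneSuffices(true, 1, 1, 8, 32));  // true
        // Sampling on a non-bucketing column: a filter over the full input is required.
        System.out.println(pruningAloneSuffices(false, 1, 1, 8, 32)); // false
      }
    }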
will remove this once we move - // compilation of sampling to use the operator tree - exprNodeDesc samplePredicate = genSamplePredicate(ts, tabBucketCols, colsEqual, alias, rwsch, qb.getMetaData(), null); - tableOp = OperatorFactory.getAndMakeChild( - new filterDesc(samplePredicate, true), - top); - } - else { - // need to add filter - // create tableOp to be filterDesc and set as child to 'top' - LOG.info("Need sample filter"); - exprNodeDesc samplePredicate = genSamplePredicate(ts, tabBucketCols, colsEqual, alias, rwsch, qb.getMetaData(), null); - tableOp = OperatorFactory.getAndMakeChild( - new filterDesc(samplePredicate, true), - top); - } - } - else { - boolean testMode = conf.getBoolVar(HiveConf.ConfVars.HIVETESTMODE); - if (testMode) { - String tabName = tab.getName(); - - // has the user explicitly asked not to sample this table - String unSampleTblList = conf.getVar(HiveConf.ConfVars.HIVETESTMODENOSAMPLE); - String[] unSampleTbls = unSampleTblList.split(","); - boolean unsample = false; - for (String unSampleTbl : unSampleTbls) - if (tabName.equalsIgnoreCase(unSampleTbl)) - unsample = true; - - if (!unsample) { - int numBuckets = tab.getNumBuckets(); - - // If the input table is bucketed, choose the first bucket - if (numBuckets > 0) { - TableSample tsSample = new TableSample(1, numBuckets); - tsSample.setInputPruning(true); - qb.getParseInfo().setTabSample(alias, tsSample); - LOG.info("No need for sample filter"); - } - // The table is not bucketed, add a dummy filter :: rand() - else { - int freq = conf.getIntVar(HiveConf.ConfVars.HIVETESTMODESAMPLEFREQ); - TableSample tsSample = new TableSample(1, freq); - tsSample.setInputPruning(false); - qb.getParseInfo().setTabSample(alias, tsSample); - LOG.info("Need sample filter"); - exprNodeDesc randFunc = TypeCheckProcFactory.DefaultExprProcessor.getFuncExprNodeDesc("rand", new exprNodeConstantDesc(Integer.valueOf(460476415))); - exprNodeDesc samplePred = genSamplePredicate(tsSample, null, false, alias, rwsch, qb.getMetaData(), randFunc); - tableOp = OperatorFactory.getAndMakeChild(new filterDesc(samplePred, true), top); - } - } - } - } - - Operator output = putOpInsertMap(tableOp, rwsch); - LOG.debug("Created Table Plan for " + alias + " " + tableOp.toString()); - - return output; - } - - private Operator genPlan(QBExpr qbexpr) throws SemanticException { - if (qbexpr.getOpcode() == QBExpr.Opcode.NULLOP) { - return genPlan(qbexpr.getQB()); - } - if (qbexpr.getOpcode() == QBExpr.Opcode.UNION) { - Operator qbexpr1Ops = genPlan(qbexpr.getQBExpr1()); - Operator qbexpr2Ops = genPlan(qbexpr.getQBExpr2()); - - return genUnionPlan(qbexpr.getAlias(), qbexpr.getQBExpr1().getAlias(), - qbexpr1Ops, qbexpr.getQBExpr2().getAlias(), qbexpr2Ops); - } - return null; - } - - @SuppressWarnings("nls") - public Operator genPlan(QB qb) throws SemanticException { - - // First generate all the opInfos for the elements in the from clause - HashMap aliasToOpInfo = new HashMap(); - - // Recurse over the subqueries to fill the subquery part of the plan - for (String alias : qb.getSubqAliases()) { - QBExpr qbexpr = qb.getSubqForAlias(alias); - aliasToOpInfo.put(alias, genPlan(qbexpr)); - qbexpr.setAlias(alias); - } - - // Recurse over all the source tables - for (String alias : qb.getTabAliases()) { - aliasToOpInfo.put(alias, genTablePlan(alias, qb)); - } - - Operator srcOpInfo = null; - - // process join - if (qb.getParseInfo().getJoinExpr() != null) { - ASTNode joinExpr = qb.getParseInfo().getJoinExpr(); - - if (joinExpr.getToken().getType() == 
HiveParser.TOK_UNIQUEJOIN) { - QBJoinTree joinTree = genUniqueJoinTree(qb, joinExpr); - qb.setQbJoinTree(joinTree); - } else { - QBJoinTree joinTree = genJoinTree(qb, joinExpr); - qb.setQbJoinTree(joinTree); - mergeJoinTree(qb); - } - - // if any filters are present in the join tree, push them on top of the table - pushJoinFilters(qb, qb.getQbJoinTree(), aliasToOpInfo); - srcOpInfo = genJoinPlan(qb, aliasToOpInfo); - } - else - // Now if there are more than 1 sources then we have a join case - // later we can extend this to the union all case as well - srcOpInfo = aliasToOpInfo.values().iterator().next(); - - Operator bodyOpInfo = genBodyPlan(qb, srcOpInfo); - LOG.debug("Created Plan for Query Block " + qb.getId()); - - this.qb = qb; - return bodyOpInfo; - } - - private Operator getReduceSink(Operator top) { - if (top.getClass() == ReduceSinkOperator.class) { - // Get the operator following the reduce sink - assert (top.getChildOperators().size() == 1); - - return top; - } - - List> childOps = top.getChildOperators(); - if (childOps == null) { - return null; - } - - for (int i = 0; i < childOps.size(); ++i) { - Operator reducer = getReduceSink(childOps.get(i)); - if (reducer != null) { - return reducer; - } - } - - return null; - } - - @SuppressWarnings("nls") - private void genMapRedTasks(QB qb) throws SemanticException { - fetchWork fetch = null; - List> mvTask = new ArrayList>(); - Task fetchTask = null; - - QBParseInfo qbParseInfo = qb.getParseInfo(); - - // Does this query need reduce job - if (qb.isSelectStarQuery() - && qbParseInfo.getDestToClusterBy().isEmpty() - && qbParseInfo.getDestToDistributeBy().isEmpty() - && qbParseInfo.getDestToOrderBy().isEmpty() - && qbParseInfo.getDestToSortBy().isEmpty()) { - boolean noMapRed = false; - - Iterator> iter = qb.getMetaData().getAliasToTable().entrySet().iterator(); - Table tab = ((Map.Entry)iter.next()).getValue(); - if (!tab.isPartitioned()) { - if (qbParseInfo.getDestToWhereExpr().isEmpty()) { - fetch = new fetchWork(tab.getPath().toString(), Utilities.getTableDesc(tab), qb.getParseInfo().getOuterQueryLimit()); - noMapRed = true; - inputs.add(new ReadEntity(tab)); - } - } - else { - - if (topOps.size() == 1) { - TableScanOperator ts = (TableScanOperator)topOps.values().toArray()[0]; - - // check if the pruner only contains partition columns - if (PartitionPruner.onlyContainsPartnCols(topToTable.get(ts), opToPartPruner.get(ts))) { - - PrunedPartitionList partsList = null; - try { - partsList = PartitionPruner.prune(topToTable.get(ts), opToPartPruner.get(ts), conf, (String)topOps.keySet().toArray()[0]); - } catch (HiveException e) { - // Has to use full name to make sure it does not conflict with org.apache.commons.lang.StringUtils - LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e)); - throw new SemanticException(e.getMessage(), e); - } - - // If there is any unknown partition, create a map-reduce job for the filter to prune correctly - if (partsList.getUnknownPartns().size() == 0) { - List listP = new ArrayList(); - List partP = new ArrayList(); - - Set parts = partsList.getConfirmedPartns(); - Iterator iterParts = parts.iterator(); - while (iterParts.hasNext()) { - Partition part = iterParts.next(); - listP.add(part.getPartitionPath().toString()); - try{ - partP.add(Utilities.getPartitionDesc(part)); - } catch (HiveException e) { - throw new SemanticException(e.getMessage(), e); - } - inputs.add(new ReadEntity(part)); - } - - fetch = new fetchWork(listP, partP, qb.getParseInfo().getOuterQueryLimit()); - noMapRed = 
true; - } - } - } - } - - if (noMapRed) { - fetchTask = TaskFactory.get(fetch, this.conf); - setFetchTask(fetchTask); - - // remove root tasks if any - rootTasks.clear(); - return; - } - } - - // In case of a select, use a fetch task instead of a move task - if (qb.getIsQuery()) { - if ((!loadTableWork.isEmpty()) || (loadFileWork.size() != 1)) - throw new SemanticException(ErrorMsg.GENERIC_ERROR.getMsg()); - String cols = loadFileWork.get(0).getColumns(); - String colTypes = loadFileWork.get(0).getColumnTypes(); - - fetch = new fetchWork(new Path(loadFileWork.get(0).getSourceDir()).toString(), - new tableDesc(LazySimpleSerDe.class, TextInputFormat.class, - IgnoreKeyTextOutputFormat.class, - Utilities.makeProperties( - org.apache.hadoop.hive.serde.Constants.SERIALIZATION_FORMAT, "" + Utilities.ctrlaCode, - org.apache.hadoop.hive.serde.Constants.LIST_COLUMNS, cols, - org.apache.hadoop.hive.serde.Constants.LIST_COLUMN_TYPES, colTypes)), - qb.getParseInfo().getOuterQueryLimit()); - - fetchTask = TaskFactory.get(fetch, this.conf); - setFetchTask(fetchTask); - } else { - // First we generate the move work as this needs to be made dependent on all - // the tasks that have a file sink operation - List mv = new ArrayList(); - for (loadTableDesc ltd : loadTableWork) - mvTask.add(TaskFactory.get(new moveWork(null, null, ltd, null, false), this.conf)); - - boolean oneLoadFile = true; - for (loadFileDesc lfd : loadFileWork) { - if ( qb.isCTAS() ) { - assert(oneLoadFile); // should not have more than 1 load file for CTAS - // make the movetask's destination directory the table's destination. - String location = qb.getTableDesc().getLocation(); - if ( location == null ) { - // get the table's default location - location = conf.getVar(HiveConf.ConfVars.METASTOREWAREHOUSE); - assert(location.length() > 0 ); - if ( location.charAt(location.length()-1) != '/' ) { - location += '/'; - } - location += qb.getTableDesc().getTableName().toLowerCase(); - } - lfd.setTargetDir(location); - oneLoadFile = false; - } - mvTask.add(TaskFactory.get(new moveWork(null, null, null, lfd, false), this.conf)); - } - } - - // generate map reduce plans - GenMRProcContext procCtx = - new GenMRProcContext( - conf, new HashMap, Task>(), - new ArrayList>(), - getParseContext(), mvTask, this.rootTasks, - new LinkedHashMap, GenMapRedCtx>(), - inputs, outputs); - - // create a walker which walks the tree in a DFS manner while maintaining the operator stack. 
- // The dispatcher generates the plan from the operator tree - Map opRules = new LinkedHashMap(); - opRules.put(new RuleRegExp(new String("R1"), "TS%"), new GenMRTableScan1()); - opRules.put(new RuleRegExp(new String("R2"), "TS%.*RS%"), new GenMRRedSink1()); - opRules.put(new RuleRegExp(new String("R3"), "RS%.*RS%"), new GenMRRedSink2()); - opRules.put(new RuleRegExp(new String("R4"), "FS%"), new GenMRFileSink1()); - opRules.put(new RuleRegExp(new String("R5"), "UNION%"), new GenMRUnion1()); - opRules.put(new RuleRegExp(new String("R6"), "UNION%.*RS%"), new GenMRRedSink3()); - opRules.put(new RuleRegExp(new String("R6"), "MAPJOIN%.*RS%"), new GenMRRedSink4()); - opRules.put(new RuleRegExp(new String("R7"), "TS%.*MAPJOIN%"), MapJoinFactory.getTableScanMapJoin()); - opRules.put(new RuleRegExp(new String("R8"), "RS%.*MAPJOIN%"), MapJoinFactory.getReduceSinkMapJoin()); - opRules.put(new RuleRegExp(new String("R9"), "UNION%.*MAPJOIN%"), MapJoinFactory.getUnionMapJoin()); - opRules.put(new RuleRegExp(new String("R10"), "MAPJOIN%.*MAPJOIN%"), MapJoinFactory.getMapJoinMapJoin()); - opRules.put(new RuleRegExp(new String("R11"), "MAPJOIN%SEL%"), MapJoinFactory.getMapJoin()); - - // The dispatcher fires the processor corresponding to the closest matching rule and passes the context along - Dispatcher disp = new DefaultRuleDispatcher(new GenMROperator(), opRules, procCtx); - - GraphWalker ogw = new GenMapRedWalker(disp); - ArrayList topNodes = new ArrayList(); - topNodes.addAll(this.topOps.values()); - ogw.startWalking(topNodes, null); - - // reduce sink does not have any kids - since the plan by now has been broken up into multiple - // tasks, iterate over all tasks. - // For each task, go over all operators recursively - for (Task rootTask: rootTasks) - breakTaskTree(rootTask); - - // For each task, set the key descriptor for the reducer - for (Task rootTask: rootTasks) - setKeyDescTaskTree(rootTask); - - // For each operator, generate the counters if needed - if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEJOBPROGRESS)) - for (Task rootTask: rootTasks) - generateCountersTask(rootTask); - - if ( qb.isCTAS() ) { - // generate a DDL task and make it a dependent task of the leaf - createTableDesc crtTblDesc = qb.getTableDesc(); - - validateCreateTable(crtTblDesc); - - // Clear the output for CTAS since we don't need the output from the mapredWork, the - // DDLWork at the tail of the chain will have the output - getOutputs().clear(); - - Task crtTblTask = - TaskFactory.get(new DDLWork(getInputs(), getOutputs(), crtTblDesc), this.conf); - - // find all leaf tasks and make the DDLTask as a dependent task of all of them - HashSet> leaves = new HashSet>(); - getLeafTasks(rootTasks, leaves); - assert(leaves.size() > 0); - for ( Task task: leaves ) { - task.addDependentTask(crtTblTask); - } - } - } - - /** - * Find all leaf tasks of the list of root tasks. - */ - private void getLeafTasks( List> rootTasks, - HashSet> leaves) { - - for ( Task root : rootTasks ) { - getLeafTasks(root, leaves); - } - } - - private void getLeafTasks( Task task, - HashSet> leaves) { - if ( task.getChildTasks() == null ) { - if ( ! 
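The rule table above keys node processors by regular expressions that are matched against the names of the operators on the walker's current stack (e.g. "TS%.*RS%" fires when a ReduceSink appears somewhere below a TableScan). A minimal sketch of that matching style using java.util.regex; the stack rendering and the "first match wins" loop are simplifications, since Hive's dispatcher prefers the closest matching rule:

    import java.util.*;
    import java.util.regex.Pattern;

    public class RuleDispatchSketch {
      public static void main(String[] args) {
        Map<String, String> rules = new LinkedHashMap<>();
        rules.put("TS%", "GenMRTableScan1");
        rules.put("TS%.*RS%", "GenMRRedSink1");
        rules.put("RS%.*RS%", "GenMRRedSink2");
        rules.put("FS%", "GenMRFileSink1");

        String stack = "TS%FIL%RS%";   // table scan -> filter -> reduce sink
        for (Map.Entry<String, String> rule : rules.entrySet()) {
          if (Pattern.compile(rule.getKey()).matcher(stack).find()) {
            System.out.println(rule.getKey() + " -> " + rule.getValue());
          }
        }
        // Prints the TS% and TS%.*RS% rules; the real dispatcher would invoke exactly one processor.
      }
    }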
leaves.contains(task) ) { - leaves.add(task); - } - } else { - getLeafTasks(task.getChildTasks(), leaves); - } - } - - - // loop over all the tasks recursviely - private void generateCountersTask(Task task) { - if ((task instanceof MapRedTask) || (task instanceof ExecDriver)) { - HashMap> opMap = ((mapredWork)task.getWork()).getAliasToWork(); - if (!opMap.isEmpty()) { - for (Operator op: opMap.values()) { - generateCountersOperator(op); - } - } - - Operator reducer = ((mapredWork)task.getWork()).getReducer(); - if (reducer != null) { - LOG.info("Generating counters for operator " + reducer); - generateCountersOperator(reducer); - } - } - else if (task instanceof ConditionalTask) { - List> listTasks = ((ConditionalTask)task).getListTasks(); - for (Task tsk : listTasks) - generateCountersTask(tsk); - } - - // Start the counters from scratch - a hack for hadoop 17. - Operator.resetLastEnumUsed(); - - if (task.getChildTasks() == null) - return; - - for (Task childTask : task.getChildTasks()) - generateCountersTask(childTask); - } - - private void generateCountersOperator(Operator op) { - op.assignCounterNameToEnum(); - - if (op.getChildOperators() == null) - return; - - for (Operator child: op.getChildOperators()) - generateCountersOperator(child); - } - - // loop over all the tasks recursviely - private void breakTaskTree(Task task) { - - if ((task instanceof MapRedTask) || (task instanceof ExecDriver)) { - HashMap> opMap = ((mapredWork)task.getWork()).getAliasToWork(); - if (!opMap.isEmpty()) - for (Operator op: opMap.values()) { - breakOperatorTree(op); - } - } - else if (task instanceof ConditionalTask) { - List> listTasks = ((ConditionalTask)task).getListTasks(); - for (Task tsk : listTasks) - breakTaskTree(tsk); - } - - if (task.getChildTasks() == null) - return; - - for (Task childTask : task.getChildTasks()) - breakTaskTree(childTask); - } - - // loop over all the operators recursviely - private void breakOperatorTree(Operator topOp) { - if (topOp instanceof ReduceSinkOperator) - topOp.setChildOperators(null); - - if (topOp.getChildOperators() == null) - return; - - for (Operator op: topOp.getChildOperators()) - breakOperatorTree(op); - } - - // loop over all the tasks recursviely - private void setKeyDescTaskTree(Task task) { - - if ((task instanceof MapRedTask) || (task instanceof ExecDriver)) { - mapredWork work = (mapredWork)task.getWork(); - HashMap> opMap = work.getAliasToWork(); - if (!opMap.isEmpty()) - for (Operator op: opMap.values()) - GenMapRedUtils.setKeyAndValueDesc(work, op); - } - else if (task instanceof ConditionalTask) { - List> listTasks = ((ConditionalTask)task).getListTasks(); - for (Task tsk : listTasks) - setKeyDescTaskTree(tsk); - } - - if (task.getChildTasks() == null) - return; - - for (Task childTask : task.getChildTasks()) - setKeyDescTaskTree(childTask); - } - - @SuppressWarnings("nls") - public Phase1Ctx initPhase1Ctx() { - - Phase1Ctx ctx_1 = new Phase1Ctx(); - ctx_1.nextNum = 0; - ctx_1.dest = "reduce"; - - return ctx_1; - } - - @Override - @SuppressWarnings("nls") - public void analyzeInternal(ASTNode ast) throws SemanticException { - reset(); - - QB qb = new QB(null, null, false); - this.qb = qb; - this.ast = ast; - ASTNode child = ast; - - LOG.info("Starting Semantic Analysis"); - - // analyze create table command - if (ast.getToken().getType() == HiveParser.TOK_CREATETABLE) { - // if it is not CTAS, we don't need to go further and just return - if ( (child = analyzeCreateTable(ast, qb)) == null ) - return; - } - - // continue analyzing from the 
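getLeafTasks above walks the task DAG and collects every task with no children so that a dependent task (the CTAS DDL task) can be chained after all of them. A minimal recursive sketch with a toy task type (not Hive's Task hierarchy):

    import java.util.*;

    public class LeafTaskSketch {
      record TaskNode(String name, List<TaskNode> children) {}

      static void collectLeaves(TaskNode task, Set<TaskNode> leaves) {
        if (task.children().isEmpty()) {
          leaves.add(task);                 // no children: this is a leaf of the plan
          return;
        }
        for (TaskNode child : task.children()) {
          collectLeaves(child, leaves);     // otherwise recurse into every child
        }
      }

      public static void main(String[] args) {
        TaskNode move = new TaskNode("MoveTask", List.of());
        TaskNode mr2 = new TaskNode("MapRedTask-2", List.of(move));
        TaskNode root = new TaskNode("MapRedTask-1", List.of(mr2));
        Set<TaskNode> leaves = new HashSet<>();
        collectLeaves(root, leaves);
        System.out.println(leaves);         // only MoveTask; a dependent DDL task would hang off it
      }
    }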
child ASTNode. - doPhase1(child, qb, initPhase1Ctx()); - LOG.info("Completed phase 1 of Semantic Analysis"); - - getMetaData(qb); - LOG.info("Completed getting MetaData in Semantic Analysis"); - - genPlan(qb); - - - ParseContext pCtx = new ParseContext(conf, qb, child, opToPartPruner, aliasToSamplePruner, topOps, - topSelOps, opParseCtx, joinContext, topToTable, loadTableWork, loadFileWork, - ctx, idToTableNameMap, destTableId, uCtx, listMapJoinOpsNoReducer); - - Optimizer optm = new Optimizer(); - optm.setPctx(pCtx); - optm.initialize(conf); - pCtx = optm.optimize(); - init(pCtx); - qb = pCtx.getQB(); - - // Do any sample pruning - genSamplePruners(qb); - LOG.info("Completed sample pruning"); - - // At this point we have the complete operator tree - // from which we want to find the reduce operator - genMapRedTasks(qb); - - LOG.info("Completed plan generation"); - - return; - } - - /** - * Generates and expression node descriptor for the expression passed in the arguments. This - * function uses the row resolver and the metadata informatinon that are passed as arguments - * to resolve the column names to internal names. - * @param expr The expression - * @param input The row resolver - * @return exprNodeDesc - * @throws SemanticException - */ - @SuppressWarnings("nls") - public static exprNodeDesc genExprNodeDesc(ASTNode expr, RowResolver input) - throws SemanticException { - // We recursively create the exprNodeDesc. Base cases: when we encounter - // a column ref, we convert that into an exprNodeColumnDesc; when we encounter - // a constant, we convert that into an exprNodeConstantDesc. For others we just - // build the exprNodeFuncDesc with recursively built children. - - // If the current subExpression is pre-calculated, as in Group-By etc. - ColumnInfo colInfo = input.get("", expr.toStringTree()); - if (colInfo != null) { - return new exprNodeColumnDesc(colInfo.getType(), colInfo.getInternalName(), - colInfo.getTabAlias(), colInfo.getIsPartitionCol()); - } - - // Create the walker, the rules dispatcher and the context. - TypeCheckCtx tcCtx = new TypeCheckCtx(input); - - // create a walker which walks the tree in a DFS manner while maintaining the operator stack. 
The dispatcher - // generates the plan from the operator tree - Map opRules = new LinkedHashMap(); - StringBuilder sb = new StringBuilder(); - Formatter fm = new Formatter(sb); - opRules.put(new RuleRegExp("R1", HiveParser.TOK_NULL + "%"), TypeCheckProcFactory.getNullExprProcessor()); - opRules.put(new RuleRegExp("R2", HiveParser.Number + "%"), TypeCheckProcFactory.getNumExprProcessor()); - opRules.put(new RuleRegExp("R3", HiveParser.Identifier + "%|" + - HiveParser.StringLiteral + "%|" + - HiveParser.TOK_CHARSETLITERAL + "%|" + - HiveParser.KW_IF + "%|" + - HiveParser.KW_CASE + "%|" + - HiveParser.KW_WHEN + "%"), - TypeCheckProcFactory.getStrExprProcessor()); - opRules.put(new RuleRegExp("R4", HiveParser.KW_TRUE + "%|" + HiveParser.KW_FALSE + "%"), - TypeCheckProcFactory.getBoolExprProcessor()); - opRules.put(new RuleRegExp("R5", HiveParser.TOK_TABLE_OR_COL + "%"), TypeCheckProcFactory.getColumnExprProcessor()); - - // The dispatcher fires the processor corresponding to the closest matching rule and passes the context along - Dispatcher disp = new DefaultRuleDispatcher(TypeCheckProcFactory.getDefaultExprProcessor(), opRules, tcCtx); - GraphWalker ogw = new DefaultGraphWalker(disp); - - // Create a list of topop nodes - ArrayList topNodes = new ArrayList(); - topNodes.add(expr); - HashMap nodeOutputs = new HashMap(); - ogw.startWalking(topNodes, nodeOutputs); - exprNodeDesc desc = (exprNodeDesc)nodeOutputs.get(expr); - if (desc == null) { - throw new SemanticException(tcCtx.getError()); - } - - return desc; - } - - /** - * Gets the table Alias for the column from the column name. This function throws - * and exception in case the same column name is present in multiple table. The exception - * message indicates that the ambiguity could not be resolved. - * - * @param qbm The metadata where the function looks for the table alias - * @param colName The name of the non aliased column - * @param pt The parse tree corresponding to the column(this is used for error reporting) - * @return String - * @throws SemanticException - */ - static String getTabAliasForCol(QBMetaData qbm, String colName, ASTNode pt) - throws SemanticException { - String tabAlias = null; - boolean found = false; - - for(Map.Entry ent: qbm.getAliasToTable().entrySet()) { - for(FieldSchema field: ent.getValue().getAllCols()) { - if (colName.equalsIgnoreCase(field.getName())) { - if (found) { - throw new SemanticException(ErrorMsg.AMBIGUOUS_COLUMN.getMsg(pt)); - } - - found = true; - tabAlias = ent.getKey(); - } - } - } - return tabAlias; - } - - - public void validate() throws SemanticException { - // Check if the plan contains atleast one path. - - // validate all tasks - for(Task rootTask: rootTasks) - validate(rootTask); - } - - private void validate(Task task) throws SemanticException { - if ((task instanceof MapRedTask) || (task instanceof ExecDriver)) { - mapredWork work = (mapredWork)task.getWork(); - - // If the plan does not contain any path, an empty file - // will be added by ExecDriver at execute time - } - - if (task.getChildTasks() == null) - return; - - for (Task childTask : task.getChildTasks()) - validate(childTask); - } - - - /** - * Get the row resolver given an operator. - */ - public RowResolver getRowResolver(Operator opt) { - return opParseCtx.get(opt).getRR(); - } - - /** - * Analyze the create table command. If it is a regular create-table or create-table-like - * statements, we create a DDLWork and return true. 
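getTabAliasForCol above resolves an unqualified column name by scanning every table in the query block and failing when two tables expose the same column. A minimal sketch of that resolution rule (plain maps and an unchecked exception instead of QBMetaData and SemanticException):

    import java.util.*;

    public class ColumnResolutionSketch {
      static String tabAliasForCol(Map<String, List<String>> aliasToColumns, String colName) {
        String found = null;
        for (Map.Entry<String, List<String>> e : aliasToColumns.entrySet()) {
          for (String col : e.getValue()) {
            if (col.equalsIgnoreCase(colName)) {
              if (found != null) {
                throw new IllegalStateException("Ambiguous column reference: " + colName);
              }
              found = e.getKey();
            }
          }
        }
        return found;   // null when no table defines the column
      }

      public static void main(String[] args) {
        Map<String, List<String>> tables = Map.of(
            "src", List.of("key", "value"),
            "dim", List.of("id", "value"));
        System.out.println(tabAliasForCol(tables, "key"));   // src
        // tabAliasForCol(tables, "value") would throw: both tables define it.
      }
    }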
If it is a create-table-as-select, we get the - * necessary info such as the SerDe and Storage Format and put it in QB, and return false, indicating - * the rest of the semantic analyzer need to deal with the select statement with respect to the - * SerDe and Storage Format. - */ - private ASTNode analyzeCreateTable(ASTNode ast, QB qb) - throws SemanticException { - String tableName = unescapeIdentifier(ast.getChild(0).getText()); - String likeTableName = null; - List cols = null; - List partCols = null; - List bucketCols = null; - List sortCols = null; - int numBuckets = -1; - String fieldDelim = null; - String fieldEscape = null; - String collItemDelim = null; - String mapKeyDelim = null; - String lineDelim = null; - String comment = null; - String inputFormat = TEXTFILE_INPUT; - String outputFormat = TEXTFILE_OUTPUT; - String location = null; - String serde = null; - Map mapProp = null; - boolean ifNotExists = false; - boolean isExt = false; - ASTNode selectStmt = null; - final int CREATE_TABLE = 0; // regular CREATE TABLE - final int CTLT = 1; // CREATE TABLE LIKE ... (CTLT) - final int CTAS = 2; // CREATE TABLE AS SELECT ... (CTAS) - int command_type = CREATE_TABLE; - - if ("SequenceFile".equalsIgnoreCase(conf.getVar(HiveConf.ConfVars.HIVEDEFAULTFILEFORMAT))) { - inputFormat = SEQUENCEFILE_INPUT; - outputFormat = SEQUENCEFILE_OUTPUT; - } else if ("RCFile".equalsIgnoreCase(conf.getVar(HiveConf.ConfVars.HIVEDEFAULTFILEFORMAT))) { - inputFormat = RCFILE_INPUT; - outputFormat = RCFILE_OUTPUT; - serde = COLUMNAR_SERDE; - } - - LOG.info("Creating table" + tableName + " positin=" + ast.getCharPositionInLine()); - int numCh = ast.getChildCount(); - - /* Check the 1st-level children and do simple semantic checks: - * 1) CTLT and CTAS should not coexists. - * 2) CTLT or CTAS should not coexists with column list (target table schema). - * 3) CTAS does not support partitioning (for now). - */ - for (int num = 1; num < numCh; num++) - { - ASTNode child = (ASTNode)ast.getChild(num); - switch (child.getToken().getType()) { - case HiveParser.TOK_IFNOTEXISTS: - ifNotExists = true; - break; - case HiveParser.KW_EXTERNAL: - isExt = true; - break; - case HiveParser.TOK_LIKETABLE: - if (child.getChildCount() > 0) { - likeTableName = unescapeIdentifier(child.getChild(0).getText()); - if ( likeTableName != null ) { - if ( command_type == CTAS ) { - throw new SemanticException(ErrorMsg.CTAS_CTLT_COEXISTENCE.getMsg()); - } - if ( cols != null ) { - throw new SemanticException(ErrorMsg.CTLT_COLLST_COEXISTENCE.getMsg()); - } - } - command_type = CTLT; - } - break; - case HiveParser.TOK_QUERY: // CTAS - if ( command_type == CTLT ) { - throw new SemanticException(ErrorMsg.CTAS_CTLT_COEXISTENCE.getMsg()); - } - if ( cols != null ) { - throw new SemanticException(ErrorMsg.CTAS_COLLST_COEXISTENCE.getMsg()); - } - // TODO: support partition for CTAS? 
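The switch below classifies CREATE TABLE into plain DDL, CREATE TABLE LIKE (CTLT) and CREATE TABLE AS SELECT (CTAS), and rejects combinations the grammar accepts but the analyzer does not. A compact restatement of those mutual-exclusion checks (illustrative booleans in place of the AST walk; bucketing columns are folded into hasPartitionCols for brevity):

    public class CreateTableChecks {
      static void validate(boolean isCtas, boolean isCtlt, boolean hasColumnList,
                           boolean hasPartitionCols, boolean isExternal) {
        if (isCtas && isCtlt) {
          throw new IllegalArgumentException("CTAS and CREATE TABLE LIKE cannot be combined");
        }
        if ((isCtas || isCtlt) && hasColumnList) {
          throw new IllegalArgumentException("An explicit column list cannot be given with CTAS/CTLT");
        }
        if (isCtas && hasPartitionCols) {
          throw new IllegalArgumentException("CTAS does not support partition/bucket columns");
        }
        if (isCtas && isExternal) {
          throw new IllegalArgumentException("CTAS cannot create an EXTERNAL table");
        }
      }

      public static void main(String[] args) {
        validate(true, false, false, false, false);  // plain CTAS: accepted
        try {
          validate(true, false, false, true, false); // CTAS + PARTITIONED BY: rejected
        } catch (IllegalArgumentException e) {
          System.out.println(e.getMessage());
        }
      }
    }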
- if ( partCols != null || bucketCols != null ) { - throw new SemanticException(ErrorMsg.CTAS_PARCOL_COEXISTENCE.getMsg()); - } - if ( isExt ) { - throw new SemanticException(ErrorMsg.CTAS_EXTTBL_COEXISTENCE.getMsg()); - } - command_type = CTAS; - selectStmt = child; - break; - case HiveParser.TOK_TABCOLLIST: - cols = getColumns(child); - break; - case HiveParser.TOK_TABLECOMMENT: - comment = unescapeSQLString(child.getChild(0).getText()); - break; - case HiveParser.TOK_TABLEPARTCOLS: - partCols = getColumns((ASTNode)child.getChild(0)); - break; - case HiveParser.TOK_TABLEBUCKETS: - bucketCols = getColumnNames((ASTNode)child.getChild(0)); - if (child.getChildCount() == 2) - numBuckets = (Integer.valueOf(child.getChild(1).getText())).intValue(); - else - { - sortCols = getColumnNamesOrder((ASTNode)child.getChild(1)); - numBuckets = (Integer.valueOf(child.getChild(2).getText())).intValue(); - } - break; - case HiveParser.TOK_TABLEROWFORMAT: - - child = (ASTNode)child.getChild(0); - int numChildRowFormat = child.getChildCount(); - for (int numC = 0; numC < numChildRowFormat; numC++) - { - ASTNode rowChild = (ASTNode)child.getChild(numC); - switch (rowChild.getToken().getType()) { - case HiveParser.TOK_TABLEROWFORMATFIELD: - fieldDelim = unescapeSQLString(rowChild.getChild(0).getText()); - if (rowChild.getChildCount()>=2) { - fieldEscape = unescapeSQLString(rowChild.getChild(1).getText()); - } - break; - case HiveParser.TOK_TABLEROWFORMATCOLLITEMS: - collItemDelim = unescapeSQLString(rowChild.getChild(0).getText()); - break; - case HiveParser.TOK_TABLEROWFORMATMAPKEYS: - mapKeyDelim = unescapeSQLString(rowChild.getChild(0).getText()); - break; - case HiveParser.TOK_TABLEROWFORMATLINES: - lineDelim = unescapeSQLString(rowChild.getChild(0).getText()); - break; - default: assert false; - } - } - break; - case HiveParser.TOK_TABLESERIALIZER: - - child = (ASTNode)child.getChild(0); - serde = unescapeSQLString(child.getChild(0).getText()); - if (child.getChildCount() == 2) { - mapProp = new HashMap(); - ASTNode prop = (ASTNode)((ASTNode)child.getChild(1)).getChild(0); - for (int propChild = 0; propChild < prop.getChildCount(); propChild++) { - String key = unescapeSQLString(prop.getChild(propChild).getChild(0).getText()); - String value = unescapeSQLString(prop.getChild(propChild).getChild(1).getText()); - mapProp.put(key,value); - } - } - break; - case HiveParser.TOK_TBLSEQUENCEFILE: - inputFormat = SEQUENCEFILE_INPUT; - outputFormat = SEQUENCEFILE_OUTPUT; - break; - case HiveParser.TOK_TBLTEXTFILE: - inputFormat = TEXTFILE_INPUT; - outputFormat = TEXTFILE_OUTPUT; - break; - case HiveParser.TOK_TBLRCFILE: - inputFormat = RCFILE_INPUT; - outputFormat = RCFILE_OUTPUT; - serde = COLUMNAR_SERDE; - break; - case HiveParser.TOK_TABLEFILEFORMAT: - inputFormat = unescapeSQLString(child.getChild(0).getText()); - outputFormat = unescapeSQLString(child.getChild(1).getText()); - break; - case HiveParser.TOK_TABLELOCATION: - location = unescapeSQLString(child.getChild(0).getText()); - break; - default: assert false; - } - } - - // check for existence of table - if ( ifNotExists ) { - try { - List tables = this.db.getTablesByPattern(tableName); - if ( tables != null && tables.size() > 0 ) { // table exists - return null; - } - } catch (HiveException e) { - e.printStackTrace(); - } - } - - // Handle different types of CREATE TABLE command - createTableDesc crtTblDesc = null; - switch ( command_type ) { - - case CREATE_TABLE: // REGULAR CREATE TABLE DDL - crtTblDesc = - new createTableDesc(tableName, isExt, cols, 
partCols, bucketCols, - sortCols, numBuckets, - fieldDelim, fieldEscape, - collItemDelim, mapKeyDelim, lineDelim, - comment, inputFormat, outputFormat, location, serde, - mapProp, ifNotExists); - - validateCreateTable(crtTblDesc); - rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), crtTblDesc), conf)); - break; - - case CTLT: // create table like - createTableLikeDesc crtTblLikeDesc = - new createTableLikeDesc(tableName, isExt, location, ifNotExists, likeTableName); - rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), crtTblLikeDesc), conf)); - break; - - case CTAS: // create table as select - - // check for existence of table. Throw an exception if it exists. - try { - Table tab = this.db.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, tableName, - false); // do not throw exception if table does not exist - - if ( tab != null ) { - throw new SemanticException(ErrorMsg.TABLE_ALREADY_EXISTS.getMsg(tableName)); - } - } catch (HiveException e) { // may be unable to get meta data - throw new SemanticException(e); - } - - crtTblDesc = - new createTableDesc(tableName, isExt, cols, partCols, bucketCols, - sortCols, numBuckets, - fieldDelim, fieldEscape, - collItemDelim, mapKeyDelim, lineDelim, - comment, inputFormat, outputFormat, location, serde, - mapProp, ifNotExists); - qb.setTableDesc(crtTblDesc); - - return selectStmt; - default: assert false; // should never be unknown command type - } - return null; - } - - private void validateCreateTable(createTableDesc crtTblDesc) throws SemanticException { - // no duplicate column names - // currently, it is a simple n*n algorithm - this can be optimized later if need be - // but it should not be a major bottleneck as the number of columns are anyway not so big - - if((crtTblDesc.getCols() == null) || (crtTblDesc.getCols().size() == 0)) { - // for now make sure that serde exists - if(StringUtils.isEmpty(crtTblDesc.getSerName()) || SerDeUtils.isNativeSerDe(crtTblDesc.getSerName())) { - throw new SemanticException(ErrorMsg.INVALID_TBL_DDL_SERDE.getMsg()); - } - return; - } - - try { - Class origin = Class.forName(crtTblDesc.getOutputFormat(), true, JavaUtils.getClassLoader()); - Class replaced = HiveFileFormatUtils.getOutputFormatSubstitute(origin); - if(replaced == null) - throw new SemanticException(ErrorMsg.INVALID_OUTPUT_FORMAT_TYPE.getMsg()); - } catch (ClassNotFoundException e) { - throw new SemanticException(ErrorMsg.INVALID_OUTPUT_FORMAT_TYPE.getMsg()); - } - - Iterator iterCols = crtTblDesc.getCols().iterator(); - List colNames = new ArrayList(); - while (iterCols.hasNext()) { - String colName = iterCols.next().getName(); - Iterator iter = colNames.iterator(); - while (iter.hasNext()) { - String oldColName = iter.next(); - if (colName.equalsIgnoreCase(oldColName)) - throw new SemanticException(ErrorMsg.DUPLICATE_COLUMN_NAMES.getMsg()); - } - colNames.add(colName); - } - - if (crtTblDesc.getBucketCols() != null) - { - // all columns in cluster and sort are valid columns - Iterator bucketCols = crtTblDesc.getBucketCols().iterator(); - while (bucketCols.hasNext()) { - String bucketCol = bucketCols.next(); - boolean found = false; - Iterator colNamesIter = colNames.iterator(); - while (colNamesIter.hasNext()) { - String colName = colNamesIter.next(); - if (bucketCol.equalsIgnoreCase(colName)) { - found = true; - break; - } - } - if (!found) - throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg()); - } - } - - if (crtTblDesc.getSortCols() != null) - { - // all columns in cluster and sort are valid columns - 
Iterator sortCols = crtTblDesc.getSortCols().iterator(); - while (sortCols.hasNext()) { - String sortCol = sortCols.next().getCol(); - boolean found = false; - Iterator colNamesIter = colNames.iterator(); - while (colNamesIter.hasNext()) { - String colName = colNamesIter.next(); - if (sortCol.equalsIgnoreCase(colName)) { - found = true; - break; - } - } - if (!found) - throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg()); - } - } - - if (crtTblDesc.getPartCols() != null) - { - // there is no overlap between columns and partitioning columns - Iterator partColsIter = crtTblDesc.getPartCols().iterator(); - while (partColsIter.hasNext()) { - String partCol = partColsIter.next().getName(); - Iterator colNamesIter = colNames.iterator(); - while (colNamesIter.hasNext()) { - String colName = unescapeIdentifier(colNamesIter.next()); - if (partCol.equalsIgnoreCase(colName)) - throw new SemanticException(ErrorMsg.COLUMN_REPEATED_IN_PARTITIONING_COLS.getMsg()); - } - } - } - } -} Index: ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java (revision 836131) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java (working copy) @@ -1,386 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.parse; - -import java.io.Serializable; -import java.util.HashMap; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; - -import org.apache.hadoop.hive.ql.exec.JoinOperator; -import org.apache.hadoop.hive.ql.exec.MapJoinOperator; -import org.apache.hadoop.hive.ql.exec.Operator; -import org.apache.hadoop.hive.ql.exec.TableScanOperator; -import org.apache.hadoop.hive.ql.plan.exprNodeDesc; -import org.apache.hadoop.hive.ql.plan.loadFileDesc; -import org.apache.hadoop.hive.ql.plan.loadTableDesc; -import org.apache.hadoop.hive.ql.Context; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.ql.metadata.Table; -import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext; - -/** - * Parse Context: The current parse context. This is passed to the optimizer - * which then transforms the operator tree using the parse context. All the - * optimizations are performed sequentially and then the new parse context - * populated. Note that since the parse context contains the operator tree, it - * can be easily retrieved by the next optimization step or finally for task - * generation after the plan has been completely optimized. 
- * - **/ - -public class ParseContext { - private QB qb; - private ASTNode ast; - private HashMap opToPartPruner; - private HashMap aliasToSamplePruner; - private HashMap> topOps; - private HashMap> topSelOps; - private LinkedHashMap, OpParseContext> opParseCtx; - private Map joinContext; - private HashMap topToTable; - private List loadTableWork; - private List loadFileWork; - private Context ctx; - private HiveConf conf; - private HashMap idToTableNameMap; - private int destTableId; - private UnionProcContext uCtx; - private List listMapJoinOpsNoReducer; // list of map join operators with no reducer - - // is set to true if the expression only contains partitioning columns and not any other column reference. - // This is used to optimize select * from table where ... scenario, when the where condition only references - // partitioning columns - the partitions are identified and streamed directly to the client without requiring - // a map-reduce job - private boolean hasNonPartCols; - - public ParseContext() { - } - - /** - * @param qb - * current QB - * @param ast - * current parse tree - * @param opToPartPruner - * map from table scan operator to partition pruner - * @param aliasToSamplePruner - * sample pruner list - * @param topOps - * list of operators for the top query - * @param topSelOps - * list of operators for the selects introduced for column pruning - * @param opParseCtx - * operator parse context - contains a mapping from operator to - * operator parse state (row resolver etc.) - * @param joinContext context needed join processing (map join specifically) - * @param topToTable the top tables being processed - * @param loadTableWork - * list of destination tables being loaded - * @param loadFileWork - * list of destination files being loaded - * @param ctx parse context - * @param idToTableNameMap - * @param destTableId - * @param uCtx - * @param listMapJoinOpsNoReducer - * list of map join operators with no reducer - */ - public ParseContext(HiveConf conf, QB qb, ASTNode ast, - HashMap opToPartPruner, - HashMap aliasToSamplePruner, - HashMap> topOps, - HashMap> topSelOps, - LinkedHashMap, OpParseContext> opParseCtx, - Map joinContext, - HashMap topToTable, - List loadTableWork, List loadFileWork, - Context ctx, HashMap idToTableNameMap, int destTableId, UnionProcContext uCtx, - List listMapJoinOpsNoReducer) { - this.conf = conf; - this.qb = qb; - this.ast = ast; - this.opToPartPruner = opToPartPruner; - this.aliasToSamplePruner = aliasToSamplePruner; - this.joinContext = joinContext; - this.topToTable = topToTable; - this.loadFileWork = loadFileWork; - this.loadTableWork = loadTableWork; - this.opParseCtx = opParseCtx; - this.topOps = topOps; - this.topSelOps = topSelOps; - this.ctx = ctx; - this.idToTableNameMap = idToTableNameMap; - this.destTableId = destTableId; - this.uCtx = uCtx; - this.listMapJoinOpsNoReducer = listMapJoinOpsNoReducer; - this.hasNonPartCols = false; - } - - /** - * @return the qb - */ - public QB getQB() { - return qb; - } - - /** - * @param qb - * the qb to set - */ - public void setQB(QB qb) { - this.qb = qb; - } - - /** - * @return the context - */ - public Context getContext() { - return ctx; - } - - /** - * @param ctx - * the context to set - */ - public void setContext(Context ctx) { - this.ctx = ctx; - } - - /** - * @return the hive conf - */ - public HiveConf getConf() { - return conf; - } - - /** - * @param conf - * the conf to set - */ - public void setConf(HiveConf conf) { - this.conf = conf; - } - - /** - * @return the ast - */ - public 
ASTNode getParseTree() { - return ast; - } - - /** - * @param ast - * the parsetree to set - */ - public void setParseTree(ASTNode ast) { - this.ast = ast; - } - - /** - * @return the opToPartPruner - */ - public HashMap getOpToPartPruner() { - return opToPartPruner; - } - - /** - * @param opToPartPruner - * the opToPartPruner to set - */ - public void setOpToPartPruner(HashMap opToPartPruner) { - this.opToPartPruner = opToPartPruner; - } - - /** - * @return the topToTable - */ - public HashMap getTopToTable() { - return topToTable; - } - - /** - * @param topToTable - * the topToTable to set - */ - public void setTopToTable(HashMap topToTable) { - this.topToTable = topToTable; - } - /** - * @return the aliasToSamplePruner - */ - public HashMap getAliasToSamplePruner() { - return aliasToSamplePruner; - } - - /** - * @param aliasToSamplePruner - * the aliasToSamplePruner to set - */ - public void setAliasToSamplePruner( - HashMap aliasToSamplePruner) { - this.aliasToSamplePruner = aliasToSamplePruner; - } - - /** - * @return the topOps - */ - public HashMap> getTopOps() { - return topOps; - } - - /** - * @param topOps - * the topOps to set - */ - public void setTopOps(HashMap> topOps) { - this.topOps = topOps; - } - - /** - * @return the topSelOps - */ - public HashMap> getTopSelOps() { - return topSelOps; - } - - /** - * @param topSelOps - * the topSelOps to set - */ - public void setTopSelOps( - HashMap> topSelOps) { - this.topSelOps = topSelOps; - } - - /** - * @return the opParseCtx - */ - public LinkedHashMap, OpParseContext> getOpParseCtx() { - return opParseCtx; - } - - /** - * @param opParseCtx - * the opParseCtx to set - */ - public void setOpParseCtx( - LinkedHashMap, OpParseContext> opParseCtx) { - this.opParseCtx = opParseCtx; - } - - /** - * @return the loadTableWork - */ - public List getLoadTableWork() { - return loadTableWork; - } - - /** - * @param loadTableWork - * the loadTableWork to set - */ - public void setLoadTableWork(List loadTableWork) { - this.loadTableWork = loadTableWork; - } - - /** - * @return the loadFileWork - */ - public List getLoadFileWork() { - return loadFileWork; - } - - /** - * @param loadFileWork - * the loadFileWork to set - */ - public void setLoadFileWork(List loadFileWork) { - this.loadFileWork = loadFileWork; - } - - public HashMap getIdToTableNameMap() { - return idToTableNameMap; - } - - public void setIdToTableNameMap(HashMap idToTableNameMap) { - this.idToTableNameMap = idToTableNameMap; - } - - public int getDestTableId() { - return destTableId; - } - - public void setDestTableId(int destTableId) { - this.destTableId = destTableId; - } - - public UnionProcContext getUCtx() { - return uCtx; - } - - public void setUCtx(UnionProcContext uCtx) { - this.uCtx = uCtx; - } - - /** - * @return the joinContext - */ - public Map getJoinContext() { - return joinContext; - } - - /** - * @param joinContext the joinContext to set - */ - public void setJoinContext(Map joinContext) { - this.joinContext = joinContext; - } - - /** - * @return the listMapJoinOpsNoReducer - */ - public List getListMapJoinOpsNoReducer() { - return listMapJoinOpsNoReducer; - } - - /** - * @param listMapJoinOpsNoReducer the listMapJoinOpsNoReducer to set - */ - public void setListMapJoinOpsNoReducer( - List listMapJoinOpsNoReducer) { - this.listMapJoinOpsNoReducer = listMapJoinOpsNoReducer; - } - - /** - * Sets the hasNonPartCols flag - * @param val - */ - public void setHasNonPartCols(boolean val) { - this.hasNonPartCols = val; - } - - /** - * Gets the value of the hasNonPartCols 
flag - */ - public boolean getHasNonPartCols() { - return this.hasNonPartCols; - } -} Index: ql/src/java/org/apache/hadoop/hive/ql/parse/SamplePrunerGenerator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/SamplePrunerGenerator.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SamplePrunerGenerator.java (revision 0) @@ -0,0 +1,52 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.parse; + + +public class SamplePrunerGenerator { + + + @SuppressWarnings("nls") + public static void genSamplePruners(LogicalPlan logicalPlan, QB qb) throws SemanticException { + // Recursively prune subqueries + for (String alias : qb.getSubqAliases()) { + QBExpr qbexpr = qb.getSubqForAlias(alias); + genSamplePruners(logicalPlan, qbexpr); + } + + for (String alias : qb.getTabAliases()) { + String alias_id = (qb.getId() == null ? alias : qb.getId() + ":" + alias); + QBParseInfo qbp = qb.getParseInfo(); + TableSample tableSample = qbp.getTabSample(alias_id); + if (tableSample != null) { + SamplePruner pruner = new SamplePruner(alias, tableSample); + logicalPlan.addSamplePruner(alias_id, pruner); + } + } + } + + private static void genSamplePruners(LogicalPlan logicalPlan, QBExpr qbexpr) throws SemanticException { + if (qbexpr.getOpcode() == QBExpr.Opcode.NULLOP) { + genSamplePruners(logicalPlan, qbexpr.getQB()); + } else { + genSamplePruners(logicalPlan, qbexpr.getQBExpr1()); + genSamplePruners(logicalPlan, qbexpr.getQBExpr2()); + } + } +} Index: ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java (revision 836131) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java (working copy) @@ -125,7 +125,11 @@ public Vector getColumnInfos() { return rowSchema.getSignature(); } - + + public RowSchema getRowSchema() { + return new RowSchema(rowSchema); + } + public HashMap getFieldMap(String tab_alias) { return rslvMap.get(tab_alias.toLowerCase()); } Index: ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java (revision 836131) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java (working copy) @@ -37,31 +37,30 @@ } public void analyzeInternal(ASTNode ast) throws SemanticException { - ctx.setExplain(true); + Context context = getContext(); + context.setExplain(true); // Create a semantic analyzer for the query - BaseSemanticAnalyzer sem = SemanticAnalyzerFactory.get(conf, (ASTNode)ast.getChild(0)); - 
sem.analyze((ASTNode)ast.getChild(0), ctx); + BaseSemanticAnalyzer sem = SemanticAnalyzerFactory.get(getHiveConf(), (ASTNode)ast.getChild(0)); + sem.analyze((ASTNode)ast.getChild(0), context); boolean extended = false; if (ast.getChildCount() > 1) { extended = true; } - ctx.setResFile(new Path(ctx.getLocalTmpFileURI())); - List> tasks = sem.getRootTasks(); - Task fetchTask = sem.getFetchTask(); - if (tasks == null) { - if (fetchTask != null) { - tasks = new ArrayList>(); - tasks.add(fetchTask); - } + context.setResFile(new Path(context.getLocalTmpFileURI())); + + Task fetchTask = sem.getPhysicalPlan().getFetchTask(); + if (null != fetchTask) { + sem.getPhysicalPlan().addRootTask(fetchTask); } - else if (fetchTask != null) - tasks.add(fetchTask); - rootTasks.add(TaskFactory.get(new explainWork(ctx.getResFile(), tasks, - ((ASTNode)ast.getChild(0)).toStringTree(), - extended), this.conf)); + explainWork expWork = new explainWork(context.getResFile(), + sem.getPhysicalPlan().getRootTasks(), + ((ASTNode)ast.getChild(0)).toStringTree(), + extended); + + getPhysicalPlan().addRootTask(TaskFactory.get(expWork, getHiveConf())); } } Index: ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java (revision 836131) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java (working copy) @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.parse; +import java.io.UnsupportedEncodingException; + /** * Library of utility functions used in the parse code * @@ -31,12 +33,156 @@ * @return boolean */ public static boolean isJoinToken(ASTNode node) { - if ((node.getToken().getType() == HiveParser.TOK_JOIN) - || (node.getToken().getType() == HiveParser.TOK_LEFTOUTERJOIN) - || (node.getToken().getType() == HiveParser.TOK_RIGHTOUTERJOIN) - || (node.getToken().getType() == HiveParser.TOK_FULLOUTERJOIN)) - return true; + switch (node.getToken().getType()) { + case HiveParser.TOK_JOIN: + case HiveParser.TOK_LEFTOUTERJOIN: + case HiveParser.TOK_RIGHTOUTERJOIN: + case HiveParser.TOK_FULLOUTERJOIN: + case HiveParser.TOK_LEFTSEMIJOIN: + case HiveParser.TOK_UNIQUEJOIN: + return true; + default: + return false; + } + } - return false; - } + public static String stripQuotes(String val) throws SemanticException { + if ((val.charAt(0) == '\'' && val.charAt(val.length() - 1) == '\'') + || (val.charAt(0) == '\"' && val.charAt(val.length() - 1) == '\"')) { + val = val.substring(1, val.length() - 1); + } + return val; + } + + public static String charSetString(String charSetName, String charSetString) + throws SemanticException { + try + { + // The character set name starts with a _, so strip that + charSetName = charSetName.substring(1); + if (charSetString.charAt(0) == '\'') + return new String(ParseUtils.unescapeSQLString(charSetString).getBytes(), charSetName); + else // hex input is also supported + { + assert charSetString.charAt(0) == '0'; + assert charSetString.charAt(1) == 'x'; + charSetString = charSetString.substring(2); + + byte[] bArray = new byte[charSetString.length()/2]; + int j = 0; + for (int i = 0; i < charSetString.length(); i += 2) + { + int val = Character.digit(charSetString.charAt(i), 16) * 16 + Character.digit(charSetString.charAt(i+1), 16); + if (val > 127) + val = val - 256; + bArray[j++] = new Integer(val).byteValue(); + } + + String res = new String(bArray, charSetName); + return res; + } + } catch (UnsupportedEncodingException e) { + throw new 
SemanticException(e); + } + } + + /** + * Remove the encapsulating "`" pair from the identifier. + * We allow users to use "`" to escape identifier for table names, + * column names and aliases, in case that coincide with Hive language + * keywords. + */ + public static String unescapeIdentifier(String val) { + if (val == null) { + return null; + } + if (val.charAt(0) == '`' && val.charAt(val.length() - 1) == '`') { + val = val.substring(1, val.length() - 1); + } + return val; + } + + @SuppressWarnings("nls") + public static String unescapeSQLString(String b) { + + Character enclosure = null; + + // Some of the strings can be passed in as unicode. For example, the + // delimiter can be passed in as \002 - So, we first check if the + // string is a unicode number, else go back to the old behavior + StringBuilder sb = new StringBuilder(b.length()); + for (int i=0; i < b.length(); i++) { + + char currentChar = b.charAt(i); + if (enclosure == null) { + if (currentChar == '\'' || b.charAt(i) == '\"') { + enclosure = currentChar; + } + // ignore all other chars outside the enclosure + continue; + } + + if (enclosure.equals(currentChar)) { + enclosure = null; + continue; + } + + if (currentChar == '\\' && (i+4 < b.length())) { + char i1 = b.charAt(i+1); + char i2 = b.charAt(i+2); + char i3 = b.charAt(i+3); + if ((i1 >= '0' && i1 <= '1') && + (i2 >= '0' && i2 <= '7') && + (i3 >= '0' && i3 <= '7')) + { + byte bVal = (byte)((i3 - '0') + ((i2 - '0') * 8 ) + ((i1 - '0') * 8 * 8)); + byte[] bValArr = new byte[1]; + bValArr[0] = bVal; + String tmp = new String(bValArr); + sb.append(tmp); + i += 3; + continue; + } + } + + if (currentChar == '\\' && (i+2 < b.length())) { + char n=b.charAt(i+1); + switch(n) { + case '0': sb.append("\0"); break; + case '\'': sb.append("'"); break; + case '"': sb.append("\""); break; + case 'b': sb.append("\b"); break; + case 'n': sb.append("\n"); break; + case 'r': sb.append("\r"); break; + case 't': sb.append("\t"); break; + case 'Z': sb.append("\u001A"); break; + case '\\': sb.append("\\"); break; + // The following 2 lines are exactly what MySQL does + case '%': sb.append("\\%"); break; + case '_': sb.append("\\_"); break; + default: sb.append(n); + } + i++; + } else { + sb.append(currentChar); + } + } + return sb.toString(); + } + + /** + * Returns whether the pattern is a regex expression (instead of a normal string). + * Normal string is a string with all alphabets/digits and "_". 
+ */ + static boolean isRegex(String pattern) { + for(int i=0; i mapProp = getProps((ASTNode)(ast.getChild(1)).getChild(0)); alterTableDesc alterTblDesc = new alterTableDesc(alterTableTypes.ADDPROPS); alterTblDesc.setProps(mapProp); alterTblDesc.setOldName(tableName); - rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), alterTblDesc), conf)); + getPhysicalPlan().addRootTask(TaskFactory.get(new DDLWork(getPhysicalPlan(), alterTblDesc), getHiveConf())); } private void analyzeAlterTableSerdeProps(ASTNode ast) throws SemanticException { - String tableName = unescapeIdentifier(ast.getChild(0).getText()); + String tableName = ParseUtils.unescapeIdentifier(ast.getChild(0).getText()); HashMap mapProp = getProps((ASTNode)(ast.getChild(1)).getChild(0)); alterTableDesc alterTblDesc = new alterTableDesc(alterTableTypes.ADDSERDEPROPS); alterTblDesc.setProps(mapProp); alterTblDesc.setOldName(tableName); - rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), alterTblDesc), conf)); + getPhysicalPlan().addRootTask(TaskFactory.get(new DDLWork(getPhysicalPlan(), alterTblDesc), getHiveConf())); } private void analyzeAlterTableSerde(ASTNode ast) throws SemanticException { - String tableName = unescapeIdentifier(ast.getChild(0).getText()); - String serdeName = unescapeSQLString(ast.getChild(1).getText()); + String tableName = ParseUtils.unescapeIdentifier(ast.getChild(0).getText()); + String serdeName = ParseUtils.unescapeSQLString(ast.getChild(1).getText()); alterTableDesc alterTblDesc = new alterTableDesc(alterTableTypes.ADDSERDE); if(ast.getChildCount() > 2) { HashMap mapProp = getProps((ASTNode)(ast.getChild(2)).getChild(0)); @@ -173,11 +171,11 @@ } alterTblDesc.setOldName(tableName); alterTblDesc.setSerdeName(serdeName); - rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), alterTblDesc), conf)); + getPhysicalPlan().addRootTask(TaskFactory.get(new DDLWork(getPhysicalPlan(), alterTblDesc), getHiveConf())); } private void analyzeAlterTableFileFormat(ASTNode ast) throws SemanticException { - String tableName = unescapeIdentifier(ast.getChild(0).getText()); + String tableName = ParseUtils.unescapeIdentifier(ast.getChild(0).getText()); String inputFormat = null; String outputFormat = null; String serde = null; @@ -185,9 +183,9 @@ switch (child.getToken().getType()) { case HiveParser.TOK_TABLEFILEFORMAT: - inputFormat = unescapeSQLString(((ASTNode) child.getChild(0)).getToken() + inputFormat = ParseUtils.unescapeSQLString(((ASTNode) child.getChild(0)).getToken() .getText()); - outputFormat = unescapeSQLString(((ASTNode) child.getChild(1)).getToken() + outputFormat = ParseUtils.unescapeSQLString(((ASTNode) child.getChild(1)).getToken() .getText()); try { Class.forName(inputFormat); @@ -211,14 +209,14 @@ break; } alterTableDesc alterTblDesc = new alterTableDesc(tableName, inputFormat, outputFormat, serde); - rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), alterTblDesc), conf)); + getPhysicalPlan().addRootTask(TaskFactory.get(new DDLWork(getPhysicalPlan(), alterTblDesc), getHiveConf())); } private HashMap getProps(ASTNode prop) { HashMap mapProp = new HashMap(); for (int propChild = 0; propChild < prop.getChildCount(); propChild++) { - String key = unescapeSQLString(prop.getChild(propChild).getChild(0).getText()); - String value = unescapeSQLString(prop.getChild(propChild).getChild(1).getText()); + String key = ParseUtils.unescapeSQLString(prop.getChild(propChild).getChild(0).getText()); + String value = 
ParseUtils.unescapeSQLString(prop.getChild(propChild).getChild(1).getText()); mapProp.put(key,value); } return mapProp; @@ -255,12 +253,12 @@ prop.setProperty("columns.types", colTypes[1]); fetchWork fetch = new fetchWork( - ctx.getResFile().toString(), + getContext().getResFile().toString(), new tableDesc(LazySimpleSerDe.class, TextInputFormat.class, IgnoreKeyTextOutputFormat.class, prop), -1 ); fetch.setSerializationNullFormat(" "); - return TaskFactory.get(fetch, this.conf); + return TaskFactory.get(fetch, getHiveConf()); } private void analyzeDescribeTable(ASTNode ast) @@ -276,9 +274,9 @@ } boolean isExt = ast.getChildCount() > 1; - descTableDesc descTblDesc = new descTableDesc(ctx.getResFile(), tableName, partSpec, isExt); - rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), descTblDesc), conf)); - setFetchTask(createFetchTask(descTblDesc.getSchema())); + descTableDesc descTblDesc = new descTableDesc(getContext().getResFile(), tableName, partSpec, isExt); + getPhysicalPlan().addRootTask(TaskFactory.get(new DDLWork(getPhysicalPlan(), descTblDesc), getHiveConf())); + getPhysicalPlan().setFetchTask(createFetchTask(descTblDesc.getSchema())); LOG.info("analyzeDescribeTable done"); } @@ -288,7 +286,7 @@ HashMap partSpec = new LinkedHashMap(); for (int i = 0; i < partspec.getChildCount(); ++i) { ASTNode partspec_val = (ASTNode) partspec.getChild(i); - String val = stripQuotes(partspec_val.getChild(1).getText()); + String val = ParseUtils.stripQuotes(partspec_val.getChild(1).getText()); partSpec.put(partspec_val.getChild(0).getText().toLowerCase(), val); } return partSpec; @@ -297,10 +295,10 @@ private void analyzeShowPartitions(ASTNode ast) throws SemanticException { showPartitionsDesc showPartsDesc; - String tableName = unescapeIdentifier(ast.getChild(0).getText()); - showPartsDesc = new showPartitionsDesc(tableName, ctx.getResFile()); - rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), showPartsDesc), conf)); - setFetchTask(createFetchTask(showPartsDesc.getSchema())); + String tableName = ParseUtils.unescapeIdentifier(ast.getChild(0).getText()); + showPartsDesc = new showPartitionsDesc(tableName, getContext().getResFile()); + getPhysicalPlan().addRootTask(TaskFactory.get(new DDLWork(getPhysicalPlan(), showPartsDesc), getHiveConf())); + getPhysicalPlan().setFetchTask(createFetchTask(showPartsDesc.getSchema())); } private void analyzeShowTables(ASTNode ast) @@ -308,20 +306,20 @@ showTablesDesc showTblsDesc; if (ast.getChildCount() == 1) { - String tableNames = unescapeSQLString(ast.getChild(0).getText()); - showTblsDesc = new showTablesDesc(ctx.getResFile(), tableNames); + String tableNames = ParseUtils.unescapeSQLString(ast.getChild(0).getText()); + showTblsDesc = new showTablesDesc(getContext().getResFile(), tableNames); } else { - showTblsDesc = new showTablesDesc(ctx.getResFile()); + showTblsDesc = new showTablesDesc(getContext().getResFile()); } - rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), showTblsDesc), conf)); - setFetchTask(createFetchTask(showTblsDesc.getSchema())); + getPhysicalPlan().addRootTask(TaskFactory.get(new DDLWork(getPhysicalPlan(), showTblsDesc), getHiveConf())); + getPhysicalPlan().setFetchTask(createFetchTask(showTblsDesc.getSchema())); } private void analyzeShowTableStatus(ASTNode ast) throws SemanticException { showTableStatusDesc showTblStatusDesc; - String tableNames = unescapeIdentifier(ast.getChild(0).getText()); + String tableNames = ParseUtils.unescapeIdentifier(ast.getChild(0).getText()); String 
dbName = MetaStoreUtils.DEFAULT_DATABASE_NAME; int children = ast.getChildCount(); HashMap partSpec = null; @@ -331,16 +329,16 @@ for (int i = 1; i < children; i++) { ASTNode child = (ASTNode) ast.getChild(i); if(child.getToken().getType() == HiveParser.Identifier) - dbName = unescapeIdentifier(child.getText()); + dbName = ParseUtils.unescapeIdentifier(child.getText()); else if (child.getToken().getType() == HiveParser.TOK_PARTSPEC) partSpec = getPartSpec(child); else throw new SemanticException(ErrorMsg.GENERIC_ERROR.getMsg()); } } - showTblStatusDesc = new showTableStatusDesc(ctx.getResFile(), dbName, tableNames, partSpec); - rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), showTblStatusDesc), conf)); - setFetchTask(createFetchTask(showTblStatusDesc.getSchema())); + showTblStatusDesc = new showTableStatusDesc(getContext().getResFile(), dbName, tableNames, partSpec); + getPhysicalPlan().addRootTask(TaskFactory.get(new DDLWork(getPhysicalPlan(), showTblStatusDesc), getHiveConf())); + getPhysicalPlan().setFetchTask(createFetchTask(showTblStatusDesc.getSchema())); } /** @@ -353,14 +351,14 @@ throws SemanticException { showFunctionsDesc showFuncsDesc; if (ast.getChildCount() == 1) { - String funcNames = unescapeSQLString(ast.getChild(0).getText()); - showFuncsDesc = new showFunctionsDesc(ctx.getResFile(), funcNames); + String funcNames = ParseUtils.unescapeSQLString(ast.getChild(0).getText()); + showFuncsDesc = new showFunctionsDesc(getContext().getResFile(), funcNames); } else { - showFuncsDesc = new showFunctionsDesc(ctx.getResFile()); + showFuncsDesc = new showFunctionsDesc(getContext().getResFile()); } - rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), showFuncsDesc), conf)); - setFetchTask(createFetchTask(showFuncsDesc.getSchema())); + getPhysicalPlan().addRootTask(TaskFactory.get(new DDLWork(getPhysicalPlan(), showFuncsDesc), getHiveConf())); + getPhysicalPlan().setFetchTask(createFetchTask(showFuncsDesc.getSchema())); } /** @@ -384,34 +382,34 @@ throw new SemanticException("Unexpected Tokens at DESCRIBE FUNCTION"); } - descFunctionDesc descFuncDesc = new descFunctionDesc(ctx.getResFile(), + descFunctionDesc descFuncDesc = new descFunctionDesc(getContext().getResFile(), funcName, isExtended); - rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), descFuncDesc), conf)); - setFetchTask(createFetchTask(descFuncDesc.getSchema())); + getPhysicalPlan().addRootTask(TaskFactory.get(new DDLWork(getPhysicalPlan(), descFuncDesc), getHiveConf())); + getPhysicalPlan().setFetchTask(createFetchTask(descFuncDesc.getSchema())); } private void analyzeAlterTableRename(ASTNode ast) throws SemanticException { alterTableDesc alterTblDesc = new alterTableDesc( - unescapeIdentifier(ast.getChild(0).getText()), - unescapeIdentifier(ast.getChild(1).getText())); - rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), alterTblDesc), conf)); + ParseUtils.unescapeIdentifier(ast.getChild(0).getText()), + ParseUtils.unescapeIdentifier(ast.getChild(1).getText())); + getPhysicalPlan().addRootTask(TaskFactory.get(new DDLWork(getPhysicalPlan(), alterTblDesc), getHiveConf())); } private void analyzeAlterTableModifyCols(ASTNode ast, alterTableTypes alterType) throws SemanticException { - String tblName = unescapeIdentifier(ast.getChild(0).getText()); + String tblName = ParseUtils.unescapeIdentifier(ast.getChild(0).getText()); List newCols = getColumns((ASTNode)ast.getChild(1)); alterTableDesc alterTblDesc = new alterTableDesc(tblName, newCols, alterType); 
- rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), alterTblDesc), conf)); + getPhysicalPlan().addRootTask(TaskFactory.get(new DDLWork(getPhysicalPlan(), alterTblDesc), getHiveConf())); } private void analyzeAlterTableDropParts(ASTNode ast) throws SemanticException { - String tblName = unescapeIdentifier(ast.getChild(0).getText()); + String tblName = ParseUtils.unescapeIdentifier(ast.getChild(0).getText()); // get table metadata List> partSpecs = getPartitionSpecs(ast); dropTableDesc dropTblDesc = new dropTableDesc(tblName, partSpecs); - rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), dropTblDesc), conf)); + getPhysicalPlan().addRootTask(TaskFactory.get(new DDLWork(getPhysicalPlan(), dropTblDesc), getHiveConf())); } /** @@ -424,7 +422,7 @@ private void analyzeAlterTableAddParts(CommonTree ast) throws SemanticException { - String tblName = unescapeIdentifier(ast.getChild(0).getText());; + String tblName = ParseUtils.unescapeIdentifier(ast.getChild(0).getText());; //partition name to value List> partSpecs = getPartitionSpecs(ast); @@ -442,7 +440,7 @@ AddPartitionDesc addPartitionDesc = new AddPartitionDesc(MetaStoreUtils.DEFAULT_DATABASE_NAME, tblName, currentPart, currentLocation); - rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), addPartitionDesc), conf)); + getPhysicalPlan().addRootTask(TaskFactory.get(new DDLWork(getPhysicalPlan(), addPartitionDesc), getHiveConf())); } //create new partition, set values currentLocation = null; @@ -450,7 +448,7 @@ break; case HiveParser.TOK_PARTITIONLOCATION: //if location specified, set in partition - currentLocation = unescapeSQLString(child.getChild(0).getText()); + currentLocation = ParseUtils.unescapeSQLString(child.getChild(0).getText()); break; default: throw new SemanticException("Unknown child: " + child); @@ -462,7 +460,7 @@ AddPartitionDesc addPartitionDesc = new AddPartitionDesc(MetaStoreUtils.DEFAULT_DATABASE_NAME, tblName, currentPart, currentLocation); - rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), addPartitionDesc), conf)); + getPhysicalPlan().addRootTask(TaskFactory.get(new DDLWork(getPhysicalPlan(), addPartitionDesc), getHiveConf())); } } @@ -478,14 +476,14 @@ if(ast.getChildCount() > 0) { repair = ast.getChild(0).getType() == HiveParser.KW_REPAIR; if (!repair) { - tableName = unescapeIdentifier(ast.getChild(0).getText()); + tableName = ParseUtils.unescapeIdentifier(ast.getChild(0).getText()); } else if (ast.getChildCount() > 1) { - tableName = unescapeIdentifier(ast.getChild(1).getText()); + tableName = ParseUtils.unescapeIdentifier(ast.getChild(1).getText()); } } List> specs = getPartitionSpecs(ast); - MsckDesc checkDesc = new MsckDesc(tableName, specs, ctx.getResFile(), repair); - rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), checkDesc), conf)); + MsckDesc checkDesc = new MsckDesc(tableName, specs, getContext().getResFile(), repair); + getPhysicalPlan().addRootTask(TaskFactory.get(new DDLWork(getPhysicalPlan(), checkDesc), getHiveConf())); } /** @@ -505,7 +503,7 @@ Map partSpec = new LinkedHashMap(); for (int i = 0; i < partspec.getChildCount(); ++i) { CommonTree partspec_val = (CommonTree) partspec.getChild(i); - String val = stripQuotes(partspec_val.getChild(1).getText()); + String val = ParseUtils.stripQuotes(partspec_val.getChild(1).getText()); partSpec.put(partspec_val.getChild(0).getText().toLowerCase(), val); } partSpecs.add(partSpec); Index: ql/src/java/org/apache/hadoop/hive/ql/Driver.java 
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/Driver.java (revision 836131)
+++ ql/src/java/org/apache/hadoop/hive/ql/Driver.java (working copy)
@@ -31,6 +31,7 @@
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.Schema;
 import org.apache.hadoop.hive.ql.parse.ParseDriver;
+import org.apache.hadoop.hive.ql.parse.PhysicalPlan;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.parse.ParseException;
 import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer;
@@ -75,11 +76,11 @@
   private String errorMessage;
   private String SQLState;
-  public int countJobs(List> tasks) {
+  public int countJobs(Collection> tasks) {
     return countJobs(tasks, new ArrayList>());
   }
-  public int countJobs(List> tasks, List> seenTasks) {
+  public int countJobs(Collection> tasks, List> seenTasks) {
     if (tasks == null)
       return 0;
     int jobs = 0;
@@ -119,14 +120,14 @@
   public Schema getSchema() throws Exception {
     Schema schema;
     try {
-      if (plan != null && plan.getPlan().getFetchTask() != null) {
-        BaseSemanticAnalyzer sem = plan.getPlan();
+      if (plan != null && plan.getPlan().getPhysicalPlan().getFetchTask() != null) {
+        PhysicalPlan physicalPlan = plan.getPlan().getPhysicalPlan();
-        if (!sem.getFetchTaskInit()) {
-          sem.setFetchTaskInit(true);
-          sem.getFetchTask().initialize(conf, plan);
+        if (!physicalPlan.getFetchTaskInit()) {
+          physicalPlan.setFetchTaskInit(true);
+          physicalPlan.getFetchTask().initialize(conf, plan);
         }
-        FetchTask ft = (FetchTask) sem.getFetchTask();
+        FetchTask ft = (FetchTask) physicalPlan.getFetchTask();
         tableDesc td = ft.getTblDesc();
         // partitioned tables don't have tableDesc set on the FetchTask. Instead
@@ -407,15 +408,16 @@
       resStream = null;
       BaseSemanticAnalyzer sem = plan.getPlan();
+      PhysicalPlan physicalPlan = sem.getPhysicalPlan();
       // Get all the pre execution hooks and execute them.
       for(PreExecute peh: getPreExecHooks()) {
         peh.run(SessionState.get(),
-                sem.getInputs(), sem.getOutputs(),
+                physicalPlan.getInputs(), physicalPlan.getOutputs(),
                 UnixUserGroupInformation.readFromConf(conf,
                     UnixUserGroupInformation.UGI_PROPERTY_NAME));
       }
-      int jobs = countJobs(sem.getRootTasks());
+      int jobs = countJobs(physicalPlan.getRootTasks());
       if (jobs > 0) {
         console.printInfo("Total MapReduce jobs = " + jobs);
       }
@@ -433,7 +435,7 @@
      // while taking the job to run from the front of the list
      Queue> runnable = new LinkedList>();
-      for (Task rootTask : sem.getRootTasks()) {
+      for (Task rootTask : sem.getPhysicalPlan().getRootTasks()) {
        if (runnable.offer(rootTask) == false) {
          LOG.error("Could not insert the first task into the queue");
          return (1);
@@ -489,7 +491,8 @@
      // Get all the post execution hooks and execute them.
      for(PostExecute peh: getPostExecHooks()) {
        peh.run(SessionState.get(),
-                sem.getInputs(), sem.getOutputs(),
+                sem.getPhysicalPlan().getInputs(),
+                sem.getPhysicalPlan().getOutputs(),
                 UnixUserGroupInformation.readFromConf(conf,
                     UnixUserGroupInformation.UGI_PROPERTY_NAME));
      }
@@ -529,13 +532,13 @@
   }
   public boolean getResults(Vector res) throws IOException {
-    if (plan != null && plan.getPlan().getFetchTask() != null) {
-      BaseSemanticAnalyzer sem = plan.getPlan();
-      if (!sem.getFetchTaskInit()) {
-        sem.setFetchTaskInit(true);
-        sem.getFetchTask().initialize(conf, plan);
+    if (plan != null && plan.getPlan().getPhysicalPlan().getFetchTask() != null) {
+      PhysicalPlan physicalPlan = plan.getPlan().getPhysicalPlan();
+      if (!physicalPlan.getFetchTaskInit()) {
+        physicalPlan.setFetchTaskInit(true);
+        physicalPlan.getFetchTask().initialize(conf, plan);
       }
-      FetchTask ft = (FetchTask) sem.getFetchTask();
+      FetchTask ft = (FetchTask) physicalPlan.getFetchTask();
       ft.setMaxRows(maxRows);
       return ft.fetch(res);
     }
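
The Driver.java hunks above capture the caller-facing effect of this patch: root tasks, the fetch task and its init flag, and the read/write entity sets all move off the analyzer and onto the PhysicalPlan reached via BaseSemanticAnalyzer.getPhysicalPlan(). The sketch below is illustrative only, not part of the patch: it compiles only against the patched tree, the class name PhysicalPlanSketch, the walkPlan helper and the println calls are invented for the example, and the generic signatures are assumed from the way Driver.countJobs() and Driver.getResults() use the accessors above.

import java.io.Serializable;

import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer;
import org.apache.hadoop.hive.ql.parse.PhysicalPlan;

public class PhysicalPlanSketch {

  // Mirrors how Driver.execute() and Driver.getResults() read the plan after
  // this patch; before the patch the same accessors lived on the analyzer.
  public static void walkPlan(BaseSemanticAnalyzer sem) {
    PhysicalPlan physicalPlan = sem.getPhysicalPlan();

    // Root tasks: previously sem.getRootTasks(). The element type
    // Task<? extends Serializable> is assumed, matching Driver.countJobs().
    for (Task<? extends Serializable> rootTask : physicalPlan.getRootTasks()) {
      System.out.println("root task: " + rootTask.getClass().getSimpleName());
    }

    // Fetch task and its init flag: previously sem.getFetchTask() and
    // sem.getFetchTaskInit()/setFetchTaskInit().
    Task<? extends Serializable> fetchTask = physicalPlan.getFetchTask();
    if (fetchTask != null && !physicalPlan.getFetchTaskInit()) {
      physicalPlan.setFetchTaskInit(true);
      // fetchTask.initialize(conf, plan) would follow here, with the HiveConf
      // and QueryPlan supplied by the caller exactly as Driver does above.
    }

    // Read/write entities handed to the pre/post execution hooks: previously
    // sem.getInputs() and sem.getOutputs().
    System.out.println("inputs=" + physicalPlan.getInputs()
        + " outputs=" + physicalPlan.getOutputs());
  }
}

The same accessor pattern appears in the DDL and EXPLAIN analyzer hunks earlier in the patch, where getPhysicalPlan().addRootTask(...) and getPhysicalPlan().setFetchTask(...) replace the old rootTasks.add(...) and setFetchTask(...) calls on the analyzer itself.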