Index: ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (revision 7603)
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (revision 7644)
@@ -27,14 +27,18 @@
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.Map.Entry;
 import java.util.Set;
+import java.util.Stack;
 import java.util.TreeSet;
-import java.util.Map.Entry;
 import java.util.regex.Pattern;
 import java.util.regex.PatternSyntaxException;
 
+import org.antlr.runtime.CommonToken;
 import org.antlr.runtime.tree.Tree;
 import org.apache.commons.lang.StringUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.fs.ContentSummary;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.PathFilter;
@@ -42,6 +46,7 @@
 import org.apache.hadoop.hive.common.JavaUtils;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
+import org.apache.hadoop.hive.metastore.TableType;
 import org.apache.hadoop.hive.metastore.Warehouse;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.MetaException;
@@ -83,6 +88,7 @@
 import org.apache.hadoop.hive.ql.lib.GraphWalker;
 import org.apache.hadoop.hive.ql.lib.Node;
 import org.apache.hadoop.hive.ql.lib.NodeProcessor;
+import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
 import org.apache.hadoop.hive.ql.lib.Rule;
 import org.apache.hadoop.hive.ql.lib.RuleRegExp;
 import org.apache.hadoop.hive.ql.metadata.DummyPartition;
@@ -96,6 +102,7 @@
 import org.apache.hadoop.hive.ql.optimizer.GenMRFileSink1;
 import org.apache.hadoop.hive.ql.optimizer.GenMROperator;
 import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext;
+import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx;
 import org.apache.hadoop.hive.ql.optimizer.GenMRRedSink1;
 import org.apache.hadoop.hive.ql.optimizer.GenMRRedSink2;
 import org.apache.hadoop.hive.ql.optimizer.GenMRRedSink3;
@@ -105,7 +112,6 @@
 import org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils;
 import org.apache.hadoop.hive.ql.optimizer.MapJoinFactory;
 import org.apache.hadoop.hive.ql.optimizer.Optimizer;
-import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx;
 import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalContext;
 import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalOptimizer;
 import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner;
@@ -126,6 +132,7 @@
 import org.apache.hadoop.hive.ql.plan.FetchWork;
 import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
 import org.apache.hadoop.hive.ql.plan.FilterDesc;
+import org.apache.hadoop.hive.ql.plan.FilterDesc.sampleDesc;
 import org.apache.hadoop.hive.ql.plan.ForwardDesc;
 import org.apache.hadoop.hive.ql.plan.GroupByDesc;
 import org.apache.hadoop.hive.ql.plan.HiveOperation;
@@ -148,14 +155,13 @@
 import org.apache.hadoop.hive.ql.plan.TableScanDesc;
 import org.apache.hadoop.hive.ql.plan.UDTFDesc;
 import org.apache.hadoop.hive.ql.plan.UnionDesc;
-import org.apache.hadoop.hive.ql.plan.FilterDesc.sampleDesc;
 import org.apache.hadoop.hive.ql.session.SessionState;
 import org.apache.hadoop.hive.ql.session.SessionState.ResourceType;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFHash;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode;
 import org.apache.hadoop.hive.serde.Constants;
 import org.apache.hadoop.hive.serde2.Deserializer;
 import org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe;
@@ -163,16 +169,15 @@
 import org.apache.hadoop.hive.serde2.SerDeUtils;
 import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
 import org.apache.hadoop.hive.serde2.objectinspector.StructField;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveTypeEntry;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
 import org.apache.hadoop.mapred.InputFormat;
-import org.apache.hadoop.hive.metastore.TableType;
 
 /**
  * Implementation of the semantic analyzer.
@@ -1172,6 +1177,370 @@
     qb.rewriteViewToSubq(alias, tab_name, qbexpr);
   }
 
+  /**
+   * ASTTransformer context.
+   */
+  public static class ASTTransformerCtx implements NodeProcessorCtx {
+    protected static final Log LOG = LogFactory.getLog(ASTTransformerCtx.class
+        .getName());
+
+    private final Map<String, RowResolver> tabToRR = new HashMap<String, RowResolver>();
+
+    private final HiveConf conf;
+    private final QB qb;
+    private Map<Node, Object> info;
+
+    /**
+     * Get the RowResolver for a table alias.
+     */
+    public RowResolver getRowResolver(String alias) {
+      if (alias == null) {
+        return null;
+      }
+      if (!tabToRR.containsKey(alias)) {
+        Table tab = qb.getMetaData().getSrcForAlias(alias);
+        if (tab == null) {
+          return null;
+        }
+        RowResolver rwsch = new RowResolver();
+        try {
+          StructObjectInspector rowObjectInspector = (StructObjectInspector) tab
+              .getDeserializer().getObjectInspector();
+          List<? extends StructField> fields = rowObjectInspector
+              .getAllStructFieldRefs();
+          for (int i = 0; i < fields.size(); i++) {
+            rwsch.put(alias, fields.get(i).getFieldName(), new ColumnInfo(fields
+                .get(i).getFieldName(), TypeInfoUtils
+                .getTypeInfoFromObjectInspector(fields.get(i)
+                .getFieldObjectInspector()), alias, false));
+          }
+        } catch (SerDeException e) {
+          throw new RuntimeException(e);
+        }
+        // Hack!! - refactor once the metadata APIs with types are ready
+        // Finally add the partitioning columns
+        for (FieldSchema part_col : tab.getPartCols()) {
+          LOG.trace("Adding partition col: " + part_col);
+          // TODO: use the right type by calling part_col.getType() instead of
+          // String.class
+          rwsch.put(alias, part_col.getName(), new ColumnInfo(part_col.getName(),
+              TypeInfoFactory.stringTypeInfo, alias, true));
+        }
+
+        // put all virtual columns in the RowResolver
+        Iterator<VirtualColumn> vcs = VirtualColumn.getRegistry(conf).iterator();
+        // use a list for easier customization
+        List<VirtualColumn> vcList = new ArrayList<VirtualColumn>();
+        while (vcs.hasNext()) {
+          VirtualColumn vc = vcs.next();
+          rwsch.put(alias, vc.getName(), new ColumnInfo(vc.getName(),
+              vc.getTypeInfo(), alias, true, vc.getIsHidden()));
+          vcList.add(vc);
+        }
+
+        tabToRR.put(alias, rwsch);
+      }
+      return tabToRR.get(alias);
+    }
+
+    /**
+     * Constructor.
+     */
+    public ASTTransformerCtx(HiveConf conf, QB qb) {
+      this.conf = conf;
+      this.qb = qb;
+    }
+
+    public void setMetaInfoMap(Map<Node, Object> info) {
+      this.info = info;
+    }
+
+    public Map<Node, Object> getMetaInfoMap() {
+      return info;
+    }
+  }
+
+  /**
+   * A wrapper for all the information collected from each AST node
+   * in the first phase of the transform.
+   */
+  public static class MetaInfo {
+
+    public static class TableName {
+      String db;
+      String tbAlias;
+
+      public TableName(String db, String tbAlias) {
+        this.db = db;
+        this.tbAlias = tbAlias;
+      }
+
+      public TableName(String tbAlias) {
+        this("", tbAlias);
+        // if tbAlias contains '.', split it into db and table alias
+        if (tbAlias.contains(".")) {
+          String[] items = tbAlias.split("\\.");
+          this.db = items[0];
+          this.tbAlias = items[1];
+        }
+      }
+
+      @Override
+      public String toString() {
+        return tbAlias;
+      }
+    }
+
+    private Set<TableName> tables;
+
+    public Set<TableName> getTableAliases() {
+      return tables;
+    }
+
+    public void addTableAliases(Set<TableName> tableAliases) {
+      if (tableAliases == null) {
+        return;
+      }
+      if (this.tables == null) {
+        tables = new HashSet<TableName>();
+      }
+      tables.addAll(tableAliases);
+    }
+
+    @Override
+    public String toString() {
+      StringBuilder sb = new StringBuilder();
+      if (tables != null) {
+        sb.append("Table aliases : ");
+        for (TableName tb : tables) {
+          sb.append(tb.toString() + ",");
+        }
+        sb.append("\n");
+      } else {
+        sb.append("No aliases found");
+      }
+      return sb.toString();
+    }
+  }
+
+  /**
+   * The factory for creating the information-collecting processor and the
+   * subtree-replacing processors.
+   */
+  public static class ASTTransformerFactory {
+
+    protected static final Log LOG = LogFactory.getLog(ASTTransformerFactory.class
+        .getName());
+
+    private ASTTransformerFactory() {
+      // prevent instantiation
+    }
+
+    /**
+     * Processor to replace part of the AST.
+     */
+    public static class ReplaceSubTreeProcessor implements NodeProcessor {
+
+      private ASTNode buildSelExprSubTree(String tableAlias, String col) {
+        ASTNode selexpr = new ASTNode(new CommonToken(HiveParser.TOK_SELEXPR, "TOK_SELEXPR"));
+        ASTNode tableOrCol = new ASTNode(new CommonToken(HiveParser.TOK_TABLE_OR_COL, "TOK_TABLE_OR_COL"));
+        ASTNode dot = new ASTNode(new CommonToken(HiveParser.DOT, "."));
+        tableOrCol.addChild(new ASTNode(new CommonToken(HiveParser.Identifier, tableAlias)));
+        dot.addChild(tableOrCol);
+        dot.addChild(new ASTNode(new CommonToken(HiveParser.Identifier, col)));
+        selexpr.addChild(dot);
+        return selexpr;
+      }
+
+      private void addSelExprSubTree(ASTNode selectDI, String tableAlias, List<String> columnNames) {
+        for (String col : columnNames) {
+          selectDI.addChild(buildSelExprSubTree(tableAlias, col));
+        }
+      }
+
+      @Override
+      public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+          Object... nodeOutputs) throws SemanticException {
+
+        ASTNode node = (ASTNode) nd;
+        ASTTransformerCtx ctx = (ASTTransformerCtx) procCtx;
+        if (node.getType() == HiveParser.TOK_SELECTDI &&
+            node.getChildCount() == 1 &&
+            node.getChild(0).getType() == HiveParser.TOK_SELEXPR &&
+            node.getChild(0).getChildCount() == 1 &&
+            node.getChild(0).getChild(0).getType() == HiveParser.TOK_ALLCOLREF) {
+
+          LOG.info("Transforming AST : SELECT DISTINCT *");
+          ASTNode allcolref = (ASTNode) node.getChild(0).getChild(0);
+
+          // replace "select distinct t.* from t"
+          // with "select distinct t.a,t.b,t.c... from t",
+          // collecting the table name from the "from" clause of the query,
+          // given that t is a table alias
+          if (allcolref.getChildCount() == 1 &&
+              allcolref.getChild(0).getType() == HiveParser.TOK_TABNAME) {
+            String tableAlias = BaseSemanticAnalyzer.unescapeIdentifier(allcolref
+                .getChild(0).getChild(0).getText());
+            RowResolver inputRR = ctx.getRowResolver(tableAlias);
+            node.deleteChild(0);
+            if (inputRR != null) {
+              List<String> columnNames = inputRR.getNonHiddenColumnNames(-1);
+              addSelExprSubTree(node, tableAlias, columnNames);
+            } else {
+              throw new SemanticException("unrecognized table alias " + tableAlias
+                  + " in select distinct");
+            }
+          } else if (allcolref.getChildCount() == 0) {
+            // replace "select distinct * from t"
+            // with "select distinct t.a,t.b,t.c... from t",
+            // collecting the table names from the "from" clause of the query
+            node.deleteChild(0);
+            // go up the stack to find the nearest ancestor TOK_QUERY
+            // (a bit of a hack here)
+            int last = stack.size();
+            ASTNode n = null;
+            for (int i = last - 1; i >= 0; i--) {
+              if (((ASTNode) stack.get(i)).getType() == HiveParser.TOK_QUERY) {
+                n = (ASTNode) stack.get(i);
+                break;
+              }
+            }
+            if (n == null) {
+              return null;
+            }
+
+            Map<Node, Object> meta = ctx.getMetaInfoMap();
+            for (MetaInfo.TableName tb : ((MetaInfo) meta.get(n)).getTableAliases()) {
+              String tableAlias = BaseSemanticAnalyzer.unescapeIdentifier(tb.toString());
+              RowResolver inputRR = ctx.getRowResolver(tableAlias);
+              if (inputRR != null) {
+                List<String> columnNames = inputRR.getNonHiddenColumnNames(-1);
+                addSelExprSubTree(node, tableAlias, columnNames);
+              } else {
+                throw new SemanticException("unrecognized table alias " + tableAlias
+                    + " in select distinct");
+              }
+            }
+          }
+
+        }
+        return null;
+      }
+
+    }
+
+    /**
+     * Factory method to get a ReplaceSubTreeProcessor.
+     *
+     * @return ReplaceSubTreeProcessor.
+     */
+    public static ReplaceSubTreeProcessor getReplaceSubTreeProcessor() {
+      return new ReplaceSubTreeProcessor();
+    }
+
+    /**
+     * Processor to collect table alias info.
+     */
+    public static class TableAliasCollectorProcessor implements NodeProcessor {
+
+      @Override
+      public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+          Object... nodeOutputs) throws SemanticException {
+        ASTNode node = (ASTNode) nd;
+        MetaInfo meta = new MetaInfo();
+        // collect table names so that "select distinct *" and "select distinct t.*"
+        // can be expanded later
+        if (node.getType() == HiveParser.TOK_TABNAME) {
+          Set<MetaInfo.TableName> names = new HashSet<MetaInfo.TableName>();
+          if (node.getChildCount() == 1) {
+            names.add(new MetaInfo.TableName(node.getChild(0).getText()));
+          } else if (node.getChildCount() == 2) {
+            names.add(new MetaInfo.TableName(node.getChild(0).getText(),
+                node.getChild(1).getText()));
+          }
+          meta.addTableAliases(names);
+          return meta;
+        } else if (node.getType() == HiveParser.TOK_SUBQUERY) {
+          // return empty meta
+          return meta;
+        }
+
+        // for other nodes, add the table aliases collected from the children
+        if (nodeOutputs != null) {
+          for (Object out : nodeOutputs) {
+            meta.addTableAliases(((MetaInfo) out).getTableAliases());
+          }
+        }
+        return meta;
+      }
+    }
+
+    /**
+     * Factory method to get a TableAliasCollectorProcessor.
+     *
+     * @return TableAliasCollectorProcessor.
+     */
+    public static TableAliasCollectorProcessor getTableAliasCollectorProcessor() {
+      return new TableAliasCollectorProcessor();
+    }
+
+    /**
+     * Default node processor.
+     */
+    public static class DefaultNodeProcessor implements NodeProcessor {
+      @Override
+      public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+          Object... nodeOutputs) throws SemanticException {
+        return nodeOutputs; // return nodeOutputs as an array
+      }
+    }
+
+    /**
+     * Factory method to get a DefaultNodeProcessor.
+     *
+     * @return DefaultNodeProcessor.
+     */
+    public static DefaultNodeProcessor getDefaultNodeProcessor() {
+      return new DefaultNodeProcessor();
+    }
+
+    /**
+     * The transform runs in two phases, each requiring a full traversal of the AST:
+     * 1. Collect the necessary information and build the MetaInfo map.
+     * 2. Use the collected information to transform AST nodes.
+     */
+    public static void transform(ASTNode root, ASTTransformerCtx ctx) throws SemanticException {
+      // The first traversal, which collects the information needed for the second round.
+      Map<Rule, NodeProcessor> collectRules = new LinkedHashMap<Rule, NodeProcessor>();
+      Dispatcher disp = new DefaultRuleDispatcher(getTableAliasCollectorProcessor(),
+          collectRules, ctx);
+      GraphWalker agw = new DefaultGraphWalker(disp);
+      ArrayList<Node> topNodes = new ArrayList<Node>();
+      topNodes.add(root);
+      HashMap<Node, Object> metaInfoMap = new HashMap<Node, Object>();
+      agw.startWalking(topNodes, metaInfoMap);
+
+      LOG.debug(metaInfoMap.toString());
+
+      ctx.setMetaInfoMap(metaInfoMap);
+
+      // The second traversal, which transforms subtrees.
+      // At present only one rule is added, to replace SELECTDI * with real columns;
+      // more transforms may be added later.
+      Map<Rule, NodeProcessor> replaceRules = new LinkedHashMap<Rule, NodeProcessor>();
+      replaceRules.put(new RuleRegExp("R1", HiveParser.TOK_SELECTDI + "%"),
+          getReplaceSubTreeProcessor());
+
+      disp = new DefaultRuleDispatcher(getDefaultNodeProcessor(),
+          replaceRules, ctx);
+      agw = new DefaultGraphWalker(disp);
+      topNodes = new ArrayList<Node>();
+      topNodes.add(root);
+      HashMap<Node, Object> nodeOutputs = new HashMap<Node, Object>();
+      agw.startWalking(topNodes, nodeOutputs);
+    }
+  }
+
+  private void transformAST(QB qb, ASTNode nd) throws SemanticException {
+    ASTTransformerCtx ctx = new ASTTransformerCtx(conf, qb);
+    ASTTransformerFactory.transform(nd, ctx);
+  }
+
   private boolean isPresent(String[] list, String elem) {
     for (String s : list) {
       if (s.toLowerCase().equals(elem)) {
@@ -7524,6 +7893,9 @@
     getMetaData(qb);
     LOG.info("Completed getting MetaData in Semantic Analysis");
 
+    // Add another stage to allow adjusting the AST
+    transformAST(qb, ast);
+    LOG.info("Completed AST Adjustment");
     // Save the result schema derived from the sink operator produced
     // by genPlan. This has the correct column names, which clients
     // such as JDBC would prefer instead of the c0, c1 we'll end
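
For illustration only (not part of the patch): a minimal sketch of what the new transformAST stage does. The table src(key, value) is an assumption, and conf/qb are assumed to be the analyzer's own HiveConf and QB after getMetaData(qb) has run; the fragment would also need to sit inside a method and handle ParseException/SemanticException.

    // Parse a query and run the new AST transform on it (illustrative fragment).
    ASTNode ast = new ParseDriver().parse("SELECT DISTINCT * FROM src");
    // conf and qb come from the enclosing SemanticAnalyzer, with table metadata
    // already resolved via getMetaData(qb) so getRowResolver() can find "src".
    ASTTransformerCtx ctx = new ASTTransformerCtx(conf, qb);
    ASTTransformerFactory.transform(ast, ctx);
    // The TOK_SELECTDI node now carries one TOK_SELEXPR per non-hidden column, e.g.
    //   (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key))
    //   (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) value))
    // so the query is planned as if it were: SELECT DISTINCT src.key, src.value FROM src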