Index: ql/src/test/org/apache/hadoop/hive/ql/tool/TestLineageInfo.java =================================================================== --- ql/src/test/org/apache/hadoop/hive/ql/tool/TestLineageInfo.java (revision 730259) +++ ql/src/test/org/apache/hadoop/hive/ql/tool/TestLineageInfo.java (working copy) @@ -28,86 +28,101 @@ /** * Checks whether the test outputs match the expected outputs - * @param lep The LineageInfo extracted from the test - * @param i The set of input tables - * @param o The set of output tables + * + * @param lep + * The LineageInfo extracted from the test + * @param i + * The set of input tables + * @param o + * The set of output tables */ private void checkOutput(LineageInfo lep, TreeSet i, TreeSet o) { - - if ( !i.equals(lep.getInputTableList())){ + + if (!i.equals(lep.getInputTableList())) { fail("Input table not same"); } - if (! o.equals(lep.getOutputTableList())){ + if (!o.equals(lep.getOutputTableList())) { fail("Output table not same"); - } + } } - - public void testSimpleQuery(){ - LineageInfo lep = new LineageInfo(); - try{ - lep.getLineageInfo( - "INSERT OVERWRITE TABLE dest1 partition (ds = '111') SELECT s.* FROM srcpart TABLESAMPLE (BUCKET 1 OUT OF 1) s WHERE s.ds='2008-04-08' and s.hr='11'"); - TreeSet i = new TreeSet(); - TreeSet o = new TreeSet(); - i.add("srcpart"); - o.add("dest1"); - checkOutput(lep, i, o); - } - catch (Exception e) { - e.printStackTrace(); - fail("Failed"); - } - } - - public void testSimpleQuery2(){ - LineageInfo lep = new LineageInfo(); - try{ - lep.getLineageInfo( - "FROM (FROM src select src.key, src.value WHERE src.key < 10 UNION ALL FROM src SELECT src.* WHERE src.key > 10 ) unioninput INSERT OVERWRITE DIRECTORY '../../../../build/contrib/hive/ql/test/data/warehouse/union.out' SELECT unioninput.*" - ); - TreeSet i = new TreeSet(); - TreeSet o = new TreeSet(); - i.add("src"); - checkOutput(lep, i, o); - } - catch (Exception e) { - e.printStackTrace(); - fail("Failed"); - } - } - - public void testSimpleQuery3(){ - LineageInfo lep = new LineageInfo(); - try{ - lep.getLineageInfo( - "FROM (FROM src select src.key, src.value WHERE src.key < 10 UNION ALL FROM src1 SELECT src1.* WHERE src1.key > 10 ) unioninput INSERT OVERWRITE DIRECTORY '../../../../build/contrib/hive/ql/test/data/warehouse/union.out' SELECT unioninput.*" - ); - TreeSet i = new TreeSet(); - TreeSet o = new TreeSet(); - i.add("src"); - i.add("src1"); - checkOutput(lep, i, o); - } - catch (Exception e) { - e.printStackTrace(); - fail("Failed"); - } - } - - public void testSimpleQuery4(){ - LineageInfo lep = new LineageInfo(); - try{ - lep.getLineageInfo( - "FROM ( FROM ( FROM src1 src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20) a RIGHT OUTER JOIN ( FROM src2 src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25) b ON (a.c1 = b.c3) SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4) c SELECT c.c1, c.c2, c.c3, c.c4" ); - TreeSet i = new TreeSet(); - TreeSet o = new TreeSet(); - i.add("src1"); - i.add("src2"); - checkOutput(lep, i, o); - } - catch (Exception e) { - e.printStackTrace(); - fail("Failed"); - } - } + + public void testSimpleQuery() { + LineageInfo lep = new LineageInfo(); + try { + lep + .getLineageInfo("INSERT OVERWRITE TABLE dest1 partition (ds = '111') SELECT s.* FROM srcpart TABLESAMPLE (BUCKET 1 OUT OF 1) s WHERE s.ds='2008-04-08' and s.hr='11'"); + TreeSet i = new TreeSet(); + TreeSet o = new TreeSet(); + i.add("srcpart"); + o.add("dest1"); + checkOutput(lep, i, o); + } catch (Exception e) { + e.printStackTrace(); + fail("Failed"); + } + } + + public void testSimpleQuery2() { + LineageInfo lep = new LineageInfo(); + try { + lep + .getLineageInfo("FROM (FROM src select src.key, src.value WHERE src.key < 10 UNION ALL FROM src SELECT src.* WHERE src.key > 10 ) unioninput INSERT OVERWRITE DIRECTORY '../../../../build/contrib/hive/ql/test/data/warehouse/union.out' SELECT unioninput.*"); + TreeSet i = new TreeSet(); + TreeSet o = new TreeSet(); + i.add("src"); + checkOutput(lep, i, o); + } catch (Exception e) { + e.printStackTrace(); + fail("Failed"); + } + } + + public void testSimpleQuery3() { + LineageInfo lep = new LineageInfo(); + try { + lep + .getLineageInfo("FROM (FROM src select src.key, src.value WHERE src.key < 10 UNION ALL FROM src1 SELECT src1.* WHERE src1.key > 10 ) unioninput INSERT OVERWRITE DIRECTORY '../../../../build/contrib/hive/ql/test/data/warehouse/union.out' SELECT unioninput.*"); + TreeSet i = new TreeSet(); + TreeSet o = new TreeSet(); + i.add("src"); + i.add("src1"); + checkOutput(lep, i, o); + } catch (Exception e) { + e.printStackTrace(); + fail("Failed"); + } + } + + public void testSimpleQuery4() { + LineageInfo lep = new LineageInfo(); + try { + lep + .getLineageInfo("FROM ( FROM ( FROM src1 src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20) a RIGHT OUTER JOIN ( FROM src2 src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25) b ON (a.c1 = b.c3) SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4) c SELECT c.c1, c.c2, c.c3, c.c4"); + TreeSet i = new TreeSet(); + TreeSet o = new TreeSet(); + i.add("src1"); + i.add("src2"); + checkOutput(lep, i, o); + } catch (Exception e) { + e.printStackTrace(); + fail("Failed"); + } + } + + public void testSimpleQuery5() { + LineageInfo lep = new LineageInfo(); + try { + lep + .getLineageInfo("insert overwrite table x select a.y, b.y from a a full outer join b b on (a.x = b.y)"); + TreeSet i = new TreeSet(); + TreeSet o = new TreeSet(); + i.add("a"); + i.add("b"); + o.add("x"); + checkOutput(lep, i, o); + } catch (Exception e) { + e.printStackTrace(); + fail("Failed"); + } + } } Index: ql/src/java/org/apache/hadoop/hive/ql/tools/LineageInfo.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/tools/LineageInfo.java (revision 730259) +++ ql/src/java/org/apache/hadoop/hive/ql/tools/LineageInfo.java (working copy) @@ -33,7 +33,6 @@ import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; import org.apache.hadoop.hive.ql.lib.GraphWalker; import org.apache.hadoop.hive.ql.lib.Rule; -import org.apache.hadoop.hive.ql.lib.RuleRegExp; import org.apache.hadoop.hive.ql.parse.ASTNode; import org.apache.hadoop.hive.ql.parse.HiveParser; import org.apache.hadoop.hive.ql.parse.ParseDriver; @@ -50,82 +49,75 @@ */ public class LineageInfo implements NodeProcessor { - /** - * Stores input tables in sql - */ - TreeSet inputTableList = new TreeSet(); - /** - * Stores output tables in sql - */ - TreeSet OutputTableList= new TreeSet(); + /** + * Stores input tables in sql + */ + TreeSet inputTableList = new TreeSet(); + /** + * Stores output tables in sql + */ + TreeSet OutputTableList= new TreeSet(); - /** - * - * @return java.util.TreeSet - */ - public TreeSet getInputTableList() { - return inputTableList; - } + /** + * + * @return java.util.TreeSet + */ + public TreeSet getInputTableList() { + return inputTableList; + } - /** - * @return java.util.TreeSet - */ - public TreeSet getOutputTableList() { - return OutputTableList; - } + /** + * @return java.util.TreeSet + */ + public TreeSet getOutputTableList() { + return OutputTableList; + } - /** - * Implements the process method for the NodeProcessor interface. - */ + /** + * Implements the process method for the NodeProcessor interface. + */ @Override public void process(Node nd, NodeProcessorCtx procCtx) - throws SemanticException { + throws SemanticException { ASTNode pt = (ASTNode)nd; + switch (pt.getToken().getType()) { - case HiveParser.TOK_DESTINATION: { - if (pt.getChild(0).getType() == HiveParser.TOK_TAB) { - OutputTableList.add(pt.getChild(0).getChild(0).getText()) ; - } + case HiveParser.TOK_TAB: + OutputTableList.add(pt.getChild(0).getText()) ; + break; + case HiveParser.TOK_TABREF: + String table_name = ((ASTNode)pt.getChild(0)).getText(); + inputTableList.add(table_name); + break; } - break; - case HiveParser.TOK_FROM: { - if (((ASTNode)pt.getChild(0)).getToken().getType() == HiveParser.TOK_TABREF) { - ASTNode tabRef = (ASTNode) pt.getChild(0); - String table_name = tabRef.getChild(0).getText(); - inputTableList.add(table_name); - } - } - break; - } - + } - - /** - * parses given query and gets the lineage info. - * @param query - * @throws ParseException - */ - public void getLineageInfo(String query) throws ParseException, SemanticException - { - /* - * Get the AST tree - */ - ParseDriver pd = new ParseDriver(); - ASTNode tree = pd.parse(query); + /** + * parses given query and gets the lineage info. + * @param query + * @throws ParseException + */ + public void getLineageInfo(String query) throws ParseException, SemanticException { - while ((tree.getToken() == null) && (tree.getChildCount() > 0)) { - tree = (ASTNode) tree.getChild(0); - } + /* + * Get the AST tree + */ + ParseDriver pd = new ParseDriver(); + ASTNode tree = pd.parse(query); - /* - * initialize Event Processor and dispatcher. - */ - inputTableList.clear(); - OutputTableList.clear(); - + while ((tree.getToken() == null) && (tree.getChildCount() > 0)) { + tree = (ASTNode) tree.getChild(0); + } + + /* + * initialize Event Processor and dispatcher. + */ + inputTableList.clear(); + OutputTableList.clear(); + // create a walker which walks the tree in a DFS manner while maintaining the operator stack. The dispatcher // generates the plan from the operator tree Map rules = new LinkedHashMap(); @@ -133,28 +125,28 @@ // The dispatcher fires the processor corresponding to the closest matching rule and passes the context along Dispatcher disp = new DefaultRuleDispatcher(this, rules, null); GraphWalker ogw = new DefaultGraphWalker(disp); - + // Create a list of topop nodes ArrayList topNodes = new ArrayList(); topNodes.add(tree); ogw.startWalking(topNodes); - } + } - public static void main(String[] args) throws IOException, ParseException, - SemanticException { + public static void main(String[] args) throws IOException, ParseException, + SemanticException { - String query = args[0]; + String query = args[0]; - LineageInfo lep = new LineageInfo(); + LineageInfo lep = new LineageInfo(); - lep.getLineageInfo(query); + lep.getLineageInfo(query); - for (String tab : lep.getInputTableList()) { - System.out.println("InputTable=" + tab); - } + for (String tab : lep.getInputTableList()) { + System.out.println("InputTable=" + tab); + } - for (String tab : lep.getOutputTableList()) { - System.out.println("OutputTable=" + tab); - } - } + for (String tab : lep.getOutputTableList()) { + System.out.println("OutputTable=" + tab); + } + } }