Adds org.apache.hadoop.hive.ql.tools.LineageInfo, an ASTEventProcessor that
parses a query and collects its input and output table names, together with
the JUnit test TestLineageInfo, and makes the DefaultASTEventDispatcher
constructor public so that LineageInfo can instantiate it from another package.

NOTE(review): indentation and the HashMap type arguments on the
DefaultASTEventDispatcher context line are assumed, not taken from the target
revision -- confirm before applying (or apply with `patch -l`).

Index: ql/src/test/org/apache/hadoop/hive/ql/tool/TestLineageInfo.java
===================================================================
--- ql/src/test/org/apache/hadoop/hive/ql/tool/TestLineageInfo.java	(revision 0)
+++ ql/src/test/org/apache/hadoop/hive/ql/tool/TestLineageInfo.java	(revision 0)
@@ -0,0 +1,125 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.tool;
+
+import java.util.TreeSet;
+import java.util.Vector;
+
+import org.apache.hadoop.hive.ql.tools.LineageInfo;
+
+import junit.framework.TestCase;
+
+public class TestLineageInfo extends TestCase {
+
+  public void testSimpleQuery(){
+    LineageInfo lep = new LineageInfo();
+    try{
+      lep.getLineageInfo(
+          "INSERT OVERWRITE TABLE dest1 partition (ds = '111') SELECT s.* FROM srcpart TABLESAMPLE (BUCKET 1 OUT OF 1) s WHERE s.ds='2008-04-08' and s.hr='11'");
+      TreeSet<String> i = new TreeSet<String>();
+      TreeSet<String> o = new TreeSet<String>();
+      i.add("srcpart");
+      o.add("dest1");
+      if ( !i.equals(lep.getInputTableList())){
+        fail("Input table not same");
+      }
+      if (! o.equals(lep.getOutputTableList())){
+        fail("Output table not same");
+      }
+
+    }
+    catch (Exception e) {
+      e.printStackTrace();
+      fail("Failed");
+    }
+
+  }
+
+  public void testSimpleQuery2(){
+    LineageInfo lep = new LineageInfo();
+    try{
+      lep.getLineageInfo(
+          "FROM (FROM src select src.key, src.value WHERE src.key < 10 UNION ALL FROM src SELECT src.* WHERE src.key > 10 ) unioninput INSERT OVERWRITE DIRECTORY '../../../../build/contrib/hive/ql/test/data/warehouse/union.out' SELECT unioninput.*"
+          );
+      TreeSet<String> i = new TreeSet<String>();
+      TreeSet<String> o = new TreeSet<String>();
+      i.add("src");
+
+      if ( !i.equals(lep.getInputTableList())){
+        fail("Input table not same");
+      }
+      if (! o.equals(lep.getOutputTableList())){
+        fail("Output table not same");
+      }
+
+    }
+    catch (Exception e) {
+      e.printStackTrace();
+      fail("Failed");
+    }
+
+  }
+
+  public void testSimpleQuery3(){
+    LineageInfo lep = new LineageInfo();
+    try{
+      lep.getLineageInfo(
+          "FROM (FROM src select src.key, src.value WHERE src.key < 10 UNION ALL FROM src1 SELECT src1.* WHERE src1.key > 10 ) unioninput INSERT OVERWRITE DIRECTORY '../../../../build/contrib/hive/ql/test/data/warehouse/union.out' SELECT unioninput.*"
+          );
+      TreeSet<String> i = new TreeSet<String>();
+      TreeSet<String> o = new TreeSet<String>();
+      i.add("src");
+      i.add("src1");
+      if ( !i.equals(lep.getInputTableList())){
+        fail("Input table not same");
+      }
+      if (! o.equals(lep.getOutputTableList())){
+        fail("Output table not same");
+      }
+
+    }
+    catch (Exception e) {
+      e.printStackTrace();
+      fail("Failed");
+    }
+
+  }
+
+  public void testSimpleQuery4(){
+    LineageInfo lep = new LineageInfo();
+    try{
+      lep.getLineageInfo(
+          "FROM ( FROM ( FROM src1 src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20) a RIGHT OUTER JOIN ( FROM src2 src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25) b ON (a.c1 = b.c3) SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4) c SELECT c.c1, c.c2, c.c3, c.c4" );
+      TreeSet<String> i = new TreeSet<String>();
+      TreeSet<String> o = new TreeSet<String>();
+      i.add("src1");
+      i.add("src2");
+      if ( !i.equals(lep.getInputTableList())){
+        fail("Input table not same");
+      }
+      if (! o.equals(lep.getOutputTableList())){
+        fail("Output table not same");
+      }
+    }
+    catch (Exception e) {
+      e.printStackTrace();
+      fail("Failed");
+    }
+  }
+}
Index: ql/src/java/org/apache/hadoop/hive/ql/tools/LineageInfo.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/tools/LineageInfo.java	(revision 0)
+++ ql/src/java/org/apache/hadoop/hive/ql/tools/LineageInfo.java	(revision 0)
@@ -0,0 +1,141 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.hadoop.hive.ql.tools;
+
+import java.io.IOException;
+import java.util.TreeSet;
+
+import org.antlr.runtime.tree.CommonTree;
+import org.apache.hadoop.hive.ql.parse.ASTEvent;
+import org.apache.hadoop.hive.ql.parse.ASTEventProcessor;
+import org.apache.hadoop.hive.ql.parse.DefaultASTEventDispatcher;
+import org.apache.hadoop.hive.ql.parse.DefaultASTProcessor;
+import org.apache.hadoop.hive.ql.parse.HiveParser;
+import org.apache.hadoop.hive.ql.parse.ParseDriver;
+import org.apache.hadoop.hive.ql.parse.ParseException;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+
+/**
+ *
+ * This class prints out the lineage info.
+ * It takes sql as input and prints lineage info.
+ * Currently this prints only input and output tables for a given sql.
+ * Later we can expand to add join tables etc.
+ *
+ */
+public class LineageInfo implements ASTEventProcessor {
+
+  /**
+   * Stores input tables in sql
+   */
+  TreeSet<String> inputTableList = new TreeSet<String>();
+  /**
+   * Stores output tables in sql
+   */
+  TreeSet<String> OutputTableList= new TreeSet<String>();
+
+  /**
+   *
+   * @return java.util.TreeSet
+   */
+  public TreeSet<String> getInputTableList() {
+    return inputTableList;
+  }
+
+  /**
+   * @return java.util.TreeSet
+   */
+  public TreeSet<String> getOutputTableList() {
+    return OutputTableList;
+  }
+
+  /* (non-Javadoc)
+   * @see org.apache.hadoop.hive.ql.parse.ASTEventProcessor#process(org.antlr.runtime.tree.CommonTree)
+   */
+  public void process(CommonTree pt) {
+
+    switch (pt.getToken().getType()) {
+
+    case HiveParser.TOK_DESTINATION: {
+      if (pt.getChild(0).getType() == HiveParser.TOK_TAB) {
+        OutputTableList.add(pt.getChild(0).getChild(0).getText()) ;
+      }
+
+    }
+      break;
+    case HiveParser.TOK_FROM: {
+      CommonTree tabRef = (CommonTree) pt.getChild(0);
+      String table_name = tabRef.getChild(0).getText();
+      inputTableList.add(table_name);
+    }
+      break;
+    }
+  }
+  /**
+   *  parses given query and gets the lineage info.
+   * @param query
+   * @throws ParseException
+   */
+  public void getLineageInfo(String query) throws ParseException
+  {
+
+    /*
+     *  Get the AST tree
+     */
+    ParseDriver pd = new ParseDriver();
+    CommonTree tree = pd.parse(query);
+
+    while ((tree.getToken() == null) && (tree.getChildCount() > 0)) {
+      tree = (CommonTree) tree.getChild(0);
+    }
+
+    /*
+     * initialize Event Processor and dispatcher.
+     */
+    inputTableList.clear();
+    OutputTableList.clear();
+    DefaultASTEventDispatcher dispatcher = new DefaultASTEventDispatcher();
+    dispatcher.register(ASTEvent.SRC_TABLE, this);
+    dispatcher.register(ASTEvent.DESTINATION, this);
+
+    DefaultASTProcessor eventProcessor = new DefaultASTProcessor();
+
+    eventProcessor.setDispatcher(dispatcher);
+    eventProcessor.process(tree);
+  }
+
+  public static void main(String[] args) throws IOException, ParseException,
+      SemanticException {
+
+    String query = args[0];
+
+    LineageInfo lep = new LineageInfo();
+
+    lep.getLineageInfo(query);
+
+    for (String tab : lep.getInputTableList()) {
+      System.out.println("InputTable=" + tab);
+    }
+
+    for (String tab : lep.getOutputTableList()) {
+      System.out.println("OutputTable=" + tab);
+    }
+  }
+}
Index: ql/src/java/org/apache/hadoop/hive/ql/parse/DefaultASTEventDispatcher.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/parse/DefaultASTEventDispatcher.java	(revision 726077)
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/DefaultASTEventDispatcher.java	(working copy)
@@ -41,7 +41,7 @@
   /**
    * Constructs the default event dispatcher
    */
-  DefaultASTEventDispatcher() {
+  public DefaultASTEventDispatcher() {
     dispatchMap = new HashMap<ASTEvent, ArrayList<ASTEventProcessor>>();
   }
 