Index: ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java (revision 1513727) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java (working copy) @@ -133,9 +133,9 @@ import org.apache.hadoop.hive.ql.udf.generic.*; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFLeadLag.GenericUDFLag; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFLeadLag.GenericUDFLead; -import org.apache.hadoop.hive.ql.udf.ptf.NPath.NPathResolver; import org.apache.hadoop.hive.ql.udf.ptf.Noop.NoopResolver; import org.apache.hadoop.hive.ql.udf.ptf.NoopWithMap.NoopWithMapResolver; +import org.apache.hadoop.hive.ql.udf.ptf.RegexPath.RegexPathResolver; import org.apache.hadoop.hive.ql.udf.ptf.TableFunctionResolver; import org.apache.hadoop.hive.ql.udf.ptf.WindowingTableFunction.WindowingTableFunctionResolver; import org.apache.hadoop.hive.ql.udf.xml.GenericUDFXPath; @@ -450,7 +450,7 @@ registerTableFunction(NOOP_TABLE_FUNCTION, NoopResolver.class); registerTableFunction(NOOP_MAP_TABLE_FUNCTION, NoopWithMapResolver.class); registerTableFunction(WINDOWING_TABLE_FUNCTION, WindowingTableFunctionResolver.class); - registerTableFunction("npath", NPathResolver.class); + registerTableFunction("regex_path", RegexPathResolver.class); } public static void registerTemporaryUDF(String functionName, Index: ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/NPath.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/NPath.java (revision 1513727) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/NPath.java (working copy) @@ -1,919 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.udf.ptf; - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import org.apache.hadoop.hive.ql.exec.ColumnInfo; -import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator; -import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory; -import org.apache.hadoop.hive.ql.exec.PTFPartition; -import org.apache.hadoop.hive.ql.exec.PTFPartition.PTFPartitionIterator; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.parse.ASTNode; -import org.apache.hadoop.hive.ql.parse.PTFTranslator; -import org.apache.hadoop.hive.ql.parse.RowResolver; -import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer; -import org.apache.hadoop.hive.ql.parse.SemanticException; -import org.apache.hadoop.hive.ql.parse.TypeCheckCtx; -import org.apache.hadoop.hive.ql.parse.TypeCheckProcFactory; -import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowExpressionSpec; -import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; -import org.apache.hadoop.hive.ql.plan.PTFDesc; -import org.apache.hadoop.hive.ql.plan.PTFDesc.PTFExpressionDef; -import org.apache.hadoop.hive.ql.plan.PTFDesc.PTFInputDef; -import org.apache.hadoop.hive.ql.plan.PTFDesc.PartitionedTableFunctionDef; -import 
org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; -import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.StandardListObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.StructField; -import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; - -/** - * return rows that meet a specified pattern. Use symbols to specify a list of expressions - * to match. - * Pattern is used to specify a Path. The results list can contain expressions based on - * the input columns and also the matched Path. - *
    - *
  1. pattern: pattern for the Path. Path is 'dot' separated list of symbols. - * Each element is treated as a symbol. Elements that end in '*' or '+' are interpreted with - * the usual meaning of zero or more, one or more respectively. For e.g. - * "LATE.EARLY*.ONTIMEOREARLY" implies a sequence of flights - * where the first occurrence was LATE, followed by zero or more EARLY flights, - * followed by a ONTIME or EARLY flight. - *
  2. symbols specify a list of name, expression pairs. For e.g. - * 'LATE', arrival_delay > 0, 'EARLY', arrival_delay < 0 , 'ONTIME', arrival_delay == 0. - * These symbols can be used in the Pattern defined above. - *
  3. resultSelectList specified as a select list. - * The expressions in the selectList are evaluated in the context where all the - * input columns are available, plus the attribute - * "tpath" is available. Path is a collection of rows that represents the matching Path. - *
- */ -public class NPath extends TableFunctionEvaluator -{ - private transient String patternStr; - private transient SymbolsInfo symInfo; - private transient String resultExprStr; - private transient SymbolFunction syFn; - private ResultExprInfo resultExprInfo; - /* - * the names of the Columns of the input to NPath. Used to setup the tpath Struct column. - */ - private HashMap inputColumnNamesMap; - - @Override - public void execute(PTFPartitionIterator pItr, PTFPartition outP) throws HiveException - { - while (pItr.hasNext()) - { - Object iRow = pItr.next(); - - SymbolFunctionResult syFnRes = SymbolFunction.match(syFn, iRow, pItr); - if (syFnRes.matches ) - { - int sz = syFnRes.nextRow - (pItr.getIndex() - 1); - Object selectListInput = NPath.getSelectListInput(iRow, - tDef.getInput().getOutputShape().getOI(), pItr, sz); - ArrayList oRow = new ArrayList(); - for(ExprNodeEvaluator resExprEval : resultExprInfo.resultExprEvals) - { - oRow.add(resExprEval.evaluate(selectListInput)); - } - outP.append(oRow); - } - } - } - - static void throwErrorWithSignature(String message) throws SemanticException - { - throw new SemanticException(String.format( - "NPath signature is: SymbolPattern, one or more SymbolName, " + - "expression pairs, the result expression as a select list. Error %s", - message)); - } - - public HashMap getInputColumnNames() { - return inputColumnNamesMap; - } - - public void setInputColumnNames(HashMap inputColumnNamesMap) { - this.inputColumnNamesMap = inputColumnNamesMap; - } - - public static class NPathResolver extends TableFunctionResolver - { - - @Override - protected TableFunctionEvaluator createEvaluator(PTFDesc ptfDesc, - PartitionedTableFunctionDef tDef) - { - - return new NPath(); - } - - /** - *
    - *
  • check structure of Arguments: - *
      - *
    1. First arg should be a String - *
    2. then there should be an even number of Arguments: - * String, expression; expression should be Convertible to Boolean. - *
    3. finally there should be a String. - *
    - *
  • convert pattern into a NNode chain. - *
  • convert symbol args into a Symbol Map. - *
  • parse selectList into SelectList struct. The inputOI used to translate - * these expressions should be based on the - * columns in the Input, the 'path.attr' - *
- */ - @Override - public void setupOutputOI() throws SemanticException - { - NPath evaluator = (NPath) getEvaluator(); - PartitionedTableFunctionDef tDef = evaluator.getTableDef(); - - ArrayList args = tDef.getArgs(); - int argsNum = args == null ? 0 : args.size(); - - if ( argsNum < 4 ) - { - throwErrorWithSignature("at least 4 arguments required"); - } - - validateAndSetupPatternStr(evaluator, args); - validateAndSetupSymbolInfo(evaluator, args, argsNum); - validateAndSetupResultExprStr(evaluator, args, argsNum); - setupSymbolFunctionChain(evaluator); - - /* - * setup OI for input to resultExpr select list - */ - RowResolver selectListInputRR = NPath.createSelectListRR(evaluator, tDef.getInput()); - - /* - * parse ResultExpr Str and setup OI. - */ - ResultExpressionParser resultExprParser = - new ResultExpressionParser(evaluator.resultExprStr, selectListInputRR); - try { - resultExprParser.translate(); - } - catch(HiveException he) { - throw new SemanticException(he); - } - evaluator.resultExprInfo = resultExprParser.getResultExprInfo(); - StructObjectInspector OI = evaluator.resultExprInfo.resultOI; - - setOutputOI(OI); - } - /* - * validate and setup patternStr - */ - private void validateAndSetupPatternStr(NPath evaluator, - ArrayList args) throws SemanticException { - PTFExpressionDef symboPatternArg = args.get(0); - ObjectInspector symbolPatternArgOI = symboPatternArg.getOI(); - - if ( !ObjectInspectorUtils.isConstantObjectInspector(symbolPatternArgOI) || - (symbolPatternArgOI.getCategory() != ObjectInspector.Category.PRIMITIVE) || - ((PrimitiveObjectInspector)symbolPatternArgOI).getPrimitiveCategory() != - PrimitiveObjectInspector.PrimitiveCategory.STRING ) - { - throwErrorWithSignature("Currently the symbol Pattern must be a Constant String."); - } - - evaluator.patternStr = ((ConstantObjectInspector)symbolPatternArgOI). 
- getWritableConstantValue().toString(); - } - - /* - * validate and setup SymbolInfo - */ - private void validateAndSetupSymbolInfo(NPath evaluator, - ArrayList args, - int argsNum) throws SemanticException { - int symbolArgsSz = argsNum - 2; - if ( symbolArgsSz % 2 != 0) - { - throwErrorWithSignature("Symbol Name, Expression need to be specified in pairs: " + - "there are odd number of symbol args"); - } - - evaluator.symInfo = new SymbolsInfo(symbolArgsSz/2); - for(int i=1; i <= symbolArgsSz; i += 2) - { - PTFExpressionDef symbolNameArg = args.get(i); - ObjectInspector symbolNameArgOI = symbolNameArg.getOI(); - - if ( !ObjectInspectorUtils.isConstantObjectInspector(symbolNameArgOI) || - (symbolNameArgOI.getCategory() != ObjectInspector.Category.PRIMITIVE) || - ((PrimitiveObjectInspector)symbolNameArgOI).getPrimitiveCategory() != - PrimitiveObjectInspector.PrimitiveCategory.STRING ) - { - throwErrorWithSignature( - String.format("Currently a Symbol Name(%s) must be a Constant String", - symbolNameArg.getExpressionTreeString())); - } - String symbolName = ((ConstantObjectInspector)symbolNameArgOI). 
- getWritableConstantValue().toString(); - - PTFExpressionDef symolExprArg = args.get(i+1); - ObjectInspector symolExprArgOI = symolExprArg.getOI(); - if ( (symolExprArgOI.getCategory() != ObjectInspector.Category.PRIMITIVE) || - ((PrimitiveObjectInspector)symolExprArgOI).getPrimitiveCategory() != - PrimitiveObjectInspector.PrimitiveCategory.BOOLEAN ) - { - throwErrorWithSignature(String.format("Currently a Symbol Expression(%s) " + - "must be a boolean expression", symolExprArg.getExpressionTreeString())); - } - evaluator.symInfo.add(symbolName, symolExprArg); - } - } - - /* - * validate and setup resultExprStr - */ - private void validateAndSetupResultExprStr(NPath evaluator, - ArrayList args, - int argsNum) throws SemanticException { - PTFExpressionDef resultExprArg = args.get(argsNum - 1); - ObjectInspector resultExprArgOI = resultExprArg.getOI(); - - if ( !ObjectInspectorUtils.isConstantObjectInspector(resultExprArgOI) || - (resultExprArgOI.getCategory() != ObjectInspector.Category.PRIMITIVE) || - ((PrimitiveObjectInspector)resultExprArgOI).getPrimitiveCategory() != - PrimitiveObjectInspector.PrimitiveCategory.STRING ) - { - throwErrorWithSignature("Currently the result Expr parameter must be a Constant String."); - } - - evaluator.resultExprStr = ((ConstantObjectInspector)resultExprArgOI). - getWritableConstantValue().toString(); - } - - /* - * setup SymbolFunction chain. 
- */ - private void setupSymbolFunctionChain(NPath evaluator) throws SemanticException { - SymbolParser syP = new SymbolParser(evaluator.patternStr, - evaluator.symInfo.symbolExprsNames, - evaluator.symInfo.symbolExprsEvaluators, evaluator.symInfo.symbolExprsOIs); - syP.parse(); - evaluator.syFn = syP.getSymbolFunction(); - } - - @Override - public boolean transformsRawInput() - { - return false; - } - - @Override - public void initializeOutputOI() throws HiveException { - try { - NPath evaluator = (NPath) getEvaluator(); - PartitionedTableFunctionDef tDef = evaluator.getTableDef(); - - ArrayList args = tDef.getArgs(); - int argsNum = args.size(); - - validateAndSetupPatternStr(evaluator, args); - validateAndSetupSymbolInfo(evaluator, args, argsNum); - validateAndSetupResultExprStr(evaluator, args, argsNum); - setupSymbolFunctionChain(evaluator); - - /* - * setup OI for input to resultExpr select list - */ - StructObjectInspector selectListInputOI = NPath.createSelectListOI( evaluator, - tDef.getInput()); - ResultExprInfo resultExprInfo = evaluator.resultExprInfo; - ArrayList selectListExprOIs = new ArrayList(); - resultExprInfo.resultExprEvals = new ArrayList(); - - for(int i=0 ; i < resultExprInfo.resultExprNodes.size(); i++) { - ExprNodeDesc selectColumnExprNode =resultExprInfo.resultExprNodes.get(i); - ExprNodeEvaluator selectColumnExprEval = - ExprNodeEvaluatorFactory.get(selectColumnExprNode); - ObjectInspector selectColumnOI = selectColumnExprEval.initialize(selectListInputOI); - resultExprInfo.resultExprEvals.add(selectColumnExprEval); - selectListExprOIs.add(selectColumnOI); - } - - resultExprInfo.resultOI = ObjectInspectorFactory.getStandardStructObjectInspector( - resultExprInfo.resultExprNames, selectListExprOIs); - setOutputOI(resultExprInfo.resultOI); - } - catch(SemanticException se) { - throw new HiveException(se); - } - } - - @Override - public ArrayList getOutputColumnNames() { - NPath evaluator = (NPath) getEvaluator(); - return 
evaluator.resultExprInfo.getResultExprNames(); - } - - } - - public ResultExprInfo getResultExprInfo() { - return resultExprInfo; - } - - public void setResultExprInfo(ResultExprInfo resultExprInfo) { - this.resultExprInfo = resultExprInfo; - } - - static class SymbolsInfo { - int sz; - ArrayList symbolExprsEvaluators; - ArrayList symbolExprsOIs; - ArrayList symbolExprsNames; - - SymbolsInfo(int sz) - { - this.sz = sz; - symbolExprsEvaluators = new ArrayList(sz); - symbolExprsOIs = new ArrayList(sz); - symbolExprsNames = new ArrayList(sz); - } - - void add(String name, PTFExpressionDef arg) - { - symbolExprsNames.add(name); - symbolExprsEvaluators.add(arg.getExprEvaluator()); - symbolExprsOIs.add(arg.getOI()); - } - } - - public static class ResultExprInfo { - ArrayList resultExprNames; - ArrayList resultExprNodes; - private transient ArrayList resultExprEvals; - private transient StructObjectInspector resultOI; - - public ArrayList getResultExprNames() { - return resultExprNames; - } - public void setResultExprNames(ArrayList resultExprNames) { - this.resultExprNames = resultExprNames; - } - public ArrayList getResultExprNodes() { - return resultExprNodes; - } - public void setResultExprNodes(ArrayList resultExprNodes) { - this.resultExprNodes = resultExprNodes; - } - } - - public static abstract class SymbolFunction - { - SymbolFunctionResult result; - - public SymbolFunction() - { - result = new SymbolFunctionResult(); - } - - public static SymbolFunctionResult match(SymbolFunction syFn, Object row, - PTFPartitionIterator pItr) throws HiveException - { - int resetToIdx = pItr.getIndex() - 1; - try - { - return syFn.match(row, pItr); - } finally - { - pItr.resetToIndex(resetToIdx); - } - } - - protected abstract SymbolFunctionResult match(Object row, PTFPartitionIterator pItr) - throws HiveException; - - protected abstract boolean isOptional(); - } - - public static class Symbol extends SymbolFunction { - ExprNodeEvaluator symbolExprEval; - Converter converter; - 
- public Symbol(ExprNodeEvaluator symbolExprEval, ObjectInspector symbolOI) - { - this.symbolExprEval = symbolExprEval; - converter = ObjectInspectorConverters.getConverter( - symbolOI, - PrimitiveObjectInspectorFactory.javaBooleanObjectInspector); - } - - @Override - protected SymbolFunctionResult match(Object row, PTFPartitionIterator pItr) - throws HiveException - { - Object val = null; - val = symbolExprEval.evaluate(row); - val = converter.convert(val); - result.matches = ((Boolean) val).booleanValue(); - result.nextRow = pItr.getIndex(); - - return result; - } - - @Override - protected boolean isOptional() - { - return false; - } - } - - public static class Star extends SymbolFunction { - SymbolFunction symbolFn; - - public Star(SymbolFunction symbolFn) - { - this.symbolFn = symbolFn; - } - - @Override - protected SymbolFunctionResult match(Object row, PTFPartitionIterator pItr) - throws HiveException - { - result.matches = true; - SymbolFunctionResult rowResult = symbolFn.match(row, pItr); - - while (rowResult.matches && pItr.hasNext()) - { - row = pItr.next(); - rowResult = symbolFn.match(row, pItr); - } - - result.nextRow = pItr.getIndex() - 1; - return result; - } - - @Override - protected boolean isOptional() - { - return true; - } - } - - public static class Plus extends SymbolFunction { - SymbolFunction symbolFn; - - public Plus(SymbolFunction symbolFn) - { - this.symbolFn = symbolFn; - } - - @Override - protected SymbolFunctionResult match(Object row, PTFPartitionIterator pItr) - throws HiveException - { - SymbolFunctionResult rowResult = symbolFn.match(row, pItr); - - if (!rowResult.matches) - { - result.matches = false; - result.nextRow = pItr.getIndex() - 1; - return result; - } - - result.matches = true; - while (rowResult.matches && pItr.hasNext()) - { - row = pItr.next(); - rowResult = symbolFn.match(row, pItr); - } - - result.nextRow = pItr.getIndex() - 1; - return result; - } - - @Override - protected boolean isOptional() - { - return false; - 
} - } - - public static class Chain extends SymbolFunction - { - ArrayList components; - - public Chain(ArrayList components) - { - this.components = components; - } - - /* - * Iterate over the Symbol Functions in the Chain: - * - If we are not at the end of the Iterator (i.e. row != null ) - * - match the current componentFn - * - if it returns false, then return false - * - otherwise set row to the next row from the Iterator. - * - if we are at the end of the Iterator - * - skip any optional Symbol Fns (star patterns) at the end. - * - but if we come to a non optional Symbol Fn, return false. - * - if we match all Fns in the chain return true. - */ - @Override - protected SymbolFunctionResult match(Object row, PTFPartitionIterator pItr) - throws HiveException - { - SymbolFunctionResult componentResult = null; - for (SymbolFunction sFn : components) - { - if (row != null) - { - componentResult = sFn.match(row, pItr); - if (!componentResult.matches) - { - result.matches = false; - result.nextRow = componentResult.nextRow; - return result; - } - row = pItr.resetToIndex(componentResult.nextRow); - } - else - { - if (!sFn.isOptional()) - { - result.matches = false; - result.nextRow = componentResult.nextRow; - return result; - } - } - } - - result.matches = true; - result.nextRow = componentResult.nextRow; - return result; - } - - @Override - protected boolean isOptional() - { - return false; - } - } - - - public static class SymbolFunctionResult - { - /* - * does the row match the pattern represented by this SymbolFunction - */ - public boolean matches; - /* - * what is the index of the row beyond the set of rows that match this pattern. 
- */ - public int nextRow; - } - - public static class SymbolParser - { - String patternStr; - String[] symbols; - HashMap symbolExprEvalMap; - ArrayList symbolFunctions; - Chain symbolFnChain; - - - public SymbolParser(String patternStr, ArrayList symbolNames, - ArrayList symbolExprEvals, ArrayList symbolExprOIs) - { - super(); - this.patternStr = patternStr; - symbolExprEvalMap = new HashMap(); - int sz = symbolNames.size(); - for(int i=0; i < sz; i++) - { - String symbolName = symbolNames.get(i); - ExprNodeEvaluator symbolExprEval = symbolExprEvals.get(i); - ObjectInspector symbolExprOI = symbolExprOIs.get(i); - symbolExprEvalMap.put(symbolName.toLowerCase(), - new Object[] {symbolExprEval, symbolExprOI}); - } - } - - public SymbolFunction getSymbolFunction() - { - return symbolFnChain; - } - - public void parse() throws SemanticException - { - symbols = patternStr.split("\\."); - symbolFunctions = new ArrayList(); - - for(String symbol : symbols) - { - boolean isStar = symbol.endsWith("*"); - boolean isPlus = symbol.endsWith("+"); - - symbol = (isStar || isPlus) ? symbol.substring(0, symbol.length() - 1) : symbol; - Object[] symbolDetails = symbolExprEvalMap.get(symbol.toLowerCase()); - if ( symbolDetails == null ) - { - throw new SemanticException(String.format("Unknown Symbol %s", symbol)); - } - - ExprNodeEvaluator symbolExprEval = (ExprNodeEvaluator) symbolDetails[0]; - ObjectInspector symbolExprOI = (ObjectInspector) symbolDetails[1]; - SymbolFunction sFn = new Symbol(symbolExprEval, symbolExprOI); - - if ( isStar ) - { - sFn = new Star(sFn); - } - else if ( isPlus ) - { - sFn = new Plus(sFn); - } - symbolFunctions.add(sFn); - } - symbolFnChain = new Chain(symbolFunctions); - } - } - - /* - * ResultExpression is a Select List with the following variation: - * - the select keyword is optional. The parser checks if the expression doesn't start with - * select; if not it prefixes it. - * - Window Fn clauses are not permitted. 
- * - expressions can operate on the input columns plus the psuedo column 'path' - * which is array of - * structs. The shape of the struct is - * the same as the input. - */ - public static class ResultExpressionParser { - String resultExprString; - - RowResolver selectListInputRowResolver; - TypeCheckCtx selectListInputTypeCheckCtx; - StructObjectInspector selectListInputOI; - - ArrayList selectSpec; - - ResultExprInfo resultExprInfo; - - public ResultExpressionParser(String resultExprString, - RowResolver selectListInputRowResolver) - { - this.resultExprString = resultExprString; - this.selectListInputRowResolver = selectListInputRowResolver; - } - - public void translate() throws SemanticException, HiveException - { - setupSelectListInputInfo(); - fixResultExprString(); - parse(); - validateSelectExpr(); - buildSelectListEvaluators(); - } - - public ResultExprInfo getResultExprInfo() { - return resultExprInfo; - } - - private void buildSelectListEvaluators() throws SemanticException, HiveException - { - resultExprInfo = new ResultExprInfo(); - resultExprInfo.resultExprEvals = new ArrayList(); - resultExprInfo.resultExprNames = new ArrayList(); - resultExprInfo.resultExprNodes = new ArrayList(); - //result - ArrayList selectListExprOIs = new ArrayList(); - int i = 0; - for(WindowExpressionSpec expr : selectSpec) - { - String selectColName = expr.getAlias(); - ASTNode selectColumnNode = expr.getExpression(); - ExprNodeDesc selectColumnExprNode = - ResultExpressionParser.buildExprNode(selectColumnNode, - selectListInputTypeCheckCtx); - ExprNodeEvaluator selectColumnExprEval = - ExprNodeEvaluatorFactory.get(selectColumnExprNode); - ObjectInspector selectColumnOI = null; - selectColumnOI = selectColumnExprEval.initialize(selectListInputOI); - - selectColName = getColumnName(selectColName, selectColumnExprNode, i); - - resultExprInfo.resultExprEvals.add(selectColumnExprEval); - selectListExprOIs.add(selectColumnOI); - 
resultExprInfo.resultExprNodes.add(selectColumnExprNode); - resultExprInfo.resultExprNames.add(selectColName); - i++; - } - - resultExprInfo.resultOI = ObjectInspectorFactory.getStandardStructObjectInspector( - resultExprInfo.resultExprNames, selectListExprOIs); - } - - private void setupSelectListInputInfo() throws SemanticException - { - selectListInputTypeCheckCtx = new TypeCheckCtx(selectListInputRowResolver); - selectListInputTypeCheckCtx.setUnparseTranslator(null); - /* - * create SelectListOI - */ - selectListInputOI = (StructObjectInspector) - PTFTranslator.getStandardStructOI(selectListInputRowResolver); - } - - private void fixResultExprString() - { - String r = resultExprString.trim(); - String prefix = r.substring(0, 6); - if (!prefix.toLowerCase().equals("select")) - { - r = "select " + r; - } - resultExprString = r; - } - - private void parse() throws SemanticException - { - selectSpec = SemanticAnalyzer.parseSelect(resultExprString); - } - - private void validateSelectExpr() throws SemanticException - { - for (WindowExpressionSpec expr : selectSpec) - { - PTFTranslator.validateNoLeadLagInValueBoundarySpec(expr.getExpression()); - } - } - - private String getColumnName(String alias, ExprNodeDesc exprNode, int colIdx) - { - if (alias != null) - { - return alias; - } - else if (exprNode instanceof ExprNodeColumnDesc) - { - ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) exprNode; - return colDesc.getColumn(); - } - return "npath_col_" + colIdx; - } - - public static ExprNodeDesc buildExprNode(ASTNode expr, - TypeCheckCtx typeCheckCtx) throws SemanticException - { - // todo: use SemanticAnalyzer::genExprNodeDesc - // currently SA not available to PTFTranslator. 
- Map map = TypeCheckProcFactory - .genExprNode(expr, typeCheckCtx); - ExprNodeDesc desc = map.get(expr); - if (desc == null) { - String errMsg = typeCheckCtx.getError(); - if ( errMsg == null) { - errMsg = "Error in parsing "; - } - throw new SemanticException(errMsg); - } - return desc; - } - } - - public static final String PATHATTR_NAME = "tpath"; - - /* - * add array to the list of columns - */ - protected static RowResolver createSelectListRR(NPath evaluator, - PTFInputDef inpDef) throws SemanticException { - RowResolver rr = new RowResolver(); - RowResolver inputRR = inpDef.getOutputShape().getRr(); - - evaluator.inputColumnNamesMap = new HashMap(); - ArrayList inputColumnNames = new ArrayList(); - - ArrayList inpColOIs = new ArrayList(); - - for (ColumnInfo inpCInfo : inputRR.getColumnInfos()) { - ColumnInfo cInfo = new ColumnInfo(inpCInfo); - String colAlias = cInfo.getAlias(); - - String[] tabColAlias = inputRR.reverseLookup(inpCInfo.getInternalName()); - if (tabColAlias != null) { - colAlias = tabColAlias[1]; - } - ASTNode inExpr = null; - inExpr = PTFTranslator.getASTNode(inpCInfo, inputRR); - if ( inExpr != null ) { - rr.putExpression(inExpr, cInfo); - colAlias = inExpr.toStringTree().toLowerCase(); - } - else { - colAlias = colAlias == null ? 
cInfo.getInternalName() : colAlias; - rr.put(cInfo.getTabAlias(), colAlias, cInfo); - } - - evaluator.inputColumnNamesMap.put(cInfo.getInternalName(), colAlias); - inputColumnNames.add(colAlias); - inpColOIs.add(cInfo.getObjectInspector()); - } - - StandardListObjectInspector pathAttrOI = - ObjectInspectorFactory.getStandardListObjectInspector( - ObjectInspectorFactory.getStandardStructObjectInspector(inputColumnNames, - inpColOIs)); - - ColumnInfo pathColumn = new ColumnInfo(PATHATTR_NAME, - TypeInfoUtils.getTypeInfoFromObjectInspector(pathAttrOI), - null, - false, false); - rr.put(null, PATHATTR_NAME, pathColumn); - - return rr; - } - - protected static StructObjectInspector createSelectListOI(NPath evaluator, PTFInputDef inpDef) { - StructObjectInspector inOI = inpDef.getOutputShape().getOI(); - ArrayList inputColumnNames = new ArrayList(); - ArrayList selectListNames = new ArrayList(); - ArrayList fieldOIs = new ArrayList(); - for(StructField f : inOI.getAllStructFieldRefs()) { - String inputColName = evaluator.inputColumnNamesMap.get(f.getFieldName()); - if ( inputColName != null ) { - inputColumnNames.add(inputColName); - selectListNames.add(f.getFieldName()); - fieldOIs.add(f.getFieldObjectInspector()); - } - } - - StandardListObjectInspector pathAttrOI = - ObjectInspectorFactory.getStandardListObjectInspector( - ObjectInspectorFactory.getStandardStructObjectInspector(inputColumnNames, - fieldOIs)); - - ArrayList selectFieldOIs = new ArrayList(); - selectFieldOIs.addAll(fieldOIs); - selectFieldOIs.add(pathAttrOI); - selectListNames.add(NPath.PATHATTR_NAME); - return ObjectInspectorFactory.getStandardStructObjectInspector( - selectListNames, selectFieldOIs); - } - - public static Object getSelectListInput(Object currRow, ObjectInspector rowOI, - PTFPartitionIterator pItr, int sz) { - ArrayList oRow = new ArrayList(); - List currRowAsStdObject = (List) ObjectInspectorUtils - .copyToStandardObject(currRow, rowOI); - oRow.addAll(currRowAsStdObject); - 
oRow.add(getPath(currRow, rowOI, pItr, sz)); - return oRow; - } - - public static ArrayList getPath(Object currRow, ObjectInspector rowOI, - PTFPartitionIterator pItr, int sz) { - int idx = pItr.getIndex() - 1; - ArrayList path = new ArrayList(); - path.add(ObjectInspectorUtils.copyToStandardObject(currRow, rowOI)); - int pSz = 1; - - while (pSz < sz && pItr.hasNext()) - { - currRow = pItr.next(); - path.add(ObjectInspectorUtils.copyToStandardObject(currRow, rowOI)); - pSz++; - } - pItr.resetToIndex(idx); - return path; - } -} Index: ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/RegexPath.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/RegexPath.java (working copy) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/RegexPath.java (working copy) @@ -57,54 +57,47 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; /** - * return rows that meet a specified pattern. Use symbols to specify a list of expressions - * to match. - * Pattern is used to specify a Path. The results list can contain expressions based on - * the input columns and also the matched Path. + * return rows that meet a specified pattern. Use symbols to specify a list of expressions to match. + * Pattern is used to specify a Path. The results list can contain expressions based on the input + * columns and also the matched Path. *
    - *
  1. pattern: pattern for the Path. Path is 'dot' separated list of symbols. - * Each element is treated as a symbol. Elements that end in '*' or '+' are interpreted with - * the usual meaning of zero or more, one or more respectively. For e.g. - * "LATE.EARLY*.ONTIMEOREARLY" implies a sequence of flights - * where the first occurrence was LATE, followed by zero or more EARLY flights, - * followed by a ONTIME or EARLY flight. - *
  2. symbols specify a list of name, expression pairs. For e.g. - * 'LATE', arrival_delay > 0, 'EARLY', arrival_delay < 0 , 'ONTIME', arrival_delay == 0. - * These symbols can be used in the Pattern defined above. - *
  3. resultSelectList specified as a select list. - * The expressions in the selectList are evaluated in the context where all the - * input columns are available, plus the attribute - * "tpath" is available. Path is a collection of rows that represents the matching Path. + *
  4. pattern: pattern for the Path. Path is 'dot' separated list of symbols. Each element + * is treated as a symbol. Elements that end in '*' or '+' are interpreted with the usual meaning of + * zero or more, one or more respectively. E.g. "LATE.EARLY*.ONTIMEOREARLY" implies a sequence + * of flights where the first occurrence was LATE, followed by zero or more EARLY flights, followed + * by an ONTIME or EARLY flight. + *
  5. symbols specify a list of name, expression pairs. For e.g. 'LATE', arrival_delay > 0, + * 'EARLY', arrival_delay < 0 , 'ONTIME', arrival_delay == 0. These symbols can be used in the + * Pattern defined above. + *
  6. resultSelectList specified as a select list. The expressions in the selectList are + * evaluated in the context where all the input columns are available, plus the attribute "tpath" is + * available. Path is a collection of rows that represents the matching Path. *
*/ -public class NPath extends TableFunctionEvaluator -{ +public class RegexPath extends TableFunctionEvaluator { private transient String patternStr; private transient SymbolsInfo symInfo; private transient String resultExprStr; private transient SymbolFunction syFn; private ResultExprInfo resultExprInfo; + /* - * the names of the Columns of the input to NPath. Used to setup the tpath Struct column. + * the names of the Columns of the input to Regex_Path. Used to setup the tpath Struct column. */ - private HashMap inputColumnNamesMap; + private Map inputColumnNamesMap; @Override - public void execute(PTFPartitionIterator pItr, PTFPartition outP) throws HiveException - { - while (pItr.hasNext()) - { + public void execute(PTFPartitionIterator pItr, PTFPartition outP) throws HiveException { + while (pItr.hasNext()) { Object iRow = pItr.next(); SymbolFunctionResult syFnRes = SymbolFunction.match(syFn, iRow, pItr); - if (syFnRes.matches ) - { + if (syFnRes.matches) { int sz = syFnRes.nextRow - (pItr.getIndex() - 1); - Object selectListInput = NPath.getSelectListInput(iRow, - tDef.getInput().getOutputShape().getOI(), pItr, sz); + Object selectListInput = RegexPath.getSelectListInput(iRow, tDef.getInput() + .getOutputShape().getOI(), pItr, sz); ArrayList oRow = new ArrayList(); - for(ExprNodeEvaluator resExprEval : resultExprInfo.resultExprEvals) - { + for (ExprNodeEvaluator resExprEval : resultExprInfo.resultExprEvals) { oRow.add(resExprEval.evaluate(selectListInput)); } outP.append(oRow); @@ -112,60 +105,52 @@ } } - static void throwErrorWithSignature(String message) throws SemanticException - { - throw new SemanticException(String.format( - "NPath signature is: SymbolPattern, one or more SymbolName, " + - "expression pairs, the result expression as a select list. 
Error %s", - message)); + static void throwErrorWithSignature(String message) throws SemanticException { + throw new SemanticException( + String.format("RegexPath signature is: SymbolPattern, one or more SymbolName, " + + "expression pairs, the result expression as a select list. Error %s", message)); } - public HashMap getInputColumnNames() { + public Map getInputColumnNames() { return inputColumnNamesMap; } - public void setInputColumnNames(HashMap inputColumnNamesMap) { + public void setInputColumnNames(HashMap inputColumnNamesMap) { this.inputColumnNamesMap = inputColumnNamesMap; } - public static class NPathResolver extends TableFunctionResolver - { + public static class RegexPathResolver extends TableFunctionResolver { @Override protected TableFunctionEvaluator createEvaluator(PTFDesc ptfDesc, - PartitionedTableFunctionDef tDef) - { + PartitionedTableFunctionDef tDef) { - return new NPath(); + return new RegexPath(); } /** *
    - *
  • check structure of Arguments: + *
  • check structure of Arguments: *
      - *
    1. First arg should be a String - *
    2. then there should be an even number of Arguments: - * String, expression; expression should be Convertible to Boolean. - *
    3. finally there should be a String. + *
    4. First arg should be a String + *
    5. then there should be an even number of Arguments: String, expression; expression should + * be Convertible to Boolean. + *
    6. finally there should be a String. *
    - *
  • convert pattern into a NNode chain. - *
  • convert symbol args into a Symbol Map. - *
  • parse selectList into SelectList struct. The inputOI used to translate - * these expressions should be based on the - * columns in the Input, the 'path.attr' + *
  • convert pattern into a NNode chain. + *
  • convert symbol args into a Symbol Map. + *
  • parse selectList into SelectList struct. The inputOI used to translate these expressions + * should be based on the columns in the Input, the 'path.attr' *
*/ @Override - public void setupOutputOI() throws SemanticException - { - NPath evaluator = (NPath) getEvaluator(); + public void setupOutputOI() throws SemanticException { + RegexPath evaluator = (RegexPath) getEvaluator(); PartitionedTableFunctionDef tDef = evaluator.getTableDef(); - - ArrayList args = tDef.getArgs(); + List args = tDef.getArgs(); int argsNum = args == null ? 0 : args.size(); - if ( argsNum < 4 ) - { + if (argsNum < 4) { throwErrorWithSignature("at least 4 arguments required"); } @@ -177,17 +162,16 @@ /* * setup OI for input to resultExpr select list */ - RowResolver selectListInputRR = NPath.createSelectListRR(evaluator, tDef.getInput()); + RowResolver selectListInputRR = RegexPath.createSelectListRR(evaluator, tDef.getInput()); /* * parse ResultExpr Str and setup OI. */ - ResultExpressionParser resultExprParser = - new ResultExpressionParser(evaluator.resultExprStr, selectListInputRR); + ResultExpressionParser resultExprParser = new ResultExpressionParser(evaluator.resultExprStr, + selectListInputRR); try { resultExprParser.translate(); - } - catch(HiveException he) { + } catch (HiveException he) { throw new SemanticException(he); } evaluator.resultExprInfo = resultExprParser.getResultExprInfo(); @@ -195,65 +179,57 @@ setOutputOI(OI); } + /* * validate and setup patternStr */ - private void validateAndSetupPatternStr(NPath evaluator, - ArrayList args) throws SemanticException { + private void validateAndSetupPatternStr(RegexPath evaluator, List args) + throws SemanticException { PTFExpressionDef symboPatternArg = args.get(0); ObjectInspector symbolPatternArgOI = symboPatternArg.getOI(); - if ( !ObjectInspectorUtils.isConstantObjectInspector(symbolPatternArgOI) || - (symbolPatternArgOI.getCategory() != ObjectInspector.Category.PRIMITIVE) || - ((PrimitiveObjectInspector)symbolPatternArgOI).getPrimitiveCategory() != - PrimitiveObjectInspector.PrimitiveCategory.STRING ) - { + if (!ObjectInspectorUtils.isConstantObjectInspector(symbolPatternArgOI) + 
|| (symbolPatternArgOI.getCategory() != ObjectInspector.Category.PRIMITIVE) + || ((PrimitiveObjectInspector) symbolPatternArgOI).getPrimitiveCategory() != PrimitiveObjectInspector.PrimitiveCategory.STRING) { throwErrorWithSignature("Currently the symbol Pattern must be a Constant String."); } - evaluator.patternStr = ((ConstantObjectInspector)symbolPatternArgOI). - getWritableConstantValue().toString(); + evaluator.patternStr = ((ConstantObjectInspector) symbolPatternArgOI) + .getWritableConstantValue().toString(); } /* * validate and setup SymbolInfo */ - private void validateAndSetupSymbolInfo(NPath evaluator, - ArrayList args, - int argsNum) throws SemanticException { + private void validateAndSetupSymbolInfo(RegexPath evaluator, List args, + int argsNum) throws SemanticException { int symbolArgsSz = argsNum - 2; - if ( symbolArgsSz % 2 != 0) - { - throwErrorWithSignature("Symbol Name, Expression need to be specified in pairs: " + - "there are odd number of symbol args"); + if (symbolArgsSz % 2 != 0) { + throwErrorWithSignature("Symbol Name, Expression need to be specified in pairs: " + + "there are odd number of symbol args"); } - evaluator.symInfo = new SymbolsInfo(symbolArgsSz/2); - for(int i=1; i <= symbolArgsSz; i += 2) - { + evaluator.symInfo = new SymbolsInfo(symbolArgsSz / 2); + for (int i = 1; i <= symbolArgsSz; i += 2) { PTFExpressionDef symbolNameArg = args.get(i); ObjectInspector symbolNameArgOI = symbolNameArg.getOI(); - if ( !ObjectInspectorUtils.isConstantObjectInspector(symbolNameArgOI) || - (symbolNameArgOI.getCategory() != ObjectInspector.Category.PRIMITIVE) || - ((PrimitiveObjectInspector)symbolNameArgOI).getPrimitiveCategory() != - PrimitiveObjectInspector.PrimitiveCategory.STRING ) - { - throwErrorWithSignature( - String.format("Currently a Symbol Name(%s) must be a Constant String", + if (!ObjectInspectorUtils.isConstantObjectInspector(symbolNameArgOI) + || (symbolNameArgOI.getCategory() != ObjectInspector.Category.PRIMITIVE) + || 
((PrimitiveObjectInspector) symbolNameArgOI).getPrimitiveCategory() != PrimitiveObjectInspector.PrimitiveCategory.STRING) { + throwErrorWithSignature(String.format( + "Currently a Symbol Name(%s) must be a Constant String", symbolNameArg.getExpressionTreeString())); } - String symbolName = ((ConstantObjectInspector)symbolNameArgOI). - getWritableConstantValue().toString(); + String symbolName = ((ConstantObjectInspector) symbolNameArgOI).getWritableConstantValue() + .toString(); - PTFExpressionDef symolExprArg = args.get(i+1); + PTFExpressionDef symolExprArg = args.get(i + 1); ObjectInspector symolExprArgOI = symolExprArg.getOI(); - if ( (symolExprArgOI.getCategory() != ObjectInspector.Category.PRIMITIVE) || - ((PrimitiveObjectInspector)symolExprArgOI).getPrimitiveCategory() != - PrimitiveObjectInspector.PrimitiveCategory.BOOLEAN ) - { - throwErrorWithSignature(String.format("Currently a Symbol Expression(%s) " + - "must be a boolean expression", symolExprArg.getExpressionTreeString())); + if ((symolExprArgOI.getCategory() != ObjectInspector.Category.PRIMITIVE) + || ((PrimitiveObjectInspector) symolExprArgOI).getPrimitiveCategory() != PrimitiveObjectInspector.PrimitiveCategory.BOOLEAN) { + throwErrorWithSignature(String.format("Currently a Symbol Expression(%s) " + + "must be a boolean expression", symolExprArg.getExpressionTreeString())); } evaluator.symInfo.add(symbolName, symolExprArg); } @@ -262,45 +238,40 @@ /* * validate and setup resultExprStr */ - private void validateAndSetupResultExprStr(NPath evaluator, - ArrayList args, - int argsNum) throws SemanticException { + private void validateAndSetupResultExprStr(RegexPath evaluator, + List args, int argsNum) throws SemanticException { PTFExpressionDef resultExprArg = args.get(argsNum - 1); ObjectInspector resultExprArgOI = resultExprArg.getOI(); - if ( !ObjectInspectorUtils.isConstantObjectInspector(resultExprArgOI) || - (resultExprArgOI.getCategory() != ObjectInspector.Category.PRIMITIVE) || - 
((PrimitiveObjectInspector)resultExprArgOI).getPrimitiveCategory() != - PrimitiveObjectInspector.PrimitiveCategory.STRING ) - { + if (!ObjectInspectorUtils.isConstantObjectInspector(resultExprArgOI) + || (resultExprArgOI.getCategory() != ObjectInspector.Category.PRIMITIVE) + || ((PrimitiveObjectInspector) resultExprArgOI).getPrimitiveCategory() != PrimitiveObjectInspector.PrimitiveCategory.STRING) { throwErrorWithSignature("Currently the result Expr parameter must be a Constant String."); } - evaluator.resultExprStr = ((ConstantObjectInspector)resultExprArgOI). - getWritableConstantValue().toString(); + evaluator.resultExprStr = ((ConstantObjectInspector) resultExprArgOI) + .getWritableConstantValue().toString(); } /* * setup SymbolFunction chain. */ - private void setupSymbolFunctionChain(NPath evaluator) throws SemanticException { - SymbolParser syP = new SymbolParser(evaluator.patternStr, - evaluator.symInfo.symbolExprsNames, - evaluator.symInfo.symbolExprsEvaluators, evaluator.symInfo.symbolExprsOIs); + private void setupSymbolFunctionChain(RegexPath evaluator) throws SemanticException { + SymbolParser syP = new SymbolParser(evaluator.patternStr, evaluator.symInfo.symbolExprsNames, + evaluator.symInfo.symbolExprsEvaluators, evaluator.symInfo.symbolExprsOIs); syP.parse(); evaluator.syFn = syP.getSymbolFunction(); } @Override - public boolean transformsRawInput() - { + public boolean transformsRawInput() { return false; } @Override public void initializeOutputOI() throws HiveException { try { - NPath evaluator = (NPath) getEvaluator(); + RegexPath evaluator = (RegexPath) getEvaluator(); PartitionedTableFunctionDef tDef = evaluator.getTableDef(); ArrayList args = tDef.getArgs(); @@ -314,33 +285,32 @@ /* * setup OI for input to resultExpr select list */ - StructObjectInspector selectListInputOI = NPath.createSelectListOI( evaluator, - tDef.getInput()); + StructObjectInspector selectListInputOI = RegexPath.createSelectListOI(evaluator, + tDef.getInput()); 
ResultExprInfo resultExprInfo = evaluator.resultExprInfo; ArrayList selectListExprOIs = new ArrayList(); resultExprInfo.resultExprEvals = new ArrayList(); - for(int i=0 ; i < resultExprInfo.resultExprNodes.size(); i++) { - ExprNodeDesc selectColumnExprNode =resultExprInfo.resultExprNodes.get(i); - ExprNodeEvaluator selectColumnExprEval = - ExprNodeEvaluatorFactory.get(selectColumnExprNode); + for (int i = 0; i < resultExprInfo.resultExprNodes.size(); i++) { + ExprNodeDesc selectColumnExprNode = resultExprInfo.resultExprNodes.get(i); + ExprNodeEvaluator selectColumnExprEval = ExprNodeEvaluatorFactory + .get(selectColumnExprNode); ObjectInspector selectColumnOI = selectColumnExprEval.initialize(selectListInputOI); resultExprInfo.resultExprEvals.add(selectColumnExprEval); selectListExprOIs.add(selectColumnOI); } resultExprInfo.resultOI = ObjectInspectorFactory.getStandardStructObjectInspector( - resultExprInfo.resultExprNames, selectListExprOIs); + resultExprInfo.resultExprNames, selectListExprOIs); setOutputOI(resultExprInfo.resultOI); - } - catch(SemanticException se) { + } catch (SemanticException se) { throw new HiveException(se); } } @Override public ArrayList getOutputColumnNames() { - NPath evaluator = (NPath) getEvaluator(); + RegexPath evaluator = (RegexPath) getEvaluator(); return evaluator.resultExprInfo.getResultExprNames(); } @@ -356,20 +326,21 @@ static class SymbolsInfo { int sz; + ArrayList symbolExprsEvaluators; + ArrayList symbolExprsOIs; + ArrayList symbolExprsNames; - SymbolsInfo(int sz) - { + SymbolsInfo(int sz) { this.sz = sz; symbolExprsEvaluators = new ArrayList(sz); symbolExprsOIs = new ArrayList(sz); symbolExprsNames = new ArrayList(sz); } - void add(String name, PTFExpressionDef arg) - { + void add(String name, PTFExpressionDef arg) { symbolExprsNames.add(name); symbolExprsEvaluators.add(arg.getExprEvaluator()); symbolExprsOIs.add(arg.getOI()); @@ -378,68 +349,67 @@ public static class ResultExprInfo { ArrayList resultExprNames; + ArrayList 
resultExprNodes; + private transient ArrayList resultExprEvals; + private transient StructObjectInspector resultOI; public ArrayList getResultExprNames() { return resultExprNames; } + public void setResultExprNames(ArrayList resultExprNames) { this.resultExprNames = resultExprNames; } + public ArrayList getResultExprNodes() { return resultExprNodes; } + public void setResultExprNodes(ArrayList resultExprNodes) { this.resultExprNodes = resultExprNodes; } } - public static abstract class SymbolFunction - { + public static abstract class SymbolFunction { SymbolFunctionResult result; - public SymbolFunction() - { + public SymbolFunction() { result = new SymbolFunctionResult(); } public static SymbolFunctionResult match(SymbolFunction syFn, Object row, - PTFPartitionIterator pItr) throws HiveException - { + PTFPartitionIterator pItr) throws HiveException { int resetToIdx = pItr.getIndex() - 1; - try - { + try { return syFn.match(row, pItr); - } finally - { + } finally { pItr.resetToIndex(resetToIdx); } } protected abstract SymbolFunctionResult match(Object row, PTFPartitionIterator pItr) - throws HiveException; + throws HiveException; protected abstract boolean isOptional(); } public static class Symbol extends SymbolFunction { ExprNodeEvaluator symbolExprEval; + Converter converter; - public Symbol(ExprNodeEvaluator symbolExprEval, ObjectInspector symbolOI) - { + public Symbol(ExprNodeEvaluator symbolExprEval, ObjectInspector symbolOI) { this.symbolExprEval = symbolExprEval; - converter = ObjectInspectorConverters.getConverter( - symbolOI, - PrimitiveObjectInspectorFactory.javaBooleanObjectInspector); + converter = ObjectInspectorConverters.getConverter(symbolOI, + PrimitiveObjectInspectorFactory.javaBooleanObjectInspector); } @Override protected SymbolFunctionResult match(Object row, PTFPartitionIterator pItr) - throws HiveException - { + throws HiveException { Object val = null; val = symbolExprEval.evaluate(row); val = converter.convert(val); @@ -450,8 +420,7 @@ } 
@Override - protected boolean isOptional() - { + protected boolean isOptional() { return false; } } @@ -459,20 +428,17 @@ public static class Star extends SymbolFunction { SymbolFunction symbolFn; - public Star(SymbolFunction symbolFn) - { + public Star(SymbolFunction symbolFn) { this.symbolFn = symbolFn; } @Override protected SymbolFunctionResult match(Object row, PTFPartitionIterator pItr) - throws HiveException - { + throws HiveException { result.matches = true; SymbolFunctionResult rowResult = symbolFn.match(row, pItr); - while (rowResult.matches && pItr.hasNext()) - { + while (rowResult.matches && pItr.hasNext()) { row = pItr.next(); rowResult = symbolFn.match(row, pItr); } @@ -482,8 +448,7 @@ } @Override - protected boolean isOptional() - { + protected boolean isOptional() { return true; } } @@ -491,27 +456,23 @@ public static class Plus extends SymbolFunction { SymbolFunction symbolFn; - public Plus(SymbolFunction symbolFn) - { + public Plus(SymbolFunction symbolFn) { this.symbolFn = symbolFn; } @Override protected SymbolFunctionResult match(Object row, PTFPartitionIterator pItr) - throws HiveException - { + throws HiveException { SymbolFunctionResult rowResult = symbolFn.match(row, pItr); - if (!rowResult.matches) - { + if (!rowResult.matches) { result.matches = false; result.nextRow = pItr.getIndex() - 1; return result; } result.matches = true; - while (rowResult.matches && pItr.hasNext()) - { + while (rowResult.matches && pItr.hasNext()) { row = pItr.next(); rowResult = symbolFn.match(row, pItr); } @@ -521,54 +482,40 @@ } @Override - protected boolean isOptional() - { + protected boolean isOptional() { return false; } } - public static class Chain extends SymbolFunction - { + public static class Chain extends SymbolFunction { ArrayList components; - public Chain(ArrayList components) - { + public Chain(ArrayList components) { this.components = components; } /* - * Iterate over the Symbol Functions in the Chain: - * - If we are not at the end of the 
Iterator (i.e. row != null ) - * - match the current componentFn - * - if it returns false, then return false - * - otherwise set row to the next row from the Iterator. - * - if we are at the end of the Iterator - * - skip any optional Symbol Fns (star patterns) at the end. - * - but if we come to a non optional Symbol Fn, return false. - * - if we match all Fns in the chain return true. + * Iterate over the Symbol Functions in the Chain: - If we are not at the end of the Iterator + * (i.e. row != null ) - match the current componentFn - if it returns false, then return false + * - otherwise set row to the next row from the Iterator. - if we are at the end of the Iterator + * - skip any optional Symbol Fns (star patterns) at the end. - but if we come to a non optional + * Symbol Fn, return false. - if we match all Fns in the chain return true. */ @Override protected SymbolFunctionResult match(Object row, PTFPartitionIterator pItr) - throws HiveException - { + throws HiveException { SymbolFunctionResult componentResult = null; - for (SymbolFunction sFn : components) - { - if (row != null) - { + for (SymbolFunction sFn : components) { + if (row != null) { componentResult = sFn.match(row, pItr); - if (!componentResult.matches) - { + if (!componentResult.matches) { result.matches = false; result.nextRow = componentResult.nextRow; return result; } row = pItr.resetToIndex(componentResult.nextRow); - } - else - { - if (!sFn.isOptional()) - { + } else { + if (!sFn.isOptional()) { result.matches = false; result.nextRow = componentResult.nextRow; return result; @@ -582,70 +529,64 @@ } @Override - protected boolean isOptional() - { + protected boolean isOptional() { return false; } } - - public static class SymbolFunctionResult - { + public static class SymbolFunctionResult { /* * does the row match the pattern represented by this SymbolFunction */ public boolean matches; + /* * what is the index of the row beyond the set of rows that match this pattern. 
*/ public int nextRow; } - public static class SymbolParser - { + public static class SymbolParser { String patternStr; + String[] symbols; + HashMap symbolExprEvalMap; + ArrayList symbolFunctions; + Chain symbolFnChain; - public SymbolParser(String patternStr, ArrayList symbolNames, - ArrayList symbolExprEvals, ArrayList symbolExprOIs) - { + ArrayList symbolExprEvals, ArrayList symbolExprOIs) { super(); this.patternStr = patternStr; symbolExprEvalMap = new HashMap(); int sz = symbolNames.size(); - for(int i=0; i < sz; i++) - { + for (int i = 0; i < sz; i++) { String symbolName = symbolNames.get(i); ExprNodeEvaluator symbolExprEval = symbolExprEvals.get(i); ObjectInspector symbolExprOI = symbolExprOIs.get(i); symbolExprEvalMap.put(symbolName.toLowerCase(), - new Object[] {symbolExprEval, symbolExprOI}); + new Object[] { symbolExprEval, symbolExprOI }); } } - public SymbolFunction getSymbolFunction() - { + public SymbolFunction getSymbolFunction() { return symbolFnChain; } - public void parse() throws SemanticException - { + public void parse() throws SemanticException { symbols = patternStr.split("\\."); symbolFunctions = new ArrayList(); - for(String symbol : symbols) - { + for (String symbol : symbols) { boolean isStar = symbol.endsWith("*"); boolean isPlus = symbol.endsWith("+"); symbol = (isStar || isPlus) ? 
symbol.substring(0, symbol.length() - 1) : symbol; Object[] symbolDetails = symbolExprEvalMap.get(symbol.toLowerCase()); - if ( symbolDetails == null ) - { + if (symbolDetails == null) { throw new SemanticException(String.format("Unknown Symbol %s", symbol)); } @@ -653,12 +594,9 @@ ObjectInspector symbolExprOI = (ObjectInspector) symbolDetails[1]; SymbolFunction sFn = new Symbol(symbolExprEval, symbolExprOI); - if ( isStar ) - { + if (isStar) { sFn = new Star(sFn); - } - else if ( isPlus ) - { + } else if (isPlus) { sFn = new Plus(sFn); } symbolFunctions.add(sFn); @@ -668,35 +606,31 @@ } /* - * ResultExpression is a Select List with the following variation: - * - the select keyword is optional. The parser checks if the expression doesn't start with - * select; if not it prefixes it. - * - Window Fn clauses are not permitted. - * - expressions can operate on the input columns plus the psuedo column 'path' - * which is array of - * structs. The shape of the struct is - * the same as the input. + * ResultExpression is a Select List with the following variation: - the select keyword is + * optional. The parser checks if the expression doesn't start with select; if not it prefixes it. + * - Window Fn clauses are not permitted. - expressions can operate on the input columns plus the + * psuedo column 'path' which is array of structs. The shape of the struct is the same as the + * input. 
*/ public static class ResultExpressionParser { String resultExprString; RowResolver selectListInputRowResolver; + TypeCheckCtx selectListInputTypeCheckCtx; + StructObjectInspector selectListInputOI; ArrayList selectSpec; ResultExprInfo resultExprInfo; - public ResultExpressionParser(String resultExprString, - RowResolver selectListInputRowResolver) - { + public ResultExpressionParser(String resultExprString, RowResolver selectListInputRowResolver) { this.resultExprString = resultExprString; this.selectListInputRowResolver = selectListInputRowResolver; } - public void translate() throws SemanticException, HiveException - { + public void translate() throws SemanticException, HiveException { setupSelectListInputInfo(); fixResultExprString(); parse(); @@ -708,24 +642,20 @@ return resultExprInfo; } - private void buildSelectListEvaluators() throws SemanticException, HiveException - { + private void buildSelectListEvaluators() throws SemanticException, HiveException { resultExprInfo = new ResultExprInfo(); resultExprInfo.resultExprEvals = new ArrayList(); resultExprInfo.resultExprNames = new ArrayList(); resultExprInfo.resultExprNodes = new ArrayList(); - //result + // result ArrayList selectListExprOIs = new ArrayList(); int i = 0; - for(WindowExpressionSpec expr : selectSpec) - { + for (WindowExpressionSpec expr : selectSpec) { String selectColName = expr.getAlias(); ASTNode selectColumnNode = expr.getExpression(); - ExprNodeDesc selectColumnExprNode = - ResultExpressionParser.buildExprNode(selectColumnNode, - selectListInputTypeCheckCtx); - ExprNodeEvaluator selectColumnExprEval = - ExprNodeEvaluatorFactory.get(selectColumnExprNode); + ExprNodeDesc selectColumnExprNode = ResultExpressionParser.buildExprNode(selectColumnNode, + selectListInputTypeCheckCtx); + ExprNodeEvaluator selectColumnExprEval = ExprNodeEvaluatorFactory.get(selectColumnExprNode); ObjectInspector selectColumnOI = null; selectColumnOI = selectColumnExprEval.initialize(selectListInputOI); @@ -739,69 
+669,57 @@ } resultExprInfo.resultOI = ObjectInspectorFactory.getStandardStructObjectInspector( - resultExprInfo.resultExprNames, selectListExprOIs); + resultExprInfo.resultExprNames, selectListExprOIs); } - private void setupSelectListInputInfo() throws SemanticException - { + private void setupSelectListInputInfo() throws SemanticException { selectListInputTypeCheckCtx = new TypeCheckCtx(selectListInputRowResolver); selectListInputTypeCheckCtx.setUnparseTranslator(null); /* * create SelectListOI */ - selectListInputOI = (StructObjectInspector) - PTFTranslator.getStandardStructOI(selectListInputRowResolver); + selectListInputOI = (StructObjectInspector) PTFTranslator + .getStandardStructOI(selectListInputRowResolver); } - private void fixResultExprString() - { + private void fixResultExprString() { String r = resultExprString.trim(); String prefix = r.substring(0, 6); - if (!prefix.toLowerCase().equals("select")) - { + if (!prefix.toLowerCase().equals("select")) { r = "select " + r; } resultExprString = r; } - private void parse() throws SemanticException - { + private void parse() throws SemanticException { selectSpec = SemanticAnalyzer.parseSelect(resultExprString); } - private void validateSelectExpr() throws SemanticException - { - for (WindowExpressionSpec expr : selectSpec) - { + private void validateSelectExpr() throws SemanticException { + for (WindowExpressionSpec expr : selectSpec) { PTFTranslator.validateNoLeadLagInValueBoundarySpec(expr.getExpression()); } } - private String getColumnName(String alias, ExprNodeDesc exprNode, int colIdx) - { - if (alias != null) - { + private String getColumnName(String alias, ExprNodeDesc exprNode, int colIdx) { + if (alias != null) { return alias; - } - else if (exprNode instanceof ExprNodeColumnDesc) - { + } else if (exprNode instanceof ExprNodeColumnDesc) { ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) exprNode; return colDesc.getColumn(); } - return "npath_col_" + colIdx; + return "regexpath_col_" + colIdx; } - 
public static ExprNodeDesc buildExprNode(ASTNode expr, - TypeCheckCtx typeCheckCtx) throws SemanticException - { + public static ExprNodeDesc buildExprNode(ASTNode expr, TypeCheckCtx typeCheckCtx) + throws SemanticException { // todo: use SemanticAnalyzer::genExprNodeDesc // currently SA not available to PTFTranslator. - Map map = TypeCheckProcFactory - .genExprNode(expr, typeCheckCtx); + Map map = TypeCheckProcFactory.genExprNode(expr, typeCheckCtx); ExprNodeDesc desc = map.get(expr); if (desc == null) { String errMsg = typeCheckCtx.getError(); - if ( errMsg == null) { + if (errMsg == null) { errMsg = "Error in parsing "; } throw new SemanticException(errMsg); @@ -815,15 +733,15 @@ /* * add array to the list of columns */ - protected static RowResolver createSelectListRR(NPath evaluator, - PTFInputDef inpDef) throws SemanticException { + protected static RowResolver createSelectListRR(RegexPath evaluator, PTFInputDef inpDef) + throws SemanticException { RowResolver rr = new RowResolver(); RowResolver inputRR = inpDef.getOutputShape().getRr(); - evaluator.inputColumnNamesMap = new HashMap(); + evaluator.inputColumnNamesMap = new HashMap(); ArrayList inputColumnNames = new ArrayList(); - ArrayList inpColOIs = new ArrayList(); + List inpColOIs = new ArrayList(); for (ColumnInfo inpCInfo : inputRR.getColumnInfos()) { ColumnInfo cInfo = new ColumnInfo(inpCInfo); @@ -835,11 +753,10 @@ } ASTNode inExpr = null; inExpr = PTFTranslator.getASTNode(inpCInfo, inputRR); - if ( inExpr != null ) { + if (inExpr != null) { rr.putExpression(inExpr, cInfo); colAlias = inExpr.toStringTree().toLowerCase(); - } - else { + } else { colAlias = colAlias == null ? 
cInfo.getInternalName() : colAlias; rr.put(cInfo.getTabAlias(), colAlias, cInfo); } @@ -849,66 +766,60 @@ inpColOIs.add(cInfo.getObjectInspector()); } - StandardListObjectInspector pathAttrOI = - ObjectInspectorFactory.getStandardListObjectInspector( - ObjectInspectorFactory.getStandardStructObjectInspector(inputColumnNames, - inpColOIs)); + StandardListObjectInspector pathAttrOI = ObjectInspectorFactory + .getStandardListObjectInspector(ObjectInspectorFactory + .getStandardStructObjectInspector(inputColumnNames, inpColOIs)); ColumnInfo pathColumn = new ColumnInfo(PATHATTR_NAME, - TypeInfoUtils.getTypeInfoFromObjectInspector(pathAttrOI), - null, - false, false); + TypeInfoUtils.getTypeInfoFromObjectInspector(pathAttrOI), null, false, false); rr.put(null, PATHATTR_NAME, pathColumn); return rr; } - protected static StructObjectInspector createSelectListOI(NPath evaluator, PTFInputDef inpDef) { + protected static StructObjectInspector createSelectListOI(RegexPath evaluator, PTFInputDef inpDef) { StructObjectInspector inOI = inpDef.getOutputShape().getOI(); ArrayList inputColumnNames = new ArrayList(); ArrayList selectListNames = new ArrayList(); ArrayList fieldOIs = new ArrayList(); - for(StructField f : inOI.getAllStructFieldRefs()) { + for (StructField f : inOI.getAllStructFieldRefs()) { String inputColName = evaluator.inputColumnNamesMap.get(f.getFieldName()); - if ( inputColName != null ) { + if (inputColName != null) { inputColumnNames.add(inputColName); selectListNames.add(f.getFieldName()); fieldOIs.add(f.getFieldObjectInspector()); } } - StandardListObjectInspector pathAttrOI = - ObjectInspectorFactory.getStandardListObjectInspector( - ObjectInspectorFactory.getStandardStructObjectInspector(inputColumnNames, - fieldOIs)); + StandardListObjectInspector pathAttrOI = ObjectInspectorFactory + .getStandardListObjectInspector(ObjectInspectorFactory + .getStandardStructObjectInspector(inputColumnNames, fieldOIs)); - ArrayList selectFieldOIs = new ArrayList(); + List 
selectFieldOIs = new ArrayList(); selectFieldOIs.addAll(fieldOIs); selectFieldOIs.add(pathAttrOI); - selectListNames.add(NPath.PATHATTR_NAME); - return ObjectInspectorFactory.getStandardStructObjectInspector( - selectListNames, selectFieldOIs); + selectListNames.add(RegexPath.PATHATTR_NAME); + return ObjectInspectorFactory.getStandardStructObjectInspector(selectListNames, selectFieldOIs); } public static Object getSelectListInput(Object currRow, ObjectInspector rowOI, - PTFPartitionIterator pItr, int sz) { - ArrayList oRow = new ArrayList(); + PTFPartitionIterator pItr, int sz) { + List oRow = new ArrayList(); List currRowAsStdObject = (List) ObjectInspectorUtils - .copyToStandardObject(currRow, rowOI); + .copyToStandardObject(currRow, rowOI); oRow.addAll(currRowAsStdObject); oRow.add(getPath(currRow, rowOI, pItr, sz)); return oRow; } - public static ArrayList getPath(Object currRow, ObjectInspector rowOI, - PTFPartitionIterator pItr, int sz) { + public static List getPath(Object currRow, ObjectInspector rowOI, + PTFPartitionIterator pItr, int sz) { int idx = pItr.getIndex() - 1; - ArrayList path = new ArrayList(); + List path = new ArrayList(); path.add(ObjectInspectorUtils.copyToStandardObject(currRow, rowOI)); int pSz = 1; - while (pSz < sz && pItr.hasNext()) - { + while (pSz < sz && pItr.hasNext()) { currRow = pItr.next(); path.add(ObjectInspectorUtils.copyToStandardObject(currRow, rowOI)); pSz++; Index: ql/src/test/queries/clientpositive/ptf_npath.q =================================================================== --- ql/src/test/queries/clientpositive/ptf_npath.q (revision 1513727) +++ ql/src/test/queries/clientpositive/ptf_npath.q (working copy) @@ -1,36 +0,0 @@ -DROP TABLE flights_tiny; - -create table flights_tiny ( -ORIGIN_CITY_NAME string, -DEST_CITY_NAME string, -YEAR int, -MONTH int, -DAY_OF_MONTH int, -ARR_DELAY float, -FL_NUM string -); - -LOAD DATA LOCAL INPATH '../data/files/flights_tiny.txt' OVERWRITE INTO TABLE flights_tiny; - --- 1. 
basic Npath test -select origin_city_name, fl_num, year, month, day_of_month, sz, tpath -from npath(on - flights_tiny - distribute by fl_num - sort by year, month, day_of_month - arg1('LATE.LATE+'), - arg2('LATE'), arg3(arr_delay > 15), - arg4('origin_city_name, fl_num, year, month, day_of_month, size(tpath) as sz, tpath[0].day_of_month as tpath') - ); - --- 2. Npath on 1 partition -select origin_city_name, fl_num, year, month, day_of_month, sz, tpath -from npath(on - flights_tiny - sort by fl_num, year, month, day_of_month - arg1('LATE.LATE+'), - arg2('LATE'), arg3(arr_delay > 15), - arg4('origin_city_name, fl_num, year, month, day_of_month, size(tpath) as sz, tpath[0].day_of_month as tpath') - ) -where fl_num = 1142; - \ No newline at end of file Index: ql/src/test/queries/clientpositive/ptf_regexpath.q =================================================================== --- ql/src/test/queries/clientpositive/ptf_regexpath.q (working copy) +++ ql/src/test/queries/clientpositive/ptf_regexpath.q (working copy) @@ -1,4 +1,4 @@ -DROP TABLE flights_tiny; +DROP TABLE IF EXISTS flights_tiny; create table flights_tiny ( ORIGIN_CITY_NAME string, @@ -12,9 +12,9 @@ LOAD DATA LOCAL INPATH '../data/files/flights_tiny.txt' OVERWRITE INTO TABLE flights_tiny; --- 1. basic Npath test +-- 1. basic regex_path test select origin_city_name, fl_num, year, month, day_of_month, sz, tpath -from npath(on +from regex_path(on flights_tiny distribute by fl_num sort by year, month, day_of_month @@ -23,9 +23,9 @@ arg4('origin_city_name, fl_num, year, month, day_of_month, size(tpath) as sz, tpath[0].day_of_month as tpath') ); --- 2. Npath on 1 partition +-- 2. 
RegexPath on 1 partition select origin_city_name, fl_num, year, month, day_of_month, sz, tpath -from npath(on +from regex_path(on flights_tiny sort by fl_num, year, month, day_of_month arg1('LATE.LATE+'), @@ -33,4 +33,4 @@ arg4('origin_city_name, fl_num, year, month, day_of_month, size(tpath) as sz, tpath[0].day_of_month as tpath') ) where fl_num = 1142; - \ No newline at end of file + Index: ql/src/test/queries/clientpositive/ptf_register_tblfn.q =================================================================== --- ql/src/test/queries/clientpositive/ptf_register_tblfn.q (revision 1513727) +++ ql/src/test/queries/clientpositive/ptf_register_tblfn.q (working copy) @@ -12,12 +12,12 @@ LOAD DATA LOCAL INPATH '../data/files/flights_tiny.txt' OVERWRITE INTO TABLE flights_tiny; -create temporary function npathtest as 'org.apache.hadoop.hive.ql.udf.ptf.NPath$NPathResolver'; +create temporary function regexpathtest as 'org.apache.hadoop.hive.ql.udf.ptf.RegexPath$RegexPathResolver'; --- 1. basic Npath test +-- 1. basic RegexPath test select origin_city_name, fl_num, year, month, day_of_month, sz, tpath -from npathtest(on +from regexpathtest(on flights_tiny distribute by fl_num sort by year, month, day_of_month @@ -26,4 +26,4 @@ arg4('origin_city_name, fl_num, year, month, day_of_month, size(tpath) as sz, tpath[0].day_of_month as tpath') ); -drop temporary function npathtest; +drop temporary function regexpathtest; Index: ql/src/test/results/clientpositive/show_functions.q.out =================================================================== --- ql/src/test/results/clientpositive/show_functions.q.out (revision 1513727) +++ ql/src/test/results/clientpositive/show_functions.q.out (working copy) @@ -116,7 +116,6 @@ noop noopwithmap not -npath ntile nvl or @@ -136,6 +135,7 @@ rank reflect reflect2 +regex_path regexp regexp_extract regexp_replace