diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java index d151eb4..30df771 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java @@ -132,7 +132,7 @@ import org.apache.hadoop.hive.ql.udf.generic.*; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFLeadLag.GenericUDFLag; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFLeadLag.GenericUDFLead; -import org.apache.hadoop.hive.ql.udf.ptf.NPath.NPathResolver; +import org.apache.hadoop.hive.ql.udf.ptf.MatchPath.MatchPathResolver; import org.apache.hadoop.hive.ql.udf.ptf.Noop.NoopResolver; import org.apache.hadoop.hive.ql.udf.ptf.NoopWithMap.NoopWithMapResolver; import org.apache.hadoop.hive.ql.udf.ptf.TableFunctionResolver; @@ -456,7 +456,7 @@ registerTableFunction(NOOP_TABLE_FUNCTION, NoopResolver.class); registerTableFunction(NOOP_MAP_TABLE_FUNCTION, NoopWithMapResolver.class); registerTableFunction(WINDOWING_TABLE_FUNCTION, WindowingTableFunctionResolver.class); - registerTableFunction("npath", NPathResolver.class); + registerTableFunction("matchpath", MatchPathResolver.class); } public static void registerTemporaryUDF(String functionName, diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/MatchPath.java ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/MatchPath.java new file mode 100644 index 0000000..31fa5e3 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/MatchPath.java @@ -0,0 +1,919 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.udf.ptf; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.hadoop.hive.ql.exec.ColumnInfo; +import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator; +import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory; +import org.apache.hadoop.hive.ql.exec.PTFPartition; +import org.apache.hadoop.hive.ql.exec.PTFPartition.PTFPartitionIterator; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.parse.ASTNode; +import org.apache.hadoop.hive.ql.parse.PTFTranslator; +import org.apache.hadoop.hive.ql.parse.RowResolver; +import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.parse.TypeCheckCtx; +import org.apache.hadoop.hive.ql.parse.TypeCheckProcFactory; +import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowExpressionSpec; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.PTFDesc; +import org.apache.hadoop.hive.ql.plan.PTFDesc.PTFExpressionDef; +import org.apache.hadoop.hive.ql.plan.PTFDesc.PTFInputDef; +import org.apache.hadoop.hive.ql.plan.PTFDesc.PartitionedTableFunctionDef; +import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; 
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StandardListObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; + +/** + * return rows that meet a specified pattern. Use symbols to specify a list of expressions + * to match. + * Pattern is used to specify a Path. The results list can contain expressions based on + * the input columns and also the matched Path. + *
    + *
  1. pattern: pattern for the Path. Path is a 'dot'-separated list of symbols. + * Each element is treated as a symbol. Elements that end in '*' or '+' are interpreted with + * the usual meaning of zero or more, one or more respectively. For example, + * "LATE.EARLY*.ONTIMEOREARLY" implies a sequence of flights + * where the first occurrence was LATE, followed by zero or more EARLY flights, + * followed by an ONTIME or EARLY flight. + *
  2. symbols specify a list of (name, expression) pairs. For example, + * 'LATE', arrival_delay > 0, 'EARLY', arrival_delay < 0 , 'ONTIME', arrival_delay == 0. + * These symbols can be used in the Pattern defined above. + *
  3. resultSelectList: specified as a select list. + * The expressions in the select list are evaluated in a context where all the + * input columns are available, plus the attribute "tpath". + * "tpath" is the collection of rows that represents the matching Path. + *
+ */ +public class MatchPath extends TableFunctionEvaluator +{ + private transient String patternStr; + private transient SymbolsInfo symInfo; + private transient String resultExprStr; + private transient SymbolFunction syFn; + private ResultExprInfo resultExprInfo; + /* + * the names of the Columns of the input to MatchPath. Used to setup the tpath Struct column. + */ + private HashMap inputColumnNamesMap; + + @Override + public void execute(PTFPartitionIterator pItr, PTFPartition outP) throws HiveException + { + while (pItr.hasNext()) + { + Object iRow = pItr.next(); + + SymbolFunctionResult syFnRes = SymbolFunction.match(syFn, iRow, pItr); + if (syFnRes.matches ) + { + int sz = syFnRes.nextRow - (pItr.getIndex() - 1); + Object selectListInput = MatchPath.getSelectListInput(iRow, + tDef.getInput().getOutputShape().getOI(), pItr, sz); + ArrayList oRow = new ArrayList(); + for(ExprNodeEvaluator resExprEval : resultExprInfo.resultExprEvals) + { + oRow.add(resExprEval.evaluate(selectListInput)); + } + outP.append(oRow); + } + } + } + + static void throwErrorWithSignature(String message) throws SemanticException + { + throw new SemanticException(String.format( + "MatchPath signature is: SymbolPattern, one or more SymbolName, " + + "expression pairs, the result expression as a select list. Error %s", + message)); + } + + public HashMap getInputColumnNames() { + return inputColumnNamesMap; + } + + public void setInputColumnNames(HashMap inputColumnNamesMap) { + this.inputColumnNamesMap = inputColumnNamesMap; + } + + public static class MatchPathResolver extends TableFunctionResolver + { + + @Override + protected TableFunctionEvaluator createEvaluator(PTFDesc ptfDesc, + PartitionedTableFunctionDef tDef) + { + + return new MatchPath(); + } + + /** + *
    + *
  • check structure of Arguments: + *
      + *
    1. First arg should be a constant String (the symbol pattern) + *
    2. then there should be an even number of Arguments: + * constant String, expression pairs; each expression must be of boolean type. + *
    3. finally there should be a constant String (the result expression select list). + *
    + *
  • convert pattern into a SymbolFunction chain. + *
  • convert symbol args into a Symbol Map. + *
  • parse selectList into SelectList struct. The inputOI used to translate + * these expressions should be based on the + * columns in the Input plus the 'tpath' attribute. + *
+ */ + @Override + public void setupOutputOI() throws SemanticException + { + MatchPath evaluator = (MatchPath) getEvaluator(); + PartitionedTableFunctionDef tDef = evaluator.getTableDef(); + + ArrayList args = tDef.getArgs(); + int argsNum = args == null ? 0 : args.size(); + + if ( argsNum < 4 ) + { + throwErrorWithSignature("at least 4 arguments required"); + } + + validateAndSetupPatternStr(evaluator, args); + validateAndSetupSymbolInfo(evaluator, args, argsNum); + validateAndSetupResultExprStr(evaluator, args, argsNum); + setupSymbolFunctionChain(evaluator); + + /* + * setup OI for input to resultExpr select list + */ + RowResolver selectListInputRR = MatchPath.createSelectListRR(evaluator, tDef.getInput()); + + /* + * parse ResultExpr Str and setup OI. + */ + ResultExpressionParser resultExprParser = + new ResultExpressionParser(evaluator.resultExprStr, selectListInputRR); + try { + resultExprParser.translate(); + } + catch(HiveException he) { + throw new SemanticException(he); + } + evaluator.resultExprInfo = resultExprParser.getResultExprInfo(); + StructObjectInspector OI = evaluator.resultExprInfo.resultOI; + + setOutputOI(OI); + } + /* + * validate and setup patternStr + */ + private void validateAndSetupPatternStr(MatchPath evaluator, + ArrayList args) throws SemanticException { + PTFExpressionDef symboPatternArg = args.get(0); + ObjectInspector symbolPatternArgOI = symboPatternArg.getOI(); + + if ( !ObjectInspectorUtils.isConstantObjectInspector(symbolPatternArgOI) || + (symbolPatternArgOI.getCategory() != ObjectInspector.Category.PRIMITIVE) || + ((PrimitiveObjectInspector)symbolPatternArgOI).getPrimitiveCategory() != + PrimitiveObjectInspector.PrimitiveCategory.STRING ) + { + throwErrorWithSignature("Currently the symbol Pattern must be a Constant String."); + } + + evaluator.patternStr = ((ConstantObjectInspector)symbolPatternArgOI). 
+ getWritableConstantValue().toString(); + } + + /* + * validate and setup SymbolInfo + */ + private void validateAndSetupSymbolInfo(MatchPath evaluator, + ArrayList args, + int argsNum) throws SemanticException { + int symbolArgsSz = argsNum - 2; + if ( symbolArgsSz % 2 != 0) + { + throwErrorWithSignature("Symbol Name, Expression need to be specified in pairs: " + + "there are odd number of symbol args"); + } + + evaluator.symInfo = new SymbolsInfo(symbolArgsSz/2); + for(int i=1; i <= symbolArgsSz; i += 2) + { + PTFExpressionDef symbolNameArg = args.get(i); + ObjectInspector symbolNameArgOI = symbolNameArg.getOI(); + + if ( !ObjectInspectorUtils.isConstantObjectInspector(symbolNameArgOI) || + (symbolNameArgOI.getCategory() != ObjectInspector.Category.PRIMITIVE) || + ((PrimitiveObjectInspector)symbolNameArgOI).getPrimitiveCategory() != + PrimitiveObjectInspector.PrimitiveCategory.STRING ) + { + throwErrorWithSignature( + String.format("Currently a Symbol Name(%s) must be a Constant String", + symbolNameArg.getExpressionTreeString())); + } + String symbolName = ((ConstantObjectInspector)symbolNameArgOI). 
+ getWritableConstantValue().toString(); + + PTFExpressionDef symolExprArg = args.get(i+1); + ObjectInspector symolExprArgOI = symolExprArg.getOI(); + if ( (symolExprArgOI.getCategory() != ObjectInspector.Category.PRIMITIVE) || + ((PrimitiveObjectInspector)symolExprArgOI).getPrimitiveCategory() != + PrimitiveObjectInspector.PrimitiveCategory.BOOLEAN ) + { + throwErrorWithSignature(String.format("Currently a Symbol Expression(%s) " + + "must be a boolean expression", symolExprArg.getExpressionTreeString())); + } + evaluator.symInfo.add(symbolName, symolExprArg); + } + } + + /* + * validate and setup resultExprStr + */ + private void validateAndSetupResultExprStr(MatchPath evaluator, + ArrayList args, + int argsNum) throws SemanticException { + PTFExpressionDef resultExprArg = args.get(argsNum - 1); + ObjectInspector resultExprArgOI = resultExprArg.getOI(); + + if ( !ObjectInspectorUtils.isConstantObjectInspector(resultExprArgOI) || + (resultExprArgOI.getCategory() != ObjectInspector.Category.PRIMITIVE) || + ((PrimitiveObjectInspector)resultExprArgOI).getPrimitiveCategory() != + PrimitiveObjectInspector.PrimitiveCategory.STRING ) + { + throwErrorWithSignature("Currently the result Expr parameter must be a Constant String."); + } + + evaluator.resultExprStr = ((ConstantObjectInspector)resultExprArgOI). + getWritableConstantValue().toString(); + } + + /* + * setup SymbolFunction chain. 
+ */ + private void setupSymbolFunctionChain(MatchPath evaluator) throws SemanticException { + SymbolParser syP = new SymbolParser(evaluator.patternStr, + evaluator.symInfo.symbolExprsNames, + evaluator.symInfo.symbolExprsEvaluators, evaluator.symInfo.symbolExprsOIs); + syP.parse(); + evaluator.syFn = syP.getSymbolFunction(); + } + + @Override + public boolean transformsRawInput() + { + return false; + } + + @Override + public void initializeOutputOI() throws HiveException { + try { + MatchPath evaluator = (MatchPath) getEvaluator(); + PartitionedTableFunctionDef tDef = evaluator.getTableDef(); + + ArrayList args = tDef.getArgs(); + int argsNum = args.size(); + + validateAndSetupPatternStr(evaluator, args); + validateAndSetupSymbolInfo(evaluator, args, argsNum); + validateAndSetupResultExprStr(evaluator, args, argsNum); + setupSymbolFunctionChain(evaluator); + + /* + * setup OI for input to resultExpr select list + */ + StructObjectInspector selectListInputOI = MatchPath.createSelectListOI( evaluator, + tDef.getInput()); + ResultExprInfo resultExprInfo = evaluator.resultExprInfo; + ArrayList selectListExprOIs = new ArrayList(); + resultExprInfo.resultExprEvals = new ArrayList(); + + for(int i=0 ; i < resultExprInfo.resultExprNodes.size(); i++) { + ExprNodeDesc selectColumnExprNode =resultExprInfo.resultExprNodes.get(i); + ExprNodeEvaluator selectColumnExprEval = + ExprNodeEvaluatorFactory.get(selectColumnExprNode); + ObjectInspector selectColumnOI = selectColumnExprEval.initialize(selectListInputOI); + resultExprInfo.resultExprEvals.add(selectColumnExprEval); + selectListExprOIs.add(selectColumnOI); + } + + resultExprInfo.resultOI = ObjectInspectorFactory.getStandardStructObjectInspector( + resultExprInfo.resultExprNames, selectListExprOIs); + setOutputOI(resultExprInfo.resultOI); + } + catch(SemanticException se) { + throw new HiveException(se); + } + } + + @Override + public ArrayList getOutputColumnNames() { + MatchPath evaluator = (MatchPath) getEvaluator(); + 
return evaluator.resultExprInfo.getResultExprNames(); + } + + } + + public ResultExprInfo getResultExprInfo() { + return resultExprInfo; + } + + public void setResultExprInfo(ResultExprInfo resultExprInfo) { + this.resultExprInfo = resultExprInfo; + } + + static class SymbolsInfo { + int sz; + ArrayList symbolExprsEvaluators; + ArrayList symbolExprsOIs; + ArrayList symbolExprsNames; + + SymbolsInfo(int sz) + { + this.sz = sz; + symbolExprsEvaluators = new ArrayList(sz); + symbolExprsOIs = new ArrayList(sz); + symbolExprsNames = new ArrayList(sz); + } + + void add(String name, PTFExpressionDef arg) + { + symbolExprsNames.add(name); + symbolExprsEvaluators.add(arg.getExprEvaluator()); + symbolExprsOIs.add(arg.getOI()); + } + } + + public static class ResultExprInfo { + ArrayList resultExprNames; + ArrayList resultExprNodes; + private transient ArrayList resultExprEvals; + private transient StructObjectInspector resultOI; + + public ArrayList getResultExprNames() { + return resultExprNames; + } + public void setResultExprNames(ArrayList resultExprNames) { + this.resultExprNames = resultExprNames; + } + public ArrayList getResultExprNodes() { + return resultExprNodes; + } + public void setResultExprNodes(ArrayList resultExprNodes) { + this.resultExprNodes = resultExprNodes; + } + } + + public static abstract class SymbolFunction + { + SymbolFunctionResult result; + + public SymbolFunction() + { + result = new SymbolFunctionResult(); + } + + public static SymbolFunctionResult match(SymbolFunction syFn, Object row, + PTFPartitionIterator pItr) throws HiveException + { + int resetToIdx = pItr.getIndex() - 1; + try + { + return syFn.match(row, pItr); + } finally + { + pItr.resetToIndex(resetToIdx); + } + } + + protected abstract SymbolFunctionResult match(Object row, PTFPartitionIterator pItr) + throws HiveException; + + protected abstract boolean isOptional(); + } + + public static class Symbol extends SymbolFunction { + ExprNodeEvaluator symbolExprEval; + Converter 
converter; + + public Symbol(ExprNodeEvaluator symbolExprEval, ObjectInspector symbolOI) + { + this.symbolExprEval = symbolExprEval; + converter = ObjectInspectorConverters.getConverter( + symbolOI, + PrimitiveObjectInspectorFactory.javaBooleanObjectInspector); + } + + @Override + protected SymbolFunctionResult match(Object row, PTFPartitionIterator pItr) + throws HiveException + { + Object val = null; + val = symbolExprEval.evaluate(row); + val = converter.convert(val); + result.matches = ((Boolean) val).booleanValue(); + result.nextRow = pItr.getIndex(); + + return result; + } + + @Override + protected boolean isOptional() + { + return false; + } + } + + public static class Star extends SymbolFunction { + SymbolFunction symbolFn; + + public Star(SymbolFunction symbolFn) + { + this.symbolFn = symbolFn; + } + + @Override + protected SymbolFunctionResult match(Object row, PTFPartitionIterator pItr) + throws HiveException + { + result.matches = true; + SymbolFunctionResult rowResult = symbolFn.match(row, pItr); + + while (rowResult.matches && pItr.hasNext()) + { + row = pItr.next(); + rowResult = symbolFn.match(row, pItr); + } + + result.nextRow = pItr.getIndex() - 1; + return result; + } + + @Override + protected boolean isOptional() + { + return true; + } + } + + public static class Plus extends SymbolFunction { + SymbolFunction symbolFn; + + public Plus(SymbolFunction symbolFn) + { + this.symbolFn = symbolFn; + } + + @Override + protected SymbolFunctionResult match(Object row, PTFPartitionIterator pItr) + throws HiveException + { + SymbolFunctionResult rowResult = symbolFn.match(row, pItr); + + if (!rowResult.matches) + { + result.matches = false; + result.nextRow = pItr.getIndex() - 1; + return result; + } + + result.matches = true; + while (rowResult.matches && pItr.hasNext()) + { + row = pItr.next(); + rowResult = symbolFn.match(row, pItr); + } + + result.nextRow = pItr.getIndex() - 1; + return result; + } + + @Override + protected boolean isOptional() + { + 
return false; + } + } + + public static class Chain extends SymbolFunction + { + ArrayList components; + + public Chain(ArrayList components) + { + this.components = components; + } + + /* + * Iterate over the Symbol Functions in the Chain: + * - If we are not at the end of the Iterator (i.e. row != null ) + * - match the current componentFn + * - if it returns false, then return false + * - otherwise set row to the next row from the Iterator. + * - if we are at the end of the Iterator + * - skip any optional Symbol Fns (star patterns) at the end. + * - but if we come to a non optional Symbol Fn, return false. + * - if we match all Fns in the chain return true. + */ + @Override + protected SymbolFunctionResult match(Object row, PTFPartitionIterator pItr) + throws HiveException + { + SymbolFunctionResult componentResult = null; + for (SymbolFunction sFn : components) + { + if (row != null) + { + componentResult = sFn.match(row, pItr); + if (!componentResult.matches) + { + result.matches = false; + result.nextRow = componentResult.nextRow; + return result; + } + row = pItr.resetToIndex(componentResult.nextRow); + } + else + { + if (!sFn.isOptional()) + { + result.matches = false; + result.nextRow = componentResult.nextRow; + return result; + } + } + } + + result.matches = true; + result.nextRow = componentResult.nextRow; + return result; + } + + @Override + protected boolean isOptional() + { + return false; + } + } + + + public static class SymbolFunctionResult + { + /* + * does the row match the pattern represented by this SymbolFunction + */ + public boolean matches; + /* + * what is the index of the row beyond the set of rows that match this pattern. 
+ */ + public int nextRow; + } + + public static class SymbolParser + { + String patternStr; + String[] symbols; + HashMap symbolExprEvalMap; + ArrayList symbolFunctions; + Chain symbolFnChain; + + + public SymbolParser(String patternStr, ArrayList symbolNames, + ArrayList symbolExprEvals, ArrayList symbolExprOIs) + { + super(); + this.patternStr = patternStr; + symbolExprEvalMap = new HashMap(); + int sz = symbolNames.size(); + for(int i=0; i < sz; i++) + { + String symbolName = symbolNames.get(i); + ExprNodeEvaluator symbolExprEval = symbolExprEvals.get(i); + ObjectInspector symbolExprOI = symbolExprOIs.get(i); + symbolExprEvalMap.put(symbolName.toLowerCase(), + new Object[] {symbolExprEval, symbolExprOI}); + } + } + + public SymbolFunction getSymbolFunction() + { + return symbolFnChain; + } + + public void parse() throws SemanticException + { + symbols = patternStr.split("\\."); + symbolFunctions = new ArrayList(); + + for(String symbol : symbols) + { + boolean isStar = symbol.endsWith("*"); + boolean isPlus = symbol.endsWith("+"); + + symbol = (isStar || isPlus) ? symbol.substring(0, symbol.length() - 1) : symbol; + Object[] symbolDetails = symbolExprEvalMap.get(symbol.toLowerCase()); + if ( symbolDetails == null ) + { + throw new SemanticException(String.format("Unknown Symbol %s", symbol)); + } + + ExprNodeEvaluator symbolExprEval = (ExprNodeEvaluator) symbolDetails[0]; + ObjectInspector symbolExprOI = (ObjectInspector) symbolDetails[1]; + SymbolFunction sFn = new Symbol(symbolExprEval, symbolExprOI); + + if ( isStar ) + { + sFn = new Star(sFn); + } + else if ( isPlus ) + { + sFn = new Plus(sFn); + } + symbolFunctions.add(sFn); + } + symbolFnChain = new Chain(symbolFunctions); + } + } + + /* + * ResultExpression is a Select List with the following variation: + * - the select keyword is optional. The parser checks if the expression doesn't start with + * select; if not it prefixes it. + * - Window Fn clauses are not permitted. 
+ * - expressions can operate on the input columns plus the psuedo column 'path' + * which is array of + * structs. The shape of the struct is + * the same as the input. + */ + public static class ResultExpressionParser { + String resultExprString; + + RowResolver selectListInputRowResolver; + TypeCheckCtx selectListInputTypeCheckCtx; + StructObjectInspector selectListInputOI; + + ArrayList selectSpec; + + ResultExprInfo resultExprInfo; + + public ResultExpressionParser(String resultExprString, + RowResolver selectListInputRowResolver) + { + this.resultExprString = resultExprString; + this.selectListInputRowResolver = selectListInputRowResolver; + } + + public void translate() throws SemanticException, HiveException + { + setupSelectListInputInfo(); + fixResultExprString(); + parse(); + validateSelectExpr(); + buildSelectListEvaluators(); + } + + public ResultExprInfo getResultExprInfo() { + return resultExprInfo; + } + + private void buildSelectListEvaluators() throws SemanticException, HiveException + { + resultExprInfo = new ResultExprInfo(); + resultExprInfo.resultExprEvals = new ArrayList(); + resultExprInfo.resultExprNames = new ArrayList(); + resultExprInfo.resultExprNodes = new ArrayList(); + //result + ArrayList selectListExprOIs = new ArrayList(); + int i = 0; + for(WindowExpressionSpec expr : selectSpec) + { + String selectColName = expr.getAlias(); + ASTNode selectColumnNode = expr.getExpression(); + ExprNodeDesc selectColumnExprNode = + ResultExpressionParser.buildExprNode(selectColumnNode, + selectListInputTypeCheckCtx); + ExprNodeEvaluator selectColumnExprEval = + ExprNodeEvaluatorFactory.get(selectColumnExprNode); + ObjectInspector selectColumnOI = null; + selectColumnOI = selectColumnExprEval.initialize(selectListInputOI); + + selectColName = getColumnName(selectColName, selectColumnExprNode, i); + + resultExprInfo.resultExprEvals.add(selectColumnExprEval); + selectListExprOIs.add(selectColumnOI); + 
resultExprInfo.resultExprNodes.add(selectColumnExprNode); + resultExprInfo.resultExprNames.add(selectColName); + i++; + } + + resultExprInfo.resultOI = ObjectInspectorFactory.getStandardStructObjectInspector( + resultExprInfo.resultExprNames, selectListExprOIs); + } + + private void setupSelectListInputInfo() throws SemanticException + { + selectListInputTypeCheckCtx = new TypeCheckCtx(selectListInputRowResolver); + selectListInputTypeCheckCtx.setUnparseTranslator(null); + /* + * create SelectListOI + */ + selectListInputOI = (StructObjectInspector) + PTFTranslator.getStandardStructOI(selectListInputRowResolver); + } + + private void fixResultExprString() + { + String r = resultExprString.trim(); + String prefix = r.substring(0, 6); + if (!prefix.toLowerCase().equals("select")) + { + r = "select " + r; + } + resultExprString = r; + } + + private void parse() throws SemanticException + { + selectSpec = SemanticAnalyzer.parseSelect(resultExprString); + } + + private void validateSelectExpr() throws SemanticException + { + for (WindowExpressionSpec expr : selectSpec) + { + PTFTranslator.validateNoLeadLagInValueBoundarySpec(expr.getExpression()); + } + } + + private String getColumnName(String alias, ExprNodeDesc exprNode, int colIdx) + { + if (alias != null) + { + return alias; + } + else if (exprNode instanceof ExprNodeColumnDesc) + { + ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) exprNode; + return colDesc.getColumn(); + } + return "matchpath_col_" + colIdx; + } + + public static ExprNodeDesc buildExprNode(ASTNode expr, + TypeCheckCtx typeCheckCtx) throws SemanticException + { + // todo: use SemanticAnalyzer::genExprNodeDesc + // currently SA not available to PTFTranslator. 
+ Map map = TypeCheckProcFactory + .genExprNode(expr, typeCheckCtx); + ExprNodeDesc desc = map.get(expr); + if (desc == null) { + String errMsg = typeCheckCtx.getError(); + if ( errMsg == null) { + errMsg = "Error in parsing "; + } + throw new SemanticException(errMsg); + } + return desc; + } + } + + public static final String PATHATTR_NAME = "tpath"; + + /* + * add array to the list of columns + */ + protected static RowResolver createSelectListRR(MatchPath evaluator, + PTFInputDef inpDef) throws SemanticException { + RowResolver rr = new RowResolver(); + RowResolver inputRR = inpDef.getOutputShape().getRr(); + + evaluator.inputColumnNamesMap = new HashMap(); + ArrayList inputColumnNames = new ArrayList(); + + ArrayList inpColOIs = new ArrayList(); + + for (ColumnInfo inpCInfo : inputRR.getColumnInfos()) { + ColumnInfo cInfo = new ColumnInfo(inpCInfo); + String colAlias = cInfo.getAlias(); + + String[] tabColAlias = inputRR.reverseLookup(inpCInfo.getInternalName()); + if (tabColAlias != null) { + colAlias = tabColAlias[1]; + } + ASTNode inExpr = null; + inExpr = PTFTranslator.getASTNode(inpCInfo, inputRR); + if ( inExpr != null ) { + rr.putExpression(inExpr, cInfo); + colAlias = inExpr.toStringTree().toLowerCase(); + } + else { + colAlias = colAlias == null ? 
cInfo.getInternalName() : colAlias; + rr.put(cInfo.getTabAlias(), colAlias, cInfo); + } + + evaluator.inputColumnNamesMap.put(cInfo.getInternalName(), colAlias); + inputColumnNames.add(colAlias); + inpColOIs.add(cInfo.getObjectInspector()); + } + + StandardListObjectInspector pathAttrOI = + ObjectInspectorFactory.getStandardListObjectInspector( + ObjectInspectorFactory.getStandardStructObjectInspector(inputColumnNames, + inpColOIs)); + + ColumnInfo pathColumn = new ColumnInfo(PATHATTR_NAME, + TypeInfoUtils.getTypeInfoFromObjectInspector(pathAttrOI), + null, + false, false); + rr.put(null, PATHATTR_NAME, pathColumn); + + return rr; + } + + protected static StructObjectInspector createSelectListOI(MatchPath evaluator, PTFInputDef inpDef) { + StructObjectInspector inOI = inpDef.getOutputShape().getOI(); + ArrayList inputColumnNames = new ArrayList(); + ArrayList selectListNames = new ArrayList(); + ArrayList fieldOIs = new ArrayList(); + for(StructField f : inOI.getAllStructFieldRefs()) { + String inputColName = evaluator.inputColumnNamesMap.get(f.getFieldName()); + if ( inputColName != null ) { + inputColumnNames.add(inputColName); + selectListNames.add(f.getFieldName()); + fieldOIs.add(f.getFieldObjectInspector()); + } + } + + StandardListObjectInspector pathAttrOI = + ObjectInspectorFactory.getStandardListObjectInspector( + ObjectInspectorFactory.getStandardStructObjectInspector(inputColumnNames, + fieldOIs)); + + ArrayList selectFieldOIs = new ArrayList(); + selectFieldOIs.addAll(fieldOIs); + selectFieldOIs.add(pathAttrOI); + selectListNames.add(MatchPath.PATHATTR_NAME); + return ObjectInspectorFactory.getStandardStructObjectInspector( + selectListNames, selectFieldOIs); + } + + public static Object getSelectListInput(Object currRow, ObjectInspector rowOI, + PTFPartitionIterator pItr, int sz) throws HiveException { + ArrayList oRow = new ArrayList(); + List currRowAsStdObject = (List) ObjectInspectorUtils + .copyToStandardObject(currRow, rowOI); + 
oRow.addAll(currRowAsStdObject); + oRow.add(getPath(currRow, rowOI, pItr, sz)); + return oRow; + } + + public static ArrayList getPath(Object currRow, ObjectInspector rowOI, + PTFPartitionIterator pItr, int sz) throws HiveException { + int idx = pItr.getIndex() - 1; + ArrayList path = new ArrayList(); + path.add(ObjectInspectorUtils.copyToStandardObject(currRow, rowOI)); + int pSz = 1; + + while (pSz < sz && pItr.hasNext()) + { + currRow = pItr.next(); + path.add(ObjectInspectorUtils.copyToStandardObject(currRow, rowOI)); + pSz++; + } + pItr.resetToIndex(idx); + return path; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/NPath.java ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/NPath.java deleted file mode 100644 index 30956e5..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/NPath.java +++ /dev/null @@ -1,919 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.hive.ql.udf.ptf; - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import org.apache.hadoop.hive.ql.exec.ColumnInfo; -import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator; -import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory; -import org.apache.hadoop.hive.ql.exec.PTFPartition; -import org.apache.hadoop.hive.ql.exec.PTFPartition.PTFPartitionIterator; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.parse.ASTNode; -import org.apache.hadoop.hive.ql.parse.PTFTranslator; -import org.apache.hadoop.hive.ql.parse.RowResolver; -import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer; -import org.apache.hadoop.hive.ql.parse.SemanticException; -import org.apache.hadoop.hive.ql.parse.TypeCheckCtx; -import org.apache.hadoop.hive.ql.parse.TypeCheckProcFactory; -import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowExpressionSpec; -import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; -import org.apache.hadoop.hive.ql.plan.PTFDesc; -import org.apache.hadoop.hive.ql.plan.PTFDesc.PTFExpressionDef; -import org.apache.hadoop.hive.ql.plan.PTFDesc.PTFInputDef; -import org.apache.hadoop.hive.ql.plan.PTFDesc.PartitionedTableFunctionDef; -import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; -import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.StandardListObjectInspector; -import 
org.apache.hadoop.hive.serde2.objectinspector.StructField; -import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; - -/** - * return rows that meet a specified pattern. Use symbols to specify a list of expressions - * to match. - * Pattern is used to specify a Path. The results list can contain expressions based on - * the input columns and also the matched Path. - *
    - *
  1. pattern: pattern for the Path. Path is 'dot' separated list of symbols. - * Each element is treated as a symbol. Elements that end in '*' or '+' are interpreted with - * the usual meaning of zero or more, one or more respectively. For e.g. - * "LATE.EARLY*.ONTIMEOREARLY" implies a sequence of flights - * where the first occurrence was LATE, followed by zero or more EARLY flights, - * followed by a ONTIME or EARLY flight. - *
  2. symbols specify a list of name, expression pairs. For e.g. - * 'LATE', arrival_delay > 0, 'EARLY', arrival_delay < 0 , 'ONTIME', arrival_delay == 0. - * These symbols can be used in the Pattern defined above. - *
  3. resultSelectList specified as a select list. - * The expressions in the selectList are evaluated in the context where all the - * input columns are available, plus the attribute - * "tpath" is available. Path is a collection of rows that represents the matching Path. - *
- */ -public class NPath extends TableFunctionEvaluator -{ - private transient String patternStr; - private transient SymbolsInfo symInfo; - private transient String resultExprStr; - private transient SymbolFunction syFn; - private ResultExprInfo resultExprInfo; - /* - * the names of the Columns of the input to NPath. Used to setup the tpath Struct column. - */ - private HashMap inputColumnNamesMap; - - @Override - public void execute(PTFPartitionIterator pItr, PTFPartition outP) throws HiveException - { - while (pItr.hasNext()) - { - Object iRow = pItr.next(); - - SymbolFunctionResult syFnRes = SymbolFunction.match(syFn, iRow, pItr); - if (syFnRes.matches ) - { - int sz = syFnRes.nextRow - (pItr.getIndex() - 1); - Object selectListInput = NPath.getSelectListInput(iRow, - tDef.getInput().getOutputShape().getOI(), pItr, sz); - ArrayList oRow = new ArrayList(); - for(ExprNodeEvaluator resExprEval : resultExprInfo.resultExprEvals) - { - oRow.add(resExprEval.evaluate(selectListInput)); - } - outP.append(oRow); - } - } - } - - static void throwErrorWithSignature(String message) throws SemanticException - { - throw new SemanticException(String.format( - "NPath signature is: SymbolPattern, one or more SymbolName, " + - "expression pairs, the result expression as a select list. Error %s", - message)); - } - - public HashMap getInputColumnNames() { - return inputColumnNamesMap; - } - - public void setInputColumnNames(HashMap inputColumnNamesMap) { - this.inputColumnNamesMap = inputColumnNamesMap; - } - - public static class NPathResolver extends TableFunctionResolver - { - - @Override - protected TableFunctionEvaluator createEvaluator(PTFDesc ptfDesc, - PartitionedTableFunctionDef tDef) - { - - return new NPath(); - } - - /** - *
    - *
  • check structure of Arguments: - *
      - *
    1. First arg should be a String - *
    2. then there should be an even number of Arguments: - * String, expression; expression should be Convertible to Boolean. - *
    3. finally there should be a String. - *
    - *
  • convert pattern into a NNode chain. - *
  • convert symbol args into a Symbol Map. - *
  • parse selectList into SelectList struct. The inputOI used to translate - * these expressions should be based on the - * columns in the Input, the 'path.attr' - *
- */ - @Override - public void setupOutputOI() throws SemanticException - { - NPath evaluator = (NPath) getEvaluator(); - PartitionedTableFunctionDef tDef = evaluator.getTableDef(); - - ArrayList args = tDef.getArgs(); - int argsNum = args == null ? 0 : args.size(); - - if ( argsNum < 4 ) - { - throwErrorWithSignature("at least 4 arguments required"); - } - - validateAndSetupPatternStr(evaluator, args); - validateAndSetupSymbolInfo(evaluator, args, argsNum); - validateAndSetupResultExprStr(evaluator, args, argsNum); - setupSymbolFunctionChain(evaluator); - - /* - * setup OI for input to resultExpr select list - */ - RowResolver selectListInputRR = NPath.createSelectListRR(evaluator, tDef.getInput()); - - /* - * parse ResultExpr Str and setup OI. - */ - ResultExpressionParser resultExprParser = - new ResultExpressionParser(evaluator.resultExprStr, selectListInputRR); - try { - resultExprParser.translate(); - } - catch(HiveException he) { - throw new SemanticException(he); - } - evaluator.resultExprInfo = resultExprParser.getResultExprInfo(); - StructObjectInspector OI = evaluator.resultExprInfo.resultOI; - - setOutputOI(OI); - } - /* - * validate and setup patternStr - */ - private void validateAndSetupPatternStr(NPath evaluator, - ArrayList args) throws SemanticException { - PTFExpressionDef symboPatternArg = args.get(0); - ObjectInspector symbolPatternArgOI = symboPatternArg.getOI(); - - if ( !ObjectInspectorUtils.isConstantObjectInspector(symbolPatternArgOI) || - (symbolPatternArgOI.getCategory() != ObjectInspector.Category.PRIMITIVE) || - ((PrimitiveObjectInspector)symbolPatternArgOI).getPrimitiveCategory() != - PrimitiveObjectInspector.PrimitiveCategory.STRING ) - { - throwErrorWithSignature("Currently the symbol Pattern must be a Constant String."); - } - - evaluator.patternStr = ((ConstantObjectInspector)symbolPatternArgOI). 
- getWritableConstantValue().toString(); - } - - /* - * validate and setup SymbolInfo - */ - private void validateAndSetupSymbolInfo(NPath evaluator, - ArrayList args, - int argsNum) throws SemanticException { - int symbolArgsSz = argsNum - 2; - if ( symbolArgsSz % 2 != 0) - { - throwErrorWithSignature("Symbol Name, Expression need to be specified in pairs: " + - "there are odd number of symbol args"); - } - - evaluator.symInfo = new SymbolsInfo(symbolArgsSz/2); - for(int i=1; i <= symbolArgsSz; i += 2) - { - PTFExpressionDef symbolNameArg = args.get(i); - ObjectInspector symbolNameArgOI = symbolNameArg.getOI(); - - if ( !ObjectInspectorUtils.isConstantObjectInspector(symbolNameArgOI) || - (symbolNameArgOI.getCategory() != ObjectInspector.Category.PRIMITIVE) || - ((PrimitiveObjectInspector)symbolNameArgOI).getPrimitiveCategory() != - PrimitiveObjectInspector.PrimitiveCategory.STRING ) - { - throwErrorWithSignature( - String.format("Currently a Symbol Name(%s) must be a Constant String", - symbolNameArg.getExpressionTreeString())); - } - String symbolName = ((ConstantObjectInspector)symbolNameArgOI). 
- getWritableConstantValue().toString(); - - PTFExpressionDef symolExprArg = args.get(i+1); - ObjectInspector symolExprArgOI = symolExprArg.getOI(); - if ( (symolExprArgOI.getCategory() != ObjectInspector.Category.PRIMITIVE) || - ((PrimitiveObjectInspector)symolExprArgOI).getPrimitiveCategory() != - PrimitiveObjectInspector.PrimitiveCategory.BOOLEAN ) - { - throwErrorWithSignature(String.format("Currently a Symbol Expression(%s) " + - "must be a boolean expression", symolExprArg.getExpressionTreeString())); - } - evaluator.symInfo.add(symbolName, symolExprArg); - } - } - - /* - * validate and setup resultExprStr - */ - private void validateAndSetupResultExprStr(NPath evaluator, - ArrayList args, - int argsNum) throws SemanticException { - PTFExpressionDef resultExprArg = args.get(argsNum - 1); - ObjectInspector resultExprArgOI = resultExprArg.getOI(); - - if ( !ObjectInspectorUtils.isConstantObjectInspector(resultExprArgOI) || - (resultExprArgOI.getCategory() != ObjectInspector.Category.PRIMITIVE) || - ((PrimitiveObjectInspector)resultExprArgOI).getPrimitiveCategory() != - PrimitiveObjectInspector.PrimitiveCategory.STRING ) - { - throwErrorWithSignature("Currently the result Expr parameter must be a Constant String."); - } - - evaluator.resultExprStr = ((ConstantObjectInspector)resultExprArgOI). - getWritableConstantValue().toString(); - } - - /* - * setup SymbolFunction chain. 
- */ - private void setupSymbolFunctionChain(NPath evaluator) throws SemanticException { - SymbolParser syP = new SymbolParser(evaluator.patternStr, - evaluator.symInfo.symbolExprsNames, - evaluator.symInfo.symbolExprsEvaluators, evaluator.symInfo.symbolExprsOIs); - syP.parse(); - evaluator.syFn = syP.getSymbolFunction(); - } - - @Override - public boolean transformsRawInput() - { - return false; - } - - @Override - public void initializeOutputOI() throws HiveException { - try { - NPath evaluator = (NPath) getEvaluator(); - PartitionedTableFunctionDef tDef = evaluator.getTableDef(); - - ArrayList args = tDef.getArgs(); - int argsNum = args.size(); - - validateAndSetupPatternStr(evaluator, args); - validateAndSetupSymbolInfo(evaluator, args, argsNum); - validateAndSetupResultExprStr(evaluator, args, argsNum); - setupSymbolFunctionChain(evaluator); - - /* - * setup OI for input to resultExpr select list - */ - StructObjectInspector selectListInputOI = NPath.createSelectListOI( evaluator, - tDef.getInput()); - ResultExprInfo resultExprInfo = evaluator.resultExprInfo; - ArrayList selectListExprOIs = new ArrayList(); - resultExprInfo.resultExprEvals = new ArrayList(); - - for(int i=0 ; i < resultExprInfo.resultExprNodes.size(); i++) { - ExprNodeDesc selectColumnExprNode =resultExprInfo.resultExprNodes.get(i); - ExprNodeEvaluator selectColumnExprEval = - ExprNodeEvaluatorFactory.get(selectColumnExprNode); - ObjectInspector selectColumnOI = selectColumnExprEval.initialize(selectListInputOI); - resultExprInfo.resultExprEvals.add(selectColumnExprEval); - selectListExprOIs.add(selectColumnOI); - } - - resultExprInfo.resultOI = ObjectInspectorFactory.getStandardStructObjectInspector( - resultExprInfo.resultExprNames, selectListExprOIs); - setOutputOI(resultExprInfo.resultOI); - } - catch(SemanticException se) { - throw new HiveException(se); - } - } - - @Override - public ArrayList getOutputColumnNames() { - NPath evaluator = (NPath) getEvaluator(); - return 
evaluator.resultExprInfo.getResultExprNames(); - } - - } - - public ResultExprInfo getResultExprInfo() { - return resultExprInfo; - } - - public void setResultExprInfo(ResultExprInfo resultExprInfo) { - this.resultExprInfo = resultExprInfo; - } - - static class SymbolsInfo { - int sz; - ArrayList symbolExprsEvaluators; - ArrayList symbolExprsOIs; - ArrayList symbolExprsNames; - - SymbolsInfo(int sz) - { - this.sz = sz; - symbolExprsEvaluators = new ArrayList(sz); - symbolExprsOIs = new ArrayList(sz); - symbolExprsNames = new ArrayList(sz); - } - - void add(String name, PTFExpressionDef arg) - { - symbolExprsNames.add(name); - symbolExprsEvaluators.add(arg.getExprEvaluator()); - symbolExprsOIs.add(arg.getOI()); - } - } - - public static class ResultExprInfo { - ArrayList resultExprNames; - ArrayList resultExprNodes; - private transient ArrayList resultExprEvals; - private transient StructObjectInspector resultOI; - - public ArrayList getResultExprNames() { - return resultExprNames; - } - public void setResultExprNames(ArrayList resultExprNames) { - this.resultExprNames = resultExprNames; - } - public ArrayList getResultExprNodes() { - return resultExprNodes; - } - public void setResultExprNodes(ArrayList resultExprNodes) { - this.resultExprNodes = resultExprNodes; - } - } - - public static abstract class SymbolFunction - { - SymbolFunctionResult result; - - public SymbolFunction() - { - result = new SymbolFunctionResult(); - } - - public static SymbolFunctionResult match(SymbolFunction syFn, Object row, - PTFPartitionIterator pItr) throws HiveException - { - int resetToIdx = pItr.getIndex() - 1; - try - { - return syFn.match(row, pItr); - } finally - { - pItr.resetToIndex(resetToIdx); - } - } - - protected abstract SymbolFunctionResult match(Object row, PTFPartitionIterator pItr) - throws HiveException; - - protected abstract boolean isOptional(); - } - - public static class Symbol extends SymbolFunction { - ExprNodeEvaluator symbolExprEval; - Converter converter; - 
- public Symbol(ExprNodeEvaluator symbolExprEval, ObjectInspector symbolOI) - { - this.symbolExprEval = symbolExprEval; - converter = ObjectInspectorConverters.getConverter( - symbolOI, - PrimitiveObjectInspectorFactory.javaBooleanObjectInspector); - } - - @Override - protected SymbolFunctionResult match(Object row, PTFPartitionIterator pItr) - throws HiveException - { - Object val = null; - val = symbolExprEval.evaluate(row); - val = converter.convert(val); - result.matches = ((Boolean) val).booleanValue(); - result.nextRow = pItr.getIndex(); - - return result; - } - - @Override - protected boolean isOptional() - { - return false; - } - } - - public static class Star extends SymbolFunction { - SymbolFunction symbolFn; - - public Star(SymbolFunction symbolFn) - { - this.symbolFn = symbolFn; - } - - @Override - protected SymbolFunctionResult match(Object row, PTFPartitionIterator pItr) - throws HiveException - { - result.matches = true; - SymbolFunctionResult rowResult = symbolFn.match(row, pItr); - - while (rowResult.matches && pItr.hasNext()) - { - row = pItr.next(); - rowResult = symbolFn.match(row, pItr); - } - - result.nextRow = pItr.getIndex() - 1; - return result; - } - - @Override - protected boolean isOptional() - { - return true; - } - } - - public static class Plus extends SymbolFunction { - SymbolFunction symbolFn; - - public Plus(SymbolFunction symbolFn) - { - this.symbolFn = symbolFn; - } - - @Override - protected SymbolFunctionResult match(Object row, PTFPartitionIterator pItr) - throws HiveException - { - SymbolFunctionResult rowResult = symbolFn.match(row, pItr); - - if (!rowResult.matches) - { - result.matches = false; - result.nextRow = pItr.getIndex() - 1; - return result; - } - - result.matches = true; - while (rowResult.matches && pItr.hasNext()) - { - row = pItr.next(); - rowResult = symbolFn.match(row, pItr); - } - - result.nextRow = pItr.getIndex() - 1; - return result; - } - - @Override - protected boolean isOptional() - { - return false; - 
} - } - - public static class Chain extends SymbolFunction - { - ArrayList components; - - public Chain(ArrayList components) - { - this.components = components; - } - - /* - * Iterate over the Symbol Functions in the Chain: - * - If we are not at the end of the Iterator (i.e. row != null ) - * - match the current componentFn - * - if it returns false, then return false - * - otherwise set row to the next row from the Iterator. - * - if we are at the end of the Iterator - * - skip any optional Symbol Fns (star patterns) at the end. - * - but if we come to a non optional Symbol Fn, return false. - * - if we match all Fns in the chain return true. - */ - @Override - protected SymbolFunctionResult match(Object row, PTFPartitionIterator pItr) - throws HiveException - { - SymbolFunctionResult componentResult = null; - for (SymbolFunction sFn : components) - { - if (row != null) - { - componentResult = sFn.match(row, pItr); - if (!componentResult.matches) - { - result.matches = false; - result.nextRow = componentResult.nextRow; - return result; - } - row = pItr.resetToIndex(componentResult.nextRow); - } - else - { - if (!sFn.isOptional()) - { - result.matches = false; - result.nextRow = componentResult.nextRow; - return result; - } - } - } - - result.matches = true; - result.nextRow = componentResult.nextRow; - return result; - } - - @Override - protected boolean isOptional() - { - return false; - } - } - - - public static class SymbolFunctionResult - { - /* - * does the row match the pattern represented by this SymbolFunction - */ - public boolean matches; - /* - * what is the index of the row beyond the set of rows that match this pattern. 
- */ - public int nextRow; - } - - public static class SymbolParser - { - String patternStr; - String[] symbols; - HashMap symbolExprEvalMap; - ArrayList symbolFunctions; - Chain symbolFnChain; - - - public SymbolParser(String patternStr, ArrayList symbolNames, - ArrayList symbolExprEvals, ArrayList symbolExprOIs) - { - super(); - this.patternStr = patternStr; - symbolExprEvalMap = new HashMap(); - int sz = symbolNames.size(); - for(int i=0; i < sz; i++) - { - String symbolName = symbolNames.get(i); - ExprNodeEvaluator symbolExprEval = symbolExprEvals.get(i); - ObjectInspector symbolExprOI = symbolExprOIs.get(i); - symbolExprEvalMap.put(symbolName.toLowerCase(), - new Object[] {symbolExprEval, symbolExprOI}); - } - } - - public SymbolFunction getSymbolFunction() - { - return symbolFnChain; - } - - public void parse() throws SemanticException - { - symbols = patternStr.split("\\."); - symbolFunctions = new ArrayList(); - - for(String symbol : symbols) - { - boolean isStar = symbol.endsWith("*"); - boolean isPlus = symbol.endsWith("+"); - - symbol = (isStar || isPlus) ? symbol.substring(0, symbol.length() - 1) : symbol; - Object[] symbolDetails = symbolExprEvalMap.get(symbol.toLowerCase()); - if ( symbolDetails == null ) - { - throw new SemanticException(String.format("Unknown Symbol %s", symbol)); - } - - ExprNodeEvaluator symbolExprEval = (ExprNodeEvaluator) symbolDetails[0]; - ObjectInspector symbolExprOI = (ObjectInspector) symbolDetails[1]; - SymbolFunction sFn = new Symbol(symbolExprEval, symbolExprOI); - - if ( isStar ) - { - sFn = new Star(sFn); - } - else if ( isPlus ) - { - sFn = new Plus(sFn); - } - symbolFunctions.add(sFn); - } - symbolFnChain = new Chain(symbolFunctions); - } - } - - /* - * ResultExpression is a Select List with the following variation: - * - the select keyword is optional. The parser checks if the expression doesn't start with - * select; if not it prefixes it. - * - Window Fn clauses are not permitted. 
- * - expressions can operate on the input columns plus the psuedo column 'path' - * which is array of - * structs. The shape of the struct is - * the same as the input. - */ - public static class ResultExpressionParser { - String resultExprString; - - RowResolver selectListInputRowResolver; - TypeCheckCtx selectListInputTypeCheckCtx; - StructObjectInspector selectListInputOI; - - ArrayList selectSpec; - - ResultExprInfo resultExprInfo; - - public ResultExpressionParser(String resultExprString, - RowResolver selectListInputRowResolver) - { - this.resultExprString = resultExprString; - this.selectListInputRowResolver = selectListInputRowResolver; - } - - public void translate() throws SemanticException, HiveException - { - setupSelectListInputInfo(); - fixResultExprString(); - parse(); - validateSelectExpr(); - buildSelectListEvaluators(); - } - - public ResultExprInfo getResultExprInfo() { - return resultExprInfo; - } - - private void buildSelectListEvaluators() throws SemanticException, HiveException - { - resultExprInfo = new ResultExprInfo(); - resultExprInfo.resultExprEvals = new ArrayList(); - resultExprInfo.resultExprNames = new ArrayList(); - resultExprInfo.resultExprNodes = new ArrayList(); - //result - ArrayList selectListExprOIs = new ArrayList(); - int i = 0; - for(WindowExpressionSpec expr : selectSpec) - { - String selectColName = expr.getAlias(); - ASTNode selectColumnNode = expr.getExpression(); - ExprNodeDesc selectColumnExprNode = - ResultExpressionParser.buildExprNode(selectColumnNode, - selectListInputTypeCheckCtx); - ExprNodeEvaluator selectColumnExprEval = - ExprNodeEvaluatorFactory.get(selectColumnExprNode); - ObjectInspector selectColumnOI = null; - selectColumnOI = selectColumnExprEval.initialize(selectListInputOI); - - selectColName = getColumnName(selectColName, selectColumnExprNode, i); - - resultExprInfo.resultExprEvals.add(selectColumnExprEval); - selectListExprOIs.add(selectColumnOI); - 
resultExprInfo.resultExprNodes.add(selectColumnExprNode); - resultExprInfo.resultExprNames.add(selectColName); - i++; - } - - resultExprInfo.resultOI = ObjectInspectorFactory.getStandardStructObjectInspector( - resultExprInfo.resultExprNames, selectListExprOIs); - } - - private void setupSelectListInputInfo() throws SemanticException - { - selectListInputTypeCheckCtx = new TypeCheckCtx(selectListInputRowResolver); - selectListInputTypeCheckCtx.setUnparseTranslator(null); - /* - * create SelectListOI - */ - selectListInputOI = (StructObjectInspector) - PTFTranslator.getStandardStructOI(selectListInputRowResolver); - } - - private void fixResultExprString() - { - String r = resultExprString.trim(); - String prefix = r.substring(0, 6); - if (!prefix.toLowerCase().equals("select")) - { - r = "select " + r; - } - resultExprString = r; - } - - private void parse() throws SemanticException - { - selectSpec = SemanticAnalyzer.parseSelect(resultExprString); - } - - private void validateSelectExpr() throws SemanticException - { - for (WindowExpressionSpec expr : selectSpec) - { - PTFTranslator.validateNoLeadLagInValueBoundarySpec(expr.getExpression()); - } - } - - private String getColumnName(String alias, ExprNodeDesc exprNode, int colIdx) - { - if (alias != null) - { - return alias; - } - else if (exprNode instanceof ExprNodeColumnDesc) - { - ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) exprNode; - return colDesc.getColumn(); - } - return "npath_col_" + colIdx; - } - - public static ExprNodeDesc buildExprNode(ASTNode expr, - TypeCheckCtx typeCheckCtx) throws SemanticException - { - // todo: use SemanticAnalyzer::genExprNodeDesc - // currently SA not available to PTFTranslator. 
- Map map = TypeCheckProcFactory - .genExprNode(expr, typeCheckCtx); - ExprNodeDesc desc = map.get(expr); - if (desc == null) { - String errMsg = typeCheckCtx.getError(); - if ( errMsg == null) { - errMsg = "Error in parsing "; - } - throw new SemanticException(errMsg); - } - return desc; - } - } - - public static final String PATHATTR_NAME = "tpath"; - - /* - * add array to the list of columns - */ - protected static RowResolver createSelectListRR(NPath evaluator, - PTFInputDef inpDef) throws SemanticException { - RowResolver rr = new RowResolver(); - RowResolver inputRR = inpDef.getOutputShape().getRr(); - - evaluator.inputColumnNamesMap = new HashMap(); - ArrayList inputColumnNames = new ArrayList(); - - ArrayList inpColOIs = new ArrayList(); - - for (ColumnInfo inpCInfo : inputRR.getColumnInfos()) { - ColumnInfo cInfo = new ColumnInfo(inpCInfo); - String colAlias = cInfo.getAlias(); - - String[] tabColAlias = inputRR.reverseLookup(inpCInfo.getInternalName()); - if (tabColAlias != null) { - colAlias = tabColAlias[1]; - } - ASTNode inExpr = null; - inExpr = PTFTranslator.getASTNode(inpCInfo, inputRR); - if ( inExpr != null ) { - rr.putExpression(inExpr, cInfo); - colAlias = inExpr.toStringTree().toLowerCase(); - } - else { - colAlias = colAlias == null ? 
cInfo.getInternalName() : colAlias; - rr.put(cInfo.getTabAlias(), colAlias, cInfo); - } - - evaluator.inputColumnNamesMap.put(cInfo.getInternalName(), colAlias); - inputColumnNames.add(colAlias); - inpColOIs.add(cInfo.getObjectInspector()); - } - - StandardListObjectInspector pathAttrOI = - ObjectInspectorFactory.getStandardListObjectInspector( - ObjectInspectorFactory.getStandardStructObjectInspector(inputColumnNames, - inpColOIs)); - - ColumnInfo pathColumn = new ColumnInfo(PATHATTR_NAME, - TypeInfoUtils.getTypeInfoFromObjectInspector(pathAttrOI), - null, - false, false); - rr.put(null, PATHATTR_NAME, pathColumn); - - return rr; - } - - protected static StructObjectInspector createSelectListOI(NPath evaluator, PTFInputDef inpDef) { - StructObjectInspector inOI = inpDef.getOutputShape().getOI(); - ArrayList inputColumnNames = new ArrayList(); - ArrayList selectListNames = new ArrayList(); - ArrayList fieldOIs = new ArrayList(); - for(StructField f : inOI.getAllStructFieldRefs()) { - String inputColName = evaluator.inputColumnNamesMap.get(f.getFieldName()); - if ( inputColName != null ) { - inputColumnNames.add(inputColName); - selectListNames.add(f.getFieldName()); - fieldOIs.add(f.getFieldObjectInspector()); - } - } - - StandardListObjectInspector pathAttrOI = - ObjectInspectorFactory.getStandardListObjectInspector( - ObjectInspectorFactory.getStandardStructObjectInspector(inputColumnNames, - fieldOIs)); - - ArrayList selectFieldOIs = new ArrayList(); - selectFieldOIs.addAll(fieldOIs); - selectFieldOIs.add(pathAttrOI); - selectListNames.add(NPath.PATHATTR_NAME); - return ObjectInspectorFactory.getStandardStructObjectInspector( - selectListNames, selectFieldOIs); - } - - public static Object getSelectListInput(Object currRow, ObjectInspector rowOI, - PTFPartitionIterator pItr, int sz) throws HiveException { - ArrayList oRow = new ArrayList(); - List currRowAsStdObject = (List) ObjectInspectorUtils - .copyToStandardObject(currRow, rowOI); - 
oRow.addAll(currRowAsStdObject); - oRow.add(getPath(currRow, rowOI, pItr, sz)); - return oRow; - } - - public static ArrayList getPath(Object currRow, ObjectInspector rowOI, - PTFPartitionIterator pItr, int sz) throws HiveException { - int idx = pItr.getIndex() - 1; - ArrayList path = new ArrayList(); - path.add(ObjectInspectorUtils.copyToStandardObject(currRow, rowOI)); - int pSz = 1; - - while (pSz < sz && pItr.hasNext()) - { - currRow = pItr.next(); - path.add(ObjectInspectorUtils.copyToStandardObject(currRow, rowOI)); - pSz++; - } - pItr.resetToIndex(idx); - return path; - } -} diff --git ql/src/test/queries/clientpositive/ptf_matchpath.q ql/src/test/queries/clientpositive/ptf_matchpath.q new file mode 100644 index 0000000..72eeb10 --- /dev/null +++ ql/src/test/queries/clientpositive/ptf_matchpath.q @@ -0,0 +1,36 @@ +DROP TABLE flights_tiny; + +create table flights_tiny ( +ORIGIN_CITY_NAME string, +DEST_CITY_NAME string, +YEAR int, +MONTH int, +DAY_OF_MONTH int, +ARR_DELAY float, +FL_NUM string +); + +LOAD DATA LOCAL INPATH '../data/files/flights_tiny.txt' OVERWRITE INTO TABLE flights_tiny; + +-- 1. basic Matchpath test +select origin_city_name, fl_num, year, month, day_of_month, sz, tpath +from matchpath(on + flights_tiny + distribute by fl_num + sort by year, month, day_of_month + arg1('LATE.LATE+'), + arg2('LATE'), arg3(arr_delay > 15), + arg4('origin_city_name, fl_num, year, month, day_of_month, size(tpath) as sz, tpath[0].day_of_month as tpath') + ); + +-- 2. 
Matchpath on 1 partition +select origin_city_name, fl_num, year, month, day_of_month, sz, tpath +from matchpath(on + flights_tiny + sort by fl_num, year, month, day_of_month + arg1('LATE.LATE+'), + arg2('LATE'), arg3(arr_delay > 15), + arg4('origin_city_name, fl_num, year, month, day_of_month, size(tpath) as sz, tpath[0].day_of_month as tpath') + ) +where fl_num = 1142; + \ No newline at end of file diff --git ql/src/test/queries/clientpositive/ptf_npath.q ql/src/test/queries/clientpositive/ptf_npath.q deleted file mode 100644 index 4064801..0000000 --- ql/src/test/queries/clientpositive/ptf_npath.q +++ /dev/null @@ -1,36 +0,0 @@ -DROP TABLE flights_tiny; - -create table flights_tiny ( -ORIGIN_CITY_NAME string, -DEST_CITY_NAME string, -YEAR int, -MONTH int, -DAY_OF_MONTH int, -ARR_DELAY float, -FL_NUM string -); - -LOAD DATA LOCAL INPATH '../data/files/flights_tiny.txt' OVERWRITE INTO TABLE flights_tiny; - --- 1. basic Npath test -select origin_city_name, fl_num, year, month, day_of_month, sz, tpath -from npath(on - flights_tiny - distribute by fl_num - sort by year, month, day_of_month - arg1('LATE.LATE+'), - arg2('LATE'), arg3(arr_delay > 15), - arg4('origin_city_name, fl_num, year, month, day_of_month, size(tpath) as sz, tpath[0].day_of_month as tpath') - ); - --- 2. 
Npath on 1 partition -select origin_city_name, fl_num, year, month, day_of_month, sz, tpath -from npath(on - flights_tiny - sort by fl_num, year, month, day_of_month - arg1('LATE.LATE+'), - arg2('LATE'), arg3(arr_delay > 15), - arg4('origin_city_name, fl_num, year, month, day_of_month, size(tpath) as sz, tpath[0].day_of_month as tpath') - ) -where fl_num = 1142; - \ No newline at end of file diff --git ql/src/test/queries/clientpositive/ptf_register_tblfn.q ql/src/test/queries/clientpositive/ptf_register_tblfn.q index 490c1b1..a2140cd 100644 --- ql/src/test/queries/clientpositive/ptf_register_tblfn.q +++ ql/src/test/queries/clientpositive/ptf_register_tblfn.q @@ -12,12 +12,12 @@ FL_NUM string LOAD DATA LOCAL INPATH '../data/files/flights_tiny.txt' OVERWRITE INTO TABLE flights_tiny; -create temporary function npathtest as 'org.apache.hadoop.hive.ql.udf.ptf.NPath$NPathResolver'; +create temporary function matchpathtest as 'org.apache.hadoop.hive.ql.udf.ptf.MatchPath$MatchPathResolver'; --- 1. basic Npath test +-- 1. 
basic Matchpath test select origin_city_name, fl_num, year, month, day_of_month, sz, tpath -from npathtest(on +from matchpathtest(on flights_tiny distribute by fl_num sort by year, month, day_of_month @@ -26,4 +26,4 @@ from npathtest(on arg4('origin_city_name, fl_num, year, month, day_of_month, size(tpath) as sz, tpath[0].day_of_month as tpath') ); -drop temporary function npathtest; +drop temporary function matchpathtest; diff --git ql/src/test/results/clientpositive/ptf_matchpath.q.out ql/src/test/results/clientpositive/ptf_matchpath.q.out new file mode 100644 index 0000000..ed57dcb --- /dev/null +++ ql/src/test/results/clientpositive/ptf_matchpath.q.out @@ -0,0 +1,104 @@ +PREHOOK: query: DROP TABLE flights_tiny +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE flights_tiny +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table flights_tiny ( +ORIGIN_CITY_NAME string, +DEST_CITY_NAME string, +YEAR int, +MONTH int, +DAY_OF_MONTH int, +ARR_DELAY float, +FL_NUM string +) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table flights_tiny ( +ORIGIN_CITY_NAME string, +DEST_CITY_NAME string, +YEAR int, +MONTH int, +DAY_OF_MONTH int, +ARR_DELAY float, +FL_NUM string +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@flights_tiny +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/flights_tiny.txt' OVERWRITE INTO TABLE flights_tiny +PREHOOK: type: LOAD +PREHOOK: Output: default@flights_tiny +POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/flights_tiny.txt' OVERWRITE INTO TABLE flights_tiny +POSTHOOK: type: LOAD +POSTHOOK: Output: default@flights_tiny +PREHOOK: query: -- 1. 
basic Matchpath test +select origin_city_name, fl_num, year, month, day_of_month, sz, tpath +from matchpath(on + flights_tiny + distribute by fl_num + sort by year, month, day_of_month + arg1('LATE.LATE+'), + arg2('LATE'), arg3(arr_delay > 15), + arg4('origin_city_name, fl_num, year, month, day_of_month, size(tpath) as sz, tpath[0].day_of_month as tpath') + ) +PREHOOK: type: QUERY +PREHOOK: Input: default@flights_tiny +#### A masked pattern was here #### +POSTHOOK: query: -- 1. basic Matchpath test +select origin_city_name, fl_num, year, month, day_of_month, sz, tpath +from matchpath(on + flights_tiny + distribute by fl_num + sort by year, month, day_of_month + arg1('LATE.LATE+'), + arg2('LATE'), arg3(arr_delay > 15), + arg4('origin_city_name, fl_num, year, month, day_of_month, size(tpath) as sz, tpath[0].day_of_month as tpath') + ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@flights_tiny +#### A masked pattern was here #### +Baltimore 1142 2010 10 20 6 20 +Baltimore 1142 2010 10 21 5 21 +Baltimore 1142 2010 10 22 4 22 +Baltimore 1142 2010 10 25 3 25 +Baltimore 1142 2010 10 26 2 26 +Chicago 1531 2010 10 21 2 21 +Chicago 1531 2010 10 25 3 25 +Chicago 1531 2010 10 26 2 26 +Baltimore 1599 2010 10 21 2 21 +Baltimore 1599 2010 10 25 3 25 +Baltimore 1599 2010 10 26 2 26 +Chicago 361 2010 10 20 2 20 +Washington 7291 2010 10 27 2 27 +Chicago 897 2010 10 20 4 20 +Chicago 897 2010 10 21 3 21 +Chicago 897 2010 10 22 2 22 +PREHOOK: query: -- 2. Matchpath on 1 partition +select origin_city_name, fl_num, year, month, day_of_month, sz, tpath +from matchpath(on + flights_tiny + sort by fl_num, year, month, day_of_month + arg1('LATE.LATE+'), + arg2('LATE'), arg3(arr_delay > 15), + arg4('origin_city_name, fl_num, year, month, day_of_month, size(tpath) as sz, tpath[0].day_of_month as tpath') + ) +where fl_num = 1142 +PREHOOK: type: QUERY +PREHOOK: Input: default@flights_tiny +#### A masked pattern was here #### +POSTHOOK: query: -- 2. 
Matchpath on 1 partition +select origin_city_name, fl_num, year, month, day_of_month, sz, tpath +from matchpath(on + flights_tiny + sort by fl_num, year, month, day_of_month + arg1('LATE.LATE+'), + arg2('LATE'), arg3(arr_delay > 15), + arg4('origin_city_name, fl_num, year, month, day_of_month, size(tpath) as sz, tpath[0].day_of_month as tpath') + ) +where fl_num = 1142 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@flights_tiny +#### A masked pattern was here #### +Baltimore 1142 2010 10 20 6 20 +Baltimore 1142 2010 10 21 5 21 +Baltimore 1142 2010 10 22 4 22 +Baltimore 1142 2010 10 25 3 25 +Baltimore 1142 2010 10 26 2 26 diff --git ql/src/test/results/clientpositive/ptf_npath.q.out ql/src/test/results/clientpositive/ptf_npath.q.out deleted file mode 100644 index ea8b822..0000000 --- ql/src/test/results/clientpositive/ptf_npath.q.out +++ /dev/null @@ -1,104 +0,0 @@ -PREHOOK: query: DROP TABLE flights_tiny -PREHOOK: type: DROPTABLE -POSTHOOK: query: DROP TABLE flights_tiny -POSTHOOK: type: DROPTABLE -PREHOOK: query: create table flights_tiny ( -ORIGIN_CITY_NAME string, -DEST_CITY_NAME string, -YEAR int, -MONTH int, -DAY_OF_MONTH int, -ARR_DELAY float, -FL_NUM string -) -PREHOOK: type: CREATETABLE -POSTHOOK: query: create table flights_tiny ( -ORIGIN_CITY_NAME string, -DEST_CITY_NAME string, -YEAR int, -MONTH int, -DAY_OF_MONTH int, -ARR_DELAY float, -FL_NUM string -) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: default@flights_tiny -PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/flights_tiny.txt' OVERWRITE INTO TABLE flights_tiny -PREHOOK: type: LOAD -PREHOOK: Output: default@flights_tiny -POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/flights_tiny.txt' OVERWRITE INTO TABLE flights_tiny -POSTHOOK: type: LOAD -POSTHOOK: Output: default@flights_tiny -PREHOOK: query: -- 1. 
basic Npath test -select origin_city_name, fl_num, year, month, day_of_month, sz, tpath -from npath(on - flights_tiny - distribute by fl_num - sort by year, month, day_of_month - arg1('LATE.LATE+'), - arg2('LATE'), arg3(arr_delay > 15), - arg4('origin_city_name, fl_num, year, month, day_of_month, size(tpath) as sz, tpath[0].day_of_month as tpath') - ) -PREHOOK: type: QUERY -PREHOOK: Input: default@flights_tiny -#### A masked pattern was here #### -POSTHOOK: query: -- 1. basic Npath test -select origin_city_name, fl_num, year, month, day_of_month, sz, tpath -from npath(on - flights_tiny - distribute by fl_num - sort by year, month, day_of_month - arg1('LATE.LATE+'), - arg2('LATE'), arg3(arr_delay > 15), - arg4('origin_city_name, fl_num, year, month, day_of_month, size(tpath) as sz, tpath[0].day_of_month as tpath') - ) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@flights_tiny -#### A masked pattern was here #### -Baltimore 1142 2010 10 20 6 20 -Baltimore 1142 2010 10 21 5 21 -Baltimore 1142 2010 10 22 4 22 -Baltimore 1142 2010 10 25 3 25 -Baltimore 1142 2010 10 26 2 26 -Chicago 1531 2010 10 21 2 21 -Chicago 1531 2010 10 25 3 25 -Chicago 1531 2010 10 26 2 26 -Baltimore 1599 2010 10 21 2 21 -Baltimore 1599 2010 10 25 3 25 -Baltimore 1599 2010 10 26 2 26 -Chicago 361 2010 10 20 2 20 -Washington 7291 2010 10 27 2 27 -Chicago 897 2010 10 20 4 20 -Chicago 897 2010 10 21 3 21 -Chicago 897 2010 10 22 2 22 -PREHOOK: query: -- 2. Npath on 1 partition -select origin_city_name, fl_num, year, month, day_of_month, sz, tpath -from npath(on - flights_tiny - sort by fl_num, year, month, day_of_month - arg1('LATE.LATE+'), - arg2('LATE'), arg3(arr_delay > 15), - arg4('origin_city_name, fl_num, year, month, day_of_month, size(tpath) as sz, tpath[0].day_of_month as tpath') - ) -where fl_num = 1142 -PREHOOK: type: QUERY -PREHOOK: Input: default@flights_tiny -#### A masked pattern was here #### -POSTHOOK: query: -- 2. 
Npath on 1 partition -select origin_city_name, fl_num, year, month, day_of_month, sz, tpath -from npath(on - flights_tiny - sort by fl_num, year, month, day_of_month - arg1('LATE.LATE+'), - arg2('LATE'), arg3(arr_delay > 15), - arg4('origin_city_name, fl_num, year, month, day_of_month, size(tpath) as sz, tpath[0].day_of_month as tpath') - ) -where fl_num = 1142 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@flights_tiny -#### A masked pattern was here #### -Baltimore 1142 2010 10 20 6 20 -Baltimore 1142 2010 10 21 5 21 -Baltimore 1142 2010 10 22 4 22 -Baltimore 1142 2010 10 25 3 25 -Baltimore 1142 2010 10 26 2 26 diff --git ql/src/test/results/clientpositive/ptf_register_tblfn.q.out ql/src/test/results/clientpositive/ptf_register_tblfn.q.out index 2808e1f..0e99d6d 100644 --- ql/src/test/results/clientpositive/ptf_register_tblfn.q.out +++ ql/src/test/results/clientpositive/ptf_register_tblfn.q.out @@ -29,13 +29,13 @@ PREHOOK: Output: default@flights_tiny POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/flights_tiny.txt' OVERWRITE INTO TABLE flights_tiny POSTHOOK: type: LOAD POSTHOOK: Output: default@flights_tiny -PREHOOK: query: create temporary function npathtest as 'org.apache.hadoop.hive.ql.udf.ptf.NPath$NPathResolver' +PREHOOK: query: create temporary function matchpathtest as 'org.apache.hadoop.hive.ql.udf.ptf.MatchPath$MatchPathResolver' PREHOOK: type: CREATEFUNCTION -POSTHOOK: query: create temporary function npathtest as 'org.apache.hadoop.hive.ql.udf.ptf.NPath$NPathResolver' +POSTHOOK: query: create temporary function matchpathtest as 'org.apache.hadoop.hive.ql.udf.ptf.MatchPath$MatchPathResolver' POSTHOOK: type: CREATEFUNCTION -PREHOOK: query: -- 1. basic Npath test +PREHOOK: query: -- 1. 
basic Matchpath test select origin_city_name, fl_num, year, month, day_of_month, sz, tpath -from npathtest(on +from matchpathtest(on flights_tiny distribute by fl_num sort by year, month, day_of_month @@ -46,9 +46,9 @@ from npathtest(on PREHOOK: type: QUERY PREHOOK: Input: default@flights_tiny #### A masked pattern was here #### -POSTHOOK: query: -- 1. basic Npath test +POSTHOOK: query: -- 1. basic Matchpath test select origin_city_name, fl_num, year, month, day_of_month, sz, tpath -from npathtest(on +from matchpathtest(on flights_tiny distribute by fl_num sort by year, month, day_of_month @@ -75,7 +75,7 @@ Washington 7291 2010 10 27 2 27 Chicago 897 2010 10 20 4 20 Chicago 897 2010 10 21 3 21 Chicago 897 2010 10 22 2 22 -PREHOOK: query: drop temporary function npathtest +PREHOOK: query: drop temporary function matchpathtest PREHOOK: type: DROPFUNCTION -POSTHOOK: query: drop temporary function npathtest +POSTHOOK: query: drop temporary function matchpathtest POSTHOOK: type: DROPFUNCTION diff --git ql/src/test/results/clientpositive/show_functions.q.out ql/src/test/results/clientpositive/show_functions.q.out index fb945b4..eab00d7 100644 --- ql/src/test/results/clientpositive/show_functions.q.out +++ ql/src/test/results/clientpositive/show_functions.q.out @@ -106,6 +106,7 @@ ltrim map map_keys map_values +matchpath max min minute @@ -116,7 +117,6 @@ ngrams noop noopwithmap not -npath ntile nvl or