diff --git a/ql/src/java/org/apache/hadoop/hive/ql/QueryProperties.java b/ql/src/java/org/apache/hadoop/hive/ql/QueryProperties.java
index b5c7be43e9..07fe641b99 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/QueryProperties.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/QueryProperties.java
@@ -57,9 +57,11 @@
   boolean mapJoinRemoved = false;
   boolean hasMapGroupBy = false;
 
+  private boolean hasLateralViews = false;
+  private boolean cboSupportedLateralViews = true;
+
   private int noOfJoins = 0;
   private int noOfOuterJoins = 0;
-  private boolean hasLateralViews;
 
   private boolean multiDestQuery;
   private boolean filterWithSubQuery;
@@ -142,6 +144,14 @@ public boolean hasLateralViews() {
     return hasLateralViews;
   }
 
+  public void setCBOSupportedLateralViews(boolean cboSupportedLateralViews) {
+    this.cboSupportedLateralViews = cboSupportedLateralViews;
+  }
+
+  public boolean isCBOSupportedLateralViews() {
+    return cboSupportedLateralViews;
+  }
+
   public boolean hasGroupBy() {
     return hasGroupBy;
   }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveTableFunctionScan.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveTableFunctionScan.java
index a416ca35c0..ffa2a1f262 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveTableFunctionScan.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveTableFunctionScan.java
@@ -48,7 +48,7 @@
    * @param columnMappings
    *          columnMappings - Column mappings associated with this function
    */
-  public HiveTableFunctionScan(RelOptCluster cluster, RelTraitSet traitSet, List<RelNode> inputs,
+  private HiveTableFunctionScan(RelOptCluster cluster, RelTraitSet traitSet, List<RelNode> inputs,
       RexNode rexCall, Type elementType, RelDataType rowType, Set<RelColumnMapping> columnMappings) {
     super(cluster, traitSet, inputs, rexCall, elementType, rowType, columnMappings);
   }
@@ -56,17 +56,15 @@ public HiveTableFunctionScan(RelOptCluster cluster, RelTraitSet traitSet, List<R
   public static HiveTableFunctionScan create(RelOptCluster cluster, RelTraitSet traitSet,
       List<RelNode> inputs, RexNode rexCall, Type elementType, RelDataType rowType,
       Set<RelColumnMapping> columnMappings) throws CalciteSemanticException {
-    HiveTableFunctionScan hiveTableFunctionScan = new HiveTableFunctionScan(cluster, traitSet,
+    return new HiveTableFunctionScan(cluster, traitSet,
         inputs, rexCall, elementType, rowType, columnMappings);
-    return hiveTableFunctionScan;
   }
 
   @Override
   public TableFunctionScan copy(RelTraitSet traitSet, List<RelNode> inputs, RexNode rexCall,
       Type elementType, RelDataType rowType, Set<RelColumnMapping> columnMappings) {
-    HiveTableFunctionScan htfs = new HiveTableFunctionScan(getCluster(), traitSet, inputs, rexCall,
+    return new HiveTableFunctionScan(getCluster(), traitSet, inputs, rexCall,
        elementType, rowType, columnMappings);
-    return htfs;
  }
 
   @Override
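Note on the two hunks above: QueryProperties now tracks, per query, whether every lateral view has the single shape the CBO path can translate (the flag starts at true and is cleared by SemanticAnalyzer.processLateralView further down), and HiveTableFunctionScan may only be instantiated through its static create() factory. A minimal sketch of what the flag distinguishes, using a query from tablevalues.q (illustration only, not part of the patch):

-- cboSupportedLateralViews stays true: the only lateral view is the
-- inline-over-VALUES form, so canHandleQbForCbo() keeps CBO enabled.
SELECT tf.col1, tf.col2, tf.col3
FROM (SELECT key, value FROM mytbl) t,
     LATERAL TABLE(VALUES('A', 10, t.key),('B', 20, t.key)) AS tf(col1, col2, col3);

-- Any other lateral view form (e.g. LATERAL VIEW explode(...)) clears the
-- flag, and the query falls back to the non-CBO planning path.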
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index f639a40330..85a1f348e3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -43,6 +43,7 @@
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicInteger;
 
+import com.google.common.collect.Iterables;
 import org.antlr.runtime.ClassicToken;
 import org.antlr.runtime.CommonToken;
 import org.antlr.runtime.tree.Tree;
@@ -94,6 +95,7 @@
 import org.apache.calcite.rel.type.RelDataTypeField;
 import org.apache.calcite.rel.type.RelDataTypeImpl;
 import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.rex.RexCall;
 import org.apache.calcite.rex.RexExecutor;
 import org.apache.calcite.rex.RexFieldCollation;
 import org.apache.calcite.rex.RexInputRef;
@@ -110,7 +112,9 @@
 import org.apache.calcite.sql.SqlOperator;
 import org.apache.calcite.sql.SqlWindow;
 import org.apache.calcite.sql.parser.SqlParserPos;
+import org.apache.calcite.sql.type.ArraySqlType;
 import org.apache.calcite.sql.type.SqlTypeName;
+import org.apache.calcite.sql.validate.SqlValidatorUtil;
 import org.apache.calcite.tools.Frameworks;
 import org.apache.calcite.util.CompositeList;
 import org.apache.calcite.util.ImmutableBitSet;
@@ -127,6 +131,7 @@
 import org.apache.hadoop.hive.ql.QueryProperties;
 import org.apache.hadoop.hive.ql.QueryState;
 import org.apache.hadoop.hive.ql.exec.ColumnInfo;
+import org.apache.hadoop.hive.ql.exec.Description;
 import org.apache.hadoop.hive.ql.exec.FunctionInfo;
 import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
 import org.apache.hadoop.hive.ql.exec.Operator;
@@ -244,7 +249,9 @@
 import org.apache.hadoop.hive.ql.session.SessionState;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFArray;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDTFInline;
 import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
@@ -252,6 +259,7 @@
 import org.apache.hadoop.hive.serde2.objectinspector.StructField;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
@@ -746,7 +754,6 @@ public Object post(Object t) {
    * @param qb
    *          top level QB corresponding to the AST
    * @param cboCtx
-   * @param semAnalyzer
    * @return boolean
    *
    *         Assumption:
@@ -822,7 +829,7 @@ static String canHandleQbForCbo(QueryProperties queryProperties, HiveConf conf,
     if (!queryProperties.hasClusterBy() && !queryProperties.hasDistributeBy()
         && !queryProperties.hasSortBy() && !queryProperties.hasPTF() && !queryProperties.usesScript()
-        && !queryProperties.hasLateralViews()) {
+        && queryProperties.isCBOSupportedLateralViews()) {
       // Ok to run CBO.
       return null;
     }
@@ -869,6 +876,34 @@ boolean isCBOExecuted() {
     return runCBO;
   }
 
+  @Override
+  boolean isCBOSupportedLateralView(ASTNode lateralView) {
+    // Lateral view AST has the following shape:
+    // ^(TOK_LATERAL_VIEW
+    //     ^(TOK_SELECT ^(TOK_SELEXPR ^(TOK_FUNCTION Identifier params) identifier* tableAlias)))
+    if (lateralView.getToken().getType() == HiveParser.TOK_LATERAL_VIEW_OUTER) {
+      // LATERAL VIEW OUTER not supported in CBO
+      return false;
+    }
+    // Only INLINE followed by ARRAY supported in CBO
+    ASTNode lvFunc = (ASTNode) lateralView.getChild(0).getChild(0).getChild(0);
+    String lvFuncName = lvFunc.getChild(0).getText();
+    if (lvFuncName.compareToIgnoreCase(
+        GenericUDTFInline.class.getAnnotation(Description.class).name()) != 0) {
+      return false;
+    }
+    if (lvFunc.getChildCount() != 2) {
+      return false;
+    }
+    ASTNode innerFunc = (ASTNode) lvFunc.getChild(1);
+    if (innerFunc.getToken().getType() != HiveParser.TOK_FUNCTION ||
+        innerFunc.getChild(0).getText().compareToIgnoreCase(
+            GenericUDFArray.class.getAnnotation(Description.class).name()) != 0) {
+      return false;
+    }
+    return true;
+  }
+
   @Override
   boolean continueJoinMerge() {
     return !(runCBO && disableSemJoinReordering);
@@ -1244,7 +1279,7 @@ Operator getOptimizedHiveOPDag() throws SemanticException {
    * Unwraps Calcite Invocation exceptions coming meta data provider chain and
    * obtains the real cause.
    *
-   * @param Exception
+   * @param e
    */
   private void rethrowCalciteException(Exception e) throws SemanticException {
     Throwable first = (semanticException != null) ? semanticException : e, current = first, cause = current
@@ -2231,7 +2266,6 @@ private RelNode genJoinRelNode(RelNode leftRel, String leftTableAlias, RelNode r
   /**
    * Generate Join Logical Plan Relnode by walking through the join AST.
    *
-   * @param qb
    * @param aliasToRel
    *          Alias(Table/Relation alias) to RelNode; only read and not
    *          written in to by this method
@@ -2289,6 +2323,8 @@ private RelNode genJoinLogicalPlan(ASTNode joinParseTree, Map<String, RelNode> a
       leftRel = aliasToRel.get(leftTableAlias);
     } else if (SemanticAnalyzer.isJoinToken(left)) {
       leftRel = genJoinLogicalPlan(left, aliasToRel);
+    } else if (left.getToken().getType() == HiveParser.TOK_LATERAL_VIEW) {
+      leftRel = genLateralViewPlans(left, aliasToRel);
     } else {
       assert (false);
     }
@@ -2309,7 +2345,9 @@ private RelNode genJoinLogicalPlan(ASTNode joinParseTree, Map<String, RelNode> a
       rightTableAlias = (right.getToken().getType() == HiveParser.TOK_PTBLFUNCTION) ? SemanticAnalyzer
          .unescapeIdentifier(right.getChild(1).getText().toLowerCase()) : rightTableAlias;
       rightRel = aliasToRel.get(rightTableAlias);
-    } else {
+    } else if (right.getToken().getType() == HiveParser.TOK_LATERAL_VIEW) {
+      rightRel = genLateralViewPlans(right, aliasToRel);
+    } else {
       assert (false);
     }
@@ -2639,6 +2677,163 @@ private void subqueryRestrictionCheck(QB qb, ASTNode searchCond, RelNode srcRel,
         }
       }
     }
+
+  private RelNode genLateralViewPlans(ASTNode lateralView, Map<String, RelNode> aliasToRel)
+      throws SemanticException {
+    final RexBuilder rexBuilder = this.cluster.getRexBuilder();
+    final RelDataTypeFactory dtFactory = this.cluster.getTypeFactory();
+    final String inlineFunctionName =
+        GenericUDTFInline.class.getAnnotation(Description.class).name();
+    int numChildren = lateralView.getChildCount();
+    assert (numChildren == 2);
+
+    // 1) Obtain input and all related data structures
+    ASTNode next = (ASTNode) lateralView.getChild(1);
+    RelNode inputRel = null;
+    switch (next.getToken().getType()) {
+      case HiveParser.TOK_TABREF:
+      case HiveParser.TOK_SUBQUERY:
+      case HiveParser.TOK_PTBLFUNCTION:
+        String inputTableName = SemanticAnalyzer.getUnescapedUnqualifiedTableName(
+            (ASTNode) next.getChild(0)).toLowerCase();
+        String inputTableAlias;
+        if (next.getToken().getType() == HiveParser.TOK_PTBLFUNCTION) {
+          // ptf node form is: ^(TOK_PTBLFUNCTION $name $alias?
+          // partitionTableFunctionSource partitioningSpec? expression*)
+          // ptf node guaranteed to have an alias here
+          inputTableAlias = SemanticAnalyzer.unescapeIdentifier(next.getChild(1).getText().toLowerCase());
+        } else {
+          inputTableAlias = next.getChildCount() == 1 ? inputTableName :
+              SemanticAnalyzer.unescapeIdentifier(next.getChild(next.getChildCount() - 1).getText().toLowerCase());
+        }
+        inputRel = aliasToRel.get(inputTableAlias);
+        break;
+      case HiveParser.TOK_LATERAL_VIEW:
+        inputRel = genLateralViewPlans(next, aliasToRel);
+        break;
+      default:
+        throw new SemanticException(ErrorMsg.LATERAL_VIEW_INVALID_CHILD.getMsg(lateralView));
+    }
+    // Input row resolver
+    RowResolver inputRR = this.relToHiveRR.get(inputRel);
+    // Extract input refs. They will serve as input for the function invocation
+    List<RexNode> inputRefs = Lists.transform(inputRel.getRowType().getFieldList(),
+        input -> new RexInputRef(input.getIndex(), input.getType()));
+    // Extract type for the arguments
+    List<RelDataType> inputRefsTypes = new ArrayList<>();
+    for (int i = 0; i < inputRefs.size(); i++) {
+      inputRefsTypes.add(inputRefs.get(i).getType());
+    }
+    // Input name to position map
+    ImmutableMap<String, Integer> inputPosMap = this.relToHiveColNameCalcitePosMap.get(inputRel);
+
+    // 2) Generate HiveTableFunctionScan RelNode for lateral view
+    // TODO: Support different functions (not only INLINE) with LATERAL VIEW JOIN
+    // ^(TOK_LATERAL_VIEW ^(TOK_SELECT ^(TOK_SELEXPR ^(TOK_FUNCTION Identifier["inline"] valuesClause) identifier* tableAlias)))
+    final ASTNode selExprClause =
+        (ASTNode) lateralView.getChild(0).getChild(0);
+    final ASTNode functionCall =
+        (ASTNode) selExprClause.getChild(0);
+    if (functionCall.getChild(0).getText().compareToIgnoreCase(inlineFunctionName) != 0) {
+      throw new SemanticException("CBO only supports inline LVJ");
+    }
+    final ASTNode valuesClause =
+        (ASTNode) functionCall.getChild(1);
+    // Output types. They will be the concatenation of the input refs types and
+    // the types of the expressions for the lateral view generated rows
+    List<RelDataType> outputFieldTypes = new ArrayList<>(inputRefsTypes);
+    List<String> outputFieldNames = new ArrayList<>(inputRel.getRowType().getFieldNames());
+    // Generate all expressions from lateral view
+    ExprNodeDesc valuesExpr = genExprNodeDesc(valuesClause, inputRR, false);
+    RexCall convertedOriginalValuesExpr = (RexCall) new RexNodeConverter(this.cluster, inputRel.getRowType(),
+        inputPosMap, 0, false).convert(valuesExpr);
+    RelDataType valuesRowType = ((ArraySqlType) convertedOriginalValuesExpr.getType()).getComponentType();
+    List<RexNode> newStructExprs = new ArrayList<>();
+    for (RexNode structExpr : convertedOriginalValuesExpr.getOperands()) {
+      RexCall structCall = (RexCall) structExpr;
+      List<RexNode> exprs = new ArrayList<>(inputRefs);
+      exprs.addAll(structCall.getOperands());
+      newStructExprs.add(rexBuilder.makeCall(structCall.op, exprs));
+    }
+    RexNode convertedFinalValuesExpr =
+        rexBuilder.makeCall(convertedOriginalValuesExpr.op, newStructExprs);
+    // The return type will be the concatenation of input type and original values type
+    RelDataType retType = SqlValidatorUtil.deriveJoinRowType(inputRel.getRowType(),
+        valuesRowType, JoinRelType.INNER, dtFactory, null, ImmutableList.of());
+
+    // Create inline SQL operator
+    FunctionInfo inlineFunctionInfo = FunctionRegistry.getFunctionInfo(inlineFunctionName);
+    SqlOperator calciteOp = SqlFunctionConverter.getCalciteOperator(
+        inlineFunctionName, inlineFunctionInfo.getGenericUDTF(),
+        ImmutableList.copyOf(inputRefsTypes), retType);
+
+    RelNode htfsRel = HiveTableFunctionScan.create(cluster, TraitsUtil.getDefaultTraitSet(cluster),
+        ImmutableList.of(inputRel), rexBuilder.makeCall(calciteOp, convertedFinalValuesExpr),
+        null, retType, null);
+
+    // 3) Keep track of colname-to-posmap && RR for new op
+    RowResolver outputRR = new RowResolver();
+    // Add all input columns
+    if (!RowResolver.add(outputRR, inputRR)) {
+      LOG.warn("Duplicates detected when adding columns to RR: see previous message");
+    }
+    // Add all columns from lateral view
+    // First we extract the information that the query provides
+    String tableAlias = null;
+    List<String> columnAliases = new ArrayList<>();
+    Set<String> uniqueNames = new HashSet<>();
+    for (int i = 1; i < selExprClause.getChildren().size(); i++) {
+      ASTNode child = (ASTNode) selExprClause.getChild(i);
+      switch (child.getToken().getType()) {
+        case HiveParser.TOK_TABALIAS:
+          tableAlias = unescapeIdentifier(child.getChild(0).getText());
+          break;
+        default:
+          String colAlias = unescapeIdentifier(child.getText());
+          if (uniqueNames.contains(colAlias)) {
+            // Column aliases defined by query for lateral view output are duplicated
+            throw new SemanticException(ErrorMsg.COLUMN_ALIAS_ALREADY_EXISTS.getMsg(colAlias));
+          }
+          columnAliases.add(colAlias);
+          uniqueNames.add(colAlias);
+      }
+    }
+    if (tableAlias == null) {
+      // Parser enforces that table alias is added, but check again
+      throw new SemanticException("Alias should be specified for LVJ");
+    }
+    if (!columnAliases.isEmpty() &&
+        columnAliases.size() != valuesRowType.getFieldCount()) {
+      // Number of columns in the aliases does not match with number of columns
+      // generated by the lateral view
+      throw new SemanticException(ErrorMsg.UDTF_ALIAS_MISMATCH.getMsg());
+    }
+    if (columnAliases.isEmpty()) {
+      // Auto-generate column aliases
+      for (int i = 0; i < valuesRowType.getFieldCount(); i++) {
+        columnAliases.add(SemanticAnalyzer.getColumnInternalName(i));
+      }
+    }
+    int numInputExprs = inputRR.getColumnInfos().size();
+    ListTypeInfo listTypeInfo = (ListTypeInfo) valuesExpr.getTypeInfo(); // Array should have ListTypeInfo
+    StructTypeInfo typeInfos = (StructTypeInfo) listTypeInfo.getListElementTypeInfo(); // Within the list, we extract types
+    for (int i = 0, j = 0; i < columnAliases.size(); i++) {
+      String internalColName;
+      do {
+        internalColName = SemanticAnalyzer.getColumnInternalName(j++);
+      } while (inputRR.getPosition(internalColName) != -1);
+      outputRR.put(tableAlias, columnAliases.get(i),
+          new ColumnInfo(internalColName, typeInfos.getAllStructFieldTypeInfos().get(i),
+              tableAlias, false));
+    }
+    this.relToHiveColNameCalcitePosMap
+        .put(htfsRel, buildHiveToCalciteColumnMap(outputRR, htfsRel));
+    this.relToHiveRR.put(htfsRel, outputRR);
+
+    // 4) Return new operator
+    return htfsRel;
+  }
+
   private boolean genSubQueryRelNode(QB qb, ASTNode node, RelNode srcRel, boolean forHavingClause,
       Map<ASTNode, RelNode> subQueryToRelNode) throws SemanticException {
@@ -2696,6 +2891,7 @@ private boolean genSubQueryRelNode(QB qb, ASTNode node, RelNode srcRel, boolean
     }
     return isSubQuery;
   }
+
   private RelNode genFilterRelNode(QB qb, ASTNode searchCond, RelNode srcRel,
       Map<String, RelNode> aliasToRel, ImmutableMap<String, Integer> outerNameToPosMap,
       RowResolver outerRR, boolean forHavingClause) throws SemanticException {
@@ -3200,7 +3396,7 @@ private RelNode genGBLogicalPlan(QB qb, RelNode srcRel) throws SemanticException
    * present then don't introduce top constraining select.
    *
    * @param qb
-   * @param srcRel
+   * @param selPair
    * @param outermostOB
    * @return RelNode OB RelNode
    * @throws SemanticException
@@ -4294,7 +4490,13 @@ private RelNode genLogicalPlan(QB qb, boolean outerMostQB,
       srcRel = genJoinLogicalPlan(qb.getParseInfo().getJoinExpr(), aliasToRel);
     } else {
       // If no join then there should only be either 1 TS or 1 SubQuery
-      srcRel = aliasToRel.values().iterator().next();
+      Map.Entry<String, RelNode> uniqueAliasToRel = aliasToRel.entrySet().iterator().next();
+      srcRel = uniqueAliasToRel.getValue();
+      // If it contains a LV
+      List<ASTNode> lateralViews = getQBParseInfo(qb).getAliasToLateralViews().get(uniqueAliasToRel.getKey());
+      if (lateralViews != null) {
+        srcRel = genLateralViewPlans(Iterables.getLast(lateralViews), aliasToRel);
+      }
     }
 
     // 2. Build Rel for where Clause
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/FromClauseParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/FromClauseParser.g
index b038e8185d..e2309af68d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/FromClauseParser.g
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/FromClauseParser.g
@@ -161,7 +161,7 @@ lateralView
     COMMA? KW_LATERAL KW_VIEW function tableAlias (KW_AS identifier ((COMMA)=> COMMA identifier)*)?
     -> ^(TOK_LATERAL_VIEW ^(TOK_SELECT ^(TOK_SELEXPR function identifier* tableAlias)))
     |
-    COMMA? KW_LATERAL KW_TABLE LPAREN valuesClause RPAREN KW_AS? tableAlias (LPAREN identifier (COMMA identifier)*)? RPAREN
+    COMMA? KW_LATERAL KW_TABLE LPAREN valuesClause RPAREN KW_AS? tableAlias (LPAREN identifier (COMMA identifier)* RPAREN)?
     -> ^(TOK_LATERAL_VIEW ^(TOK_SELECT ^(TOK_SELEXPR ^(TOK_FUNCTION Identifier["inline"] valuesClause) identifier* tableAlias)))
     ;
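Note on the grammar hunk above: the fix moves RPAREN inside the optional column-alias group. Previously the closing parenthesis sat outside the optional group and was always required, so a LATERAL TABLE without a column-alias list could not be parsed. With the corrected rule, both of these forms (taken from tablevalues.q below) parse:

SELECT t.key
FROM (SELECT key, value FROM mytbl) t,
     LATERAL TABLE(VALUES('A', 10, t.key),('B', 20, t.key)) AS tf;                    -- no alias list

SELECT tf.col3
FROM (SELECT key, value FROM mytbl) t,
     LATERAL TABLE(VALUES('A', 10, t.key),('B', 20, t.key)) AS tf(col1, col2, col3);  -- alias list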
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index d0fe8ab322..cfb3d0b338 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -45,7 +45,6 @@
 import org.antlr.runtime.ClassicToken;
 import org.antlr.runtime.CommonToken;
-import org.antlr.runtime.Token;
 import org.antlr.runtime.TokenRewriteStream;
 import org.antlr.runtime.tree.Tree;
 import org.antlr.runtime.tree.TreeVisitor;
@@ -72,7 +71,6 @@
 import org.apache.hadoop.hive.metastore.api.Database;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.MetaException;
-import org.apache.hadoop.hive.metastore.api.NotificationEvent;
 import org.apache.hadoop.hive.metastore.api.Order;
 import org.apache.hadoop.hive.metastore.api.SQLForeignKey;
 import org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint;
@@ -247,6 +245,7 @@
 import com.google.common.base.Splitter;
 import com.google.common.base.Strings;
+import com.google.common.collect.Iterables;
 import com.google.common.collect.Sets;
 import com.google.common.math.IntMath;
@@ -1231,12 +1230,14 @@ private void processJoin(QB qb, ASTNode join) throws SemanticException {
   private String processLateralView(QB qb, ASTNode lateralView)
       throws SemanticException {
     int numChildren = lateralView.getChildCount();
-    assert (numChildren == 2);
-    ASTNode next = (ASTNode) lateralView.getChild(1);
-    String alias = null;
+
+    if (!isCBOSupportedLateralView(lateralView)) {
+      queryProperties.setCBOSupportedLateralViews(false);
+    }
+
+    ASTNode next = (ASTNode) lateralView.getChild(1);
+    String alias = null;
+
     switch (next.getToken().getType()) {
     case HiveParser.TOK_TABREF:
       alias = processTable(qb, next);
@@ -1258,6 +1259,15 @@ private String processLateralView(QB qb, ASTNode lateralView)
     return alias;
   }
 
+  private String extractLateralViewAlias(ASTNode lateralView) {
+    // Lateral view AST has the following shape:
+    // ^(TOK_LATERAL_VIEW
+    //     ^(TOK_SELECT ^(TOK_SELEXPR ^(TOK_FUNCTION Identifier params) identifier* tableAlias)))
+    ASTNode selExpr = (ASTNode) lateralView.getChild(0).getChild(0);
+    ASTNode astTableAlias = (ASTNode) Iterables.getLast(selExpr.getChildren());
+    return astTableAlias.getChild(0).getText();
+  }
+
   /**
    * Phase 1: (including, but not limited to):
    *
@@ -9406,6 +9416,10 @@ boolean isCBOExecuted() {
     return false;
   }
 
+  boolean isCBOSupportedLateralView(ASTNode lateralView) {
+    return false;
+  }
+
   boolean continueJoinMerge() {
     return true;
   }
diff --git a/ql/src/test/queries/clientpositive/tablevalues.q b/ql/src/test/queries/clientpositive/tablevalues.q
index 501f532eb7..e3d605a034 100644
--- a/ql/src/test/queries/clientpositive/tablevalues.q
+++ b/ql/src/test/queries/clientpositive/tablevalues.q
@@ -92,3 +92,36 @@ SELECT tf.col1, tf.col2, tf.col3
 FROM
   (SELECT key, value FROM mytbl) t,
   LATERAL TABLE(VALUES('A', 10, t.key),('B', 20, t.key)) AS tf(col1, col2, col3);
+
+EXPLAIN
+SELECT t.key
+FROM
+  (SELECT key, value FROM mytbl) t,
+  LATERAL TABLE(VALUES('A', 10, t.key),('B', 20, t.key)) AS tf;
+
+SELECT t.key
+FROM
+  (SELECT key, value FROM mytbl) t,
+  LATERAL TABLE(VALUES('A', 10, t.key),('B', 20, t.key)) AS tf;
+
+EXPLAIN
+SELECT tf.col3
+FROM
+  (SELECT key, value FROM mytbl) t,
+  LATERAL TABLE(VALUES('A', 10, t.key),('B', 20, t.key)) AS tf(col1, col2, col3);
+
+SELECT tf.col3
+FROM
+  (SELECT key, value FROM mytbl) t,
+  LATERAL TABLE(VALUES('A', 10, t.key),('B', 20, t.key)) AS tf(col1, col2, col3);
+
+EXPLAIN
+SELECT tf.col3
+FROM
+  (SELECT row_number() over (order by key desc) as r FROM mytbl) t,
+  LATERAL TABLE(VALUES('A', 10, t.r),('B', 20, t.r)) AS tf(col1, col2, col3);
+
+SELECT tf.col3
+FROM
+  (SELECT row_number() over (order by key desc) as r FROM mytbl) t,
+  LATERAL TABLE(VALUES('A', 10, t.r),('B', 20, t.r)) AS tf(col1, col2, col3);
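A sketch of the rewrite that genLateralViewPlans performs (illustration only, not part of the patch): the input columns are prepended to every struct in the VALUES array, so one inline() table function scan yields both the input columns and the generated columns in a single operator. For the first test query above, the CBO plan is conceptually equivalent to:

SELECT inline(array(struct(key, value, 'A', 10, key),
                    struct(key, value, 'B', 20, key)))
FROM mytbl;

which is exactly the expression visible in the rewritten plans in tablevalues.q.out below.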
diff --git a/ql/src/test/results/clientpositive/tablevalues.q.out b/ql/src/test/results/clientpositive/tablevalues.q.out
index 247c5647a3..4621408625 100644
--- a/ql/src/test/results/clientpositive/tablevalues.q.out
+++ b/ql/src/test/results/clientpositive/tablevalues.q.out
@@ -192,48 +192,23 @@ STAGE PLANS:
             alias: mytbl
             Statistics: Num rows: 5 Data size: 41 Basic stats: COMPLETE Column stats: NONE
             Select Operator
-              expressions: key (type: string)
+              expressions: array(struct(key,value,'A',10,key),struct(key,value,'B',20,key)) (type: array<struct<col1:string,col2:string,col3:string,col4:int,col5:string>>)
               outputColumnNames: _col0
               Statistics: Num rows: 5 Data size: 41 Basic stats: COMPLETE Column stats: NONE
-              Lateral View Forward
+              UDTF Operator
                 Statistics: Num rows: 5 Data size: 41 Basic stats: COMPLETE Column stats: NONE
+                function name: inline
                 Select Operator
+                  expressions: col3 (type: string), col4 (type: int), col5 (type: string)
+                  outputColumnNames: _col0, _col1, _col2
                   Statistics: Num rows: 5 Data size: 41 Basic stats: COMPLETE Column stats: NONE
-                  Lateral View Join Operator
-                    outputColumnNames: _col2, _col3, _col4
-                    Statistics: Num rows: 10 Data size: 82 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: _col2 (type: string), _col3 (type: int), _col4 (type: string)
-                      outputColumnNames: _col0, _col1, _col2
-                      Statistics: Num rows: 10 Data size: 82 Basic stats: COMPLETE Column stats: NONE
-                      File Output Operator
-                        compressed: false
-                        Statistics: Num rows: 10 Data size: 82 Basic stats: COMPLETE Column stats: NONE
-                        table:
-                            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                Select Operator
-                  expressions: array(struct('A',10,_col0),struct('B',20,_col0)) (type: array<struct<col1:string,col2:int,col3:string>>)
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 5 Data size: 41 Basic stats: COMPLETE Column stats: NONE
-                  UDTF Operator
+                  File Output Operator
+                    compressed: false
                     Statistics: Num rows: 5 Data size: 41 Basic stats: COMPLETE Column stats: NONE
-                    function name: inline
-                    Lateral View Join Operator
-                      outputColumnNames: _col2, _col3, _col4
-                      Statistics: Num rows: 10 Data size: 82 Basic stats: COMPLETE Column stats: NONE
-                      Select Operator
-                        expressions: _col2 (type: string), _col3 (type: int), _col4 (type: string)
-                        outputColumnNames: _col0, _col1, _col2
-                        Statistics: Num rows: 10 Data size: 82 Basic stats: COMPLETE Column stats: NONE
-                        File Output Operator
-                          compressed: false
-                          Statistics: Num rows: 10 Data size: 82 Basic stats: COMPLETE Column stats: NONE
-                          table:
-                              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
     Stage: Stage-0
       Fetch Operator
@@ -689,48 +664,23 @@ STAGE PLANS:
             alias: mytbl
             Statistics: Num rows: 5 Data size: 41 Basic stats: COMPLETE Column stats: NONE
             Select Operator
-              expressions: key (type: string)
+              expressions: array(struct(key,value,'A',10,key),struct(key,value,'B',20,key)) (type: array<struct<col1:string,col2:string,col3:string,col4:int,col5:string>>)
               outputColumnNames: _col0
               Statistics: Num rows: 5 Data size: 41 Basic stats: COMPLETE Column stats: NONE
-              Lateral View Forward
+              UDTF Operator
                 Statistics: Num rows: 5 Data size: 41 Basic stats: COMPLETE Column stats: NONE
+                function name: inline
                 Select Operator
+                  expressions: col3 (type: string), col4 (type: int), col5 (type: string)
+                  outputColumnNames: _col0, _col1, _col2
                   Statistics: Num rows: 5 Data size: 41 Basic stats: COMPLETE Column stats: NONE
-                  Lateral View Join Operator
-                    outputColumnNames: _col2, _col3, _col4
-                    Statistics: Num rows: 10 Data size: 82 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: _col2 (type: string), _col3 (type: int), _col4 (type: string)
-                      outputColumnNames: _col0, _col1, _col2
-                      Statistics: Num rows: 10 Data size: 82 Basic stats: COMPLETE Column stats: NONE
-                      File Output Operator
-                        compressed: false
-                        Statistics: Num rows: 10 Data size: 82 Basic stats: COMPLETE Column stats: NONE
-                        table:
-                            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                Select Operator
-                  expressions: array(struct('A',10,_col0),struct('B',20,_col0)) (type: array<struct<col1:string,col2:int,col3:string>>)
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 5 Data size: 41 Basic stats: COMPLETE Column stats: NONE
-                  UDTF Operator
+                  File Output Operator
+                    compressed: false
                     Statistics: Num rows: 5 Data size: 41 Basic stats: COMPLETE Column stats: NONE
-                    function name: inline
-                    Lateral View Join Operator
-                      outputColumnNames: _col2, _col3, _col4
-                      Statistics: Num rows: 10 Data size: 82 Basic stats: COMPLETE Column stats: NONE
-                      Select Operator
-                        expressions: _col2 (type: string), _col3 (type: int), _col4 (type: string)
-                        outputColumnNames: _col0, _col1, _col2
-                        Statistics: Num rows: 10 Data size: 82 Basic stats: COMPLETE Column stats: NONE
-                        File Output Operator
-                          compressed: false
-                          Statistics: Num rows: 10 Data size: 82 Basic stats: COMPLETE Column stats: NONE
-                          table:
-                              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
     Stage: Stage-0
      Fetch Operator
@@ -762,3 +712,249 @@ A	10	10
 B	20	10
 A	10	100
 B	20	100
+PREHOOK: query: EXPLAIN
+SELECT t.key
+FROM
+  (SELECT key, value FROM mytbl) t,
+  LATERAL TABLE(VALUES('A', 10, t.key),('B', 20, t.key)) AS tf
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT t.key
+FROM
+  (SELECT key, value FROM mytbl) t,
+  LATERAL TABLE(VALUES('A', 10, t.key),('B', 20, t.key)) AS tf
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: mytbl
+            Statistics: Num rows: 5 Data size: 41 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: array(struct(key,value,'A',10,key),struct(key,value,'B',20,key)) (type: array<struct<col1:string,col2:string,col3:string,col4:int,col5:string>>)
+              outputColumnNames: _col0
+              Statistics: Num rows: 5 Data size: 41 Basic stats: COMPLETE Column stats: NONE
+              UDTF Operator
+                Statistics: Num rows: 5 Data size: 41 Basic stats: COMPLETE Column stats: NONE
+                function name: inline
+                Select Operator
+                  expressions: col1 (type: string)
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 5 Data size: 41 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 5 Data size: 41 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT t.key
+FROM
+  (SELECT key, value FROM mytbl) t,
+  LATERAL TABLE(VALUES('A', 10, t.key),('B', 20, t.key)) AS tf
+PREHOOK: type: QUERY
+PREHOOK: Input: default@mytbl
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT t.key
+FROM
+  (SELECT key, value FROM mytbl) t,
+  LATERAL TABLE(VALUES('A', 10, t.key),('B', 20, t.key)) AS tf
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@mytbl
+#### A masked pattern was here ####
+0
+0
+0
+0
+0
+0
+10
+10
+100
+100
+PREHOOK: query: EXPLAIN
+SELECT tf.col3
+FROM
+  (SELECT key, value FROM mytbl) t,
+  LATERAL TABLE(VALUES('A', 10, t.key),('B', 20, t.key)) AS tf(col1, col2, col3)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT tf.col3
+FROM
+  (SELECT key, value FROM mytbl) t,
+  LATERAL TABLE(VALUES('A', 10, t.key),('B', 20, t.key)) AS tf(col1, col2, col3)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: mytbl
+            Statistics: Num rows: 5 Data size: 41 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: array(struct(key,value,'A',10,key),struct(key,value,'B',20,key)) (type: array<struct<col1:string,col2:string,col3:string,col4:int,col5:string>>)
+              outputColumnNames: _col0
+              Statistics: Num rows: 5 Data size: 41 Basic stats: COMPLETE Column stats: NONE
+              UDTF Operator
+                Statistics: Num rows: 5 Data size: 41 Basic stats: COMPLETE Column stats: NONE
+                function name: inline
+                Select Operator
+                  expressions: col5 (type: string)
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 5 Data size: 41 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 5 Data size: 41 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT tf.col3
+FROM
+  (SELECT key, value FROM mytbl) t,
+  LATERAL TABLE(VALUES('A', 10, t.key),('B', 20, t.key)) AS tf(col1, col2, col3)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@mytbl
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT tf.col3
+FROM
+  (SELECT key, value FROM mytbl) t,
+  LATERAL TABLE(VALUES('A', 10, t.key),('B', 20, t.key)) AS tf(col1, col2, col3)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@mytbl
+#### A masked pattern was here ####
+0
+0
+0
+0
+0
+0
+10
+10
+100
+100
+PREHOOK: query: EXPLAIN
+SELECT tf.col3
+FROM
+  (SELECT row_number() over (order by key desc) as r FROM mytbl) t,
+  LATERAL TABLE(VALUES('A', 10, t.r),('B', 20, t.r)) AS tf(col1, col2, col3)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT tf.col3
+FROM
+  (SELECT row_number() over (order by key desc) as r FROM mytbl) t,
+  LATERAL TABLE(VALUES('A', 10, t.r),('B', 20, t.r)) AS tf(col1, col2, col3)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: mytbl
+            Statistics: Num rows: 5 Data size: 41 Basic stats: COMPLETE Column stats: NONE
+            Reduce Output Operator
+              key expressions: 0 (type: int), key (type: string)
+              sort order: +-
+              Map-reduce partition columns: 0 (type: int)
+              Statistics: Num rows: 5 Data size: 41 Basic stats: COMPLETE Column stats: NONE
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey1 (type: string)
+          outputColumnNames: _col0
+          Statistics: Num rows: 5 Data size: 41 Basic stats: COMPLETE Column stats: NONE
+          PTF Operator
+            Function definitions:
+                Input definition
+                  input alias: ptf_0
+                  output shape: _col0: string
+                  type: WINDOWING
+                Windowing table definition
+                  input alias: ptf_1
+                  name: windowingtablefunction
+                  order by: _col0 DESC NULLS LAST
+                  partition by: 0
+                  raw input shape:
+                  window functions:
+                      window function definition
+                        alias: row_number_window_0
+                        name: row_number
+                        window function: GenericUDAFRowNumberEvaluator
+                        window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+                        isPivotResult: true
+            Statistics: Num rows: 5 Data size: 41 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: array(struct(row_number_window_0,'A',10,row_number_window_0),struct(row_number_window_0,'B',20,row_number_window_0)) (type: array<struct<col1:int,col2:string,col3:int,col4:int>>)
+              outputColumnNames: _col0
+              Statistics: Num rows: 5 Data size: 41 Basic stats: COMPLETE Column stats: NONE
+              UDTF Operator
+                Statistics: Num rows: 5 Data size: 41 Basic stats: COMPLETE Column stats: NONE
+                function name: inline
+                Select Operator
+                  expressions: col4 (type: int)
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 5 Data size: 41 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 5 Data size: 41 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT tf.col3
+FROM
+  (SELECT row_number() over (order by key desc) as r FROM mytbl) t,
+  LATERAL TABLE(VALUES('A', 10, t.r),('B', 20, t.r)) AS tf(col1, col2, col3)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@mytbl
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT tf.col3
+FROM
+  (SELECT row_number() over (order by key desc) as r FROM mytbl) t,
+  LATERAL TABLE(VALUES('A', 10, t.r),('B', 20, t.r)) AS tf(col1, col2, col3)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@mytbl
+#### A masked pattern was here ####
+1
+1
+2
+2
+3
+3
+4
+4
+5
+5
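For contrast, a sketch of lateral view forms that isCBOSupportedLateralView rejects (assumed behavior, per the check added in CalcitePlanner above); these continue to take the legacy Lateral View Forward/Join path rather than the CBO rewrite:

-- LATERAL VIEW OUTER is not CBO-supported, even over inline(array(...)):
SELECT t.key, x.c1
FROM mytbl t LATERAL VIEW OUTER inline(array(struct('A', 10))) x AS c1, c2;

-- UDTFs other than inline over a literal array are not CBO-supported either:
SELECT t.key, y.v
FROM mytbl t LATERAL VIEW explode(array(1, 2)) y AS v;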