diff --git metastore/src/java/org/apache/hadoop/hive/metastore/parser/Filter.g metastore/src/java/org/apache/hadoop/hive/metastore/parser/Filter.g
index 8aef5bf..c3adbfc 100644
--- metastore/src/java/org/apache/hadoop/hive/metastore/parser/Filter.g
+++ metastore/src/java/org/apache/hadoop/hive/metastore/parser/Filter.g
@@ -122,6 +122,10 @@
 operatorExpression
    :
    betweenExpression
    |
+   inExpression
+   |
+   multiColInExpression
+   |
    binOpExpression
    ;
@@ -203,16 +207,236 @@ betweenExpression
       tree.addIntermediateNode(isPositive ? LogicalOperator.AND : LogicalOperator.OR);
    };
 
+inExpression
+@init {
+    List constants = new ArrayList();
+    Object constantV = null;
+    boolean isPositive = true;
+}
+    :
+    (
+        LPAREN key = Identifier RPAREN (KW_NOT { isPositive = false; } )? IN
+        LPAREN
+        (
+            (
+                constant = DateLiteral
+                {
+                    constantV = FilterLexer.ExtractDate(constant.getText());
+                    constants.add(constantV);
+                }
+                (COLON constant = DateLiteral
+                {
+                    constantV = FilterLexer.ExtractDate(constant.getText());
+                    constants.add(constantV);
+                } )*
+            )
+            |
+            (
+                constant = StringLiteral
+                {
+                    constantV = TrimQuotes(constant.getText());
+                    constants.add(constantV);
+                }
+                (COLON constant = StringLiteral
+                {
+                    constantV = TrimQuotes(constant.getText());
+                    constants.add(constantV);
+                } )*
+            )
+            |
+            (
+                constant = IntegralLiteral
+                {
+                    constantV = Long.parseLong(constant.getText());
+                    constants.add(constantV);
+                }
+                (COLON constant = IntegralLiteral
+                {
+                    constantV = Long.parseLong(constant.getText());
+                    constants.add(constantV);
+                } )*
+            )
+        ) RPAREN
+    )
+    {
+        for (int i = 0; i < constants.size(); i++) {
+            Object value = constants.get(i);
+            LeafNode leaf = new LeafNode();
+            leaf.keyName = key.getText();
+            leaf.value = value;
+            leaf.operator = isPositive ? Operator.EQUALS : Operator.NOTEQUALS2;
+            tree.addLeafNode(leaf);
+            if (i != 0) {
+                tree.addIntermediateNode(isPositive ? LogicalOperator.OR : LogicalOperator.AND);
+            }
+        }
+    }
+    ;
+
+multiColInExpression
+@init {
+    List keyNames = new ArrayList();
+    List constants = new ArrayList();
+    List partialConstants;
+    String keyV = null;
+    Object constantV = null;
+    boolean isPositive = true;
+}
+    :
+    (
+        LPAREN
+        (
+            KW_STRUCT LPAREN key = Identifier
+            {
+                keyV = key.getText();
+                keyNames.add(keyV);
+            }
+            (COLON key = Identifier
+            {
+                keyV = key.getText();
+                keyNames.add(keyV);
+            }
+            )* RPAREN
+        )
+        RPAREN (KW_NOT { isPositive = false; } )? IN LPAREN
+        KW_CONST KW_STRUCT LPAREN
+        {
+            partialConstants = new ArrayList();
+        }
+        (
+            constant = DateLiteral
+            {
+                constantV = FilterLexer.ExtractDate(constant.getText());
+                partialConstants.add(constantV);
+            }
+            |
+            constant = StringLiteral
+            {
+                constantV = TrimQuotes(constant.getText());
+                partialConstants.add(constantV);
+            }
+            |
+            constant = IntegralLiteral
+            {
+                constantV = Long.parseLong(constant.getText());
+                partialConstants.add(constantV);
+            }
+        )
+        (
+            COLON
+            (
+                constant = DateLiteral
+                {
+                    constantV = FilterLexer.ExtractDate(constant.getText());
+                    partialConstants.add(constantV);
+                }
+                |
+                constant = StringLiteral
+                {
+                    constantV = TrimQuotes(constant.getText());
+                    partialConstants.add(constantV);
+                }
+                |
+                constant = IntegralLiteral
+                {
+                    constantV = Long.parseLong(constant.getText());
+                    partialConstants.add(constantV);
+                }
+            )
+        )*
+        {
+            constants.add(partialConstants);
+        }
+        RPAREN
+        (
+            COLON KW_CONST KW_STRUCT LPAREN
+            {
+                partialConstants = new ArrayList();
+            }
+            (
+                constant = DateLiteral
+                {
+                    constantV = FilterLexer.ExtractDate(constant.getText());
+                    partialConstants.add(constantV);
+                }
+                |
+                constant = StringLiteral
+                {
+                    constantV = TrimQuotes(constant.getText());
+                    partialConstants.add(constantV);
+                }
+                |
+                constant = IntegralLiteral
+                {
+                    constantV = Long.parseLong(constant.getText());
+                    partialConstants.add(constantV);
+                }
+            )
+            (
+                COLON
+                (
+                    constant = DateLiteral
+                    {
+                        constantV = FilterLexer.ExtractDate(constant.getText());
+                        partialConstants.add(constantV);
+                    }
+                    |
+                    constant = StringLiteral
+                    {
+                        constantV = TrimQuotes(constant.getText());
+                        partialConstants.add(constantV);
+                    }
+                    |
+                    constant = IntegralLiteral
+                    {
+                        constantV = Long.parseLong(constant.getText());
+                        partialConstants.add(constantV);
+                    }
+                )
+            )*
+            {
+                constants.add(partialConstants);
+            }
+            RPAREN
+        )*
+        RPAREN
+    )
+    {
+        for (int i = 0; i < constants.size(); i++) {
+            List list = (List) constants.get(i);
+            assert keyNames.size() == list.size();
+            for (int j = 0; j < list.size(); j++) {
+                String keyName = keyNames.get(j);
+                Object value = list.get(j);
+                LeafNode leaf = new LeafNode();
+                leaf.keyName = keyName;
+                leaf.value = value;
+                leaf.operator = isPositive ? Operator.EQUALS : Operator.NOTEQUALS2;
+                tree.addLeafNode(leaf);
+                if (j != 0) {
+                    tree.addIntermediateNode(isPositive ? LogicalOperator.AND : LogicalOperator.OR);
+                }
+            }
+            if (i != 0) {
+                tree.addIntermediateNode(isPositive ? LogicalOperator.OR : LogicalOperator.AND);
+            }
+        }
+    }
+    ;
+
 // Keywords
 KW_NOT : 'NOT';
 KW_AND : 'AND';
 KW_OR : 'OR';
 KW_LIKE : 'LIKE';
 KW_DATE : 'date';
+KW_CONST : 'CONST';
+KW_STRUCT : 'STRUCT';
 
 // Operators
 LPAREN : '(' ;
 RPAREN : ')' ;
+COLON : ',' ;
 EQUAL : '=';
 NOTEQUAL : '<>' | '!=';
 LESSTHANOREQUALTO : '<=';
@@ -220,6 +444,7 @@ LESSTHAN : '<';
 GREATERTHANOREQUALTO : '>=';
 GREATERTHAN : '>';
 BETWEEN : 'BETWEEN';
+IN : 'IN';
 
 // LITERALS
 fragment
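Editor's note: a standalone Java sketch (not part of the patch; class and method names are illustrative) of the expansion the two new rules perform. A single-column (key) IN (v1, v2) filter becomes key = v1 OR key = v2, and a multi-column (struct(k1,k2)) IN (const struct(...), ...) filter becomes a disjunction of per-tuple conjunctions; with NOT, the grammar actions above flip the comparison to != and swap AND/OR.

import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;

public class InExpansionSketch {

  // (key) IN (v1, v2, ...)  ->  (key = "v1") or (key = "v2") or ...
  static String expandSingleColumn(String key, List<String> values) {
    return values.stream()
        .map(v -> "(" + key + " = \"" + v + "\")")
        .collect(Collectors.joining(" or "));
  }

  // (struct(k1, k2)) IN (const struct(a1, a2), const struct(b1, b2), ...)
  //   -> ((k1 = "a1") and (k2 = "a2")) or ((k1 = "b1") and (k2 = "b2")) or ...
  static String expandMultiColumn(List<String> keys, List<List<String>> tuples) {
    return tuples.stream()
        .map(tuple -> {
          StringBuilder conj = new StringBuilder("(");
          for (int i = 0; i < keys.size(); i++) {
            if (i > 0) {
              conj.append(" and ");
            }
            conj.append("(").append(keys.get(i)).append(" = \"").append(tuple.get(i)).append("\")");
          }
          return conj.append(")").toString();
        })
        .collect(Collectors.joining(" or "));
  }

  public static void main(String[] args) {
    System.out.println(expandSingleColumn("ds1", Arrays.asList("2000-04-08", "2000-04-09")));
    System.out.println(expandMultiColumn(
        Arrays.asList("ds1", "ds2"),
        Arrays.asList(
            Arrays.asList("2000-04-08", "2001-04-08"),
            Arrays.asList("2000-04-09", "2001-04-09"))));
  }
}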
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/PointLookupOptimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/PointLookupOptimizer.java
index d83636d..11d8786 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/PointLookupOptimizer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/PointLookupOptimizer.java
@@ -152,9 +152,6 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx,
       if (LOG.isDebugEnabled()) {
         LOG.debug("Generated new predicate with IN clause: " + newPredicate);
       }
-      if (!extract) {
-        filterOp.getConf().setOrigPredicate(predicate);
-      }
       filterOp.getConf().setPredicate(newPredicate);
     }
@@ -280,14 +277,37 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx,
       }
 
       // 4. We build the new predicate and return it
-      ExprNodeDesc newPredicate = null;
+      // First, the original predicate is transformed as follows:
+      // (a=1 and b=2 and c=3) or (a=2 and b=3 and c=4) ->
+      // (a,b,c) IN ((1,2,3),(2,3,4))
+      // In addition, pre-conditions for the tuple expressions are extracted,
+      // which is useful for the partition pruner and/or reduces the number of
+      // comparisons.
+      // If a, b, and c are partition columns and they belong to different tables:
+      // (a,b,c) IN ((1,2,3),(2,3,4)) ->
+      // (a) IN (1,2) and b in (2,3) and c in (3,4) and (a,b,c) IN ((1,2,3),(2,3,4))
+      // If a, b, and c are partition columns, a and b belong to table1,
+      // and c belongs to table2:
+      // (a,b,c) IN ((1,2,3),(2,3,4)) ->
+      // (a,b) IN ((1,2),(2,3)) and c in (3,4) and (a,b,c) IN ((1,2,3),(2,3,4))
+      // If none of them are partition columns:
+      // (a,b,c) IN ((1,2,3),(2,3,4)) ->
+      // (a,b,c) IN ((1,2,3),(2,3,4))
+      //
       List newChildren = new ArrayList(children.size());
-      // 4.1 Create structs
-      List columns = new ArrayList();
-      List names = new ArrayList();
-      List typeInfos = new ArrayList();
+      ListMultimap newChildrenPerTable = ArrayListMultimap.create();
+      boolean possibleReduction = false;
+      Set extractionColumns = new HashSet();
+      for (int i = 0; i < children.size(); i++) {
+        List columns = new ArrayList();
+        List names = new ArrayList();
+        List typeInfos = new ArrayList();
         List constantFields = new ArrayList(children.size());
+        ListMultimap partitionColumnsPerTable = ArrayListMultimap.create();
+        ListMultimap partitionColumnNamesPerTable = ArrayListMultimap.create();
+        ListMultimap partitionColumnTypeInfosPerTable = ArrayListMultimap.create();
+        ListMultimap constantFieldsPerTable = ArrayListMultimap.create();
 
         for (String keyString : columnConstantsMap.keySet()) {
           Pair columnConstant =
@@ -296,8 +316,21 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx,
           columns.add(columnConstant.left);
           names.add(columnConstant.left.getColumn());
           typeInfos.add(columnConstant.left.getTypeInfo());
+          if (extract && columnConstant.left.getIsPartitionColOrVirtualCol()) {
+            partitionColumnsPerTable.put(columnConstant.left.getTabAlias(),
+                columnConstant.left);
+            partitionColumnNamesPerTable.put(columnConstant.left.getTabAlias(),
+                columnConstant.left.getColumn());
+            partitionColumnTypeInfosPerTable.put(columnConstant.left.getTabAlias(),
+                columnConstant.left.getTypeInfo());
+          } else if (extract) {
+            extractionColumns.add(keyString);
+          }
         }
         constantFields.add(columnConstant.right);
+        if (extract && columnConstant.left.getIsPartitionColOrVirtualCol()) {
+          constantFieldsPerTable.put(columnConstant.left.getTabAlias(), columnConstant.right);
+        }
       }
 
       if (i == 0) {
@@ -309,6 +342,24 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx,
             TypeInfoFactory.getStructTypeInfo(names, typeInfos),
             FunctionRegistry.getFunctionInfo(STRUCT_UDF).getGenericUDF(), columns);
+        if (extract) {
+          possibleReduction = true; // We have more than one column, so we can try to
+                                    // reduce the NDV in step 5
+          for (String tableAlias : partitionColumnsPerTable.keySet()) {
+            ExprNodeDesc columnsRefsByTable;
+            if (partitionColumnsPerTable.get(tableAlias).size() == 1) {
+              columnsRefsByTable = partitionColumnsPerTable.get(tableAlias).get(0);
+            } else {
+              columnsRefsByTable = new ExprNodeGenericFuncDesc(
+                  TypeInfoFactory.getStructTypeInfo(
+                      partitionColumnNamesPerTable.get(tableAlias),
+                      partitionColumnTypeInfosPerTable.get(tableAlias)),
+                  FunctionRegistry.getFunctionInfo(STRUCT_UDF).getGenericUDF(),
+                  partitionColumnsPerTable.get(tableAlias));
+            }
+            newChildrenPerTable.put(tableAlias, columnsRefsByTable);
+          }
+        }
       }
       newChildren.add(columnsRefs);
     }
@@ -320,37 +371,52 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx,
             TypeInfoFactory.getStructTypeInfo(names, typeInfos),
             FunctionRegistry.getFunctionInfo(STRUCT_UDF).getGenericUDF(), constantFields);
+        if (extract) {
+          for (String tableAlias : constantFieldsPerTable.keySet()) {
+            ExprNodeDesc valuesByTable;
+            if (constantFieldsPerTable.get(tableAlias).size() == 1) {
+              valuesByTable = constantFieldsPerTable.get(tableAlias).get(0);
+            } else {
+              valuesByTable = new ExprNodeGenericFuncDesc(
+                  TypeInfoFactory.getStructTypeInfo(
+                      partitionColumnNamesPerTable.get(tableAlias),
+                      partitionColumnTypeInfosPerTable.get(tableAlias)),
+                  FunctionRegistry.getFunctionInfo(STRUCT_UDF).getGenericUDF(),
+                  constantFieldsPerTable.get(tableAlias));
+            }
+            newChildrenPerTable.put(tableAlias, valuesByTable);
+          }
+        }
       }
       newChildren.add(values);
     }
-      newPredicate = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo,
-          FunctionRegistry.getFunctionInfo(IN_UDF).getGenericUDF(), newChildren);
-
-      if (extract && columns.size() > 1) {
-        final List subExpr = new ArrayList(columns.size()+1);
-        // extract pre-conditions for the tuple expressions
-        // (a,b) IN ((1,2),(2,3)) ->
-        // ((a) IN (1,2) and b in (2,3)) and (a,b) IN ((1,2),(2,3))
+      List subExpr = new ArrayList(children.size());
+      // Create new predicates grouped by table; creation of the new predicate containing
+      // all the values is done in step 6
+      for (String tableAlias : newChildrenPerTable.keySet()) {
+        ExprNodeDesc inClause = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo,
+            FunctionRegistry.getFunctionInfo(IN_UDF).getGenericUDF(),
+            newChildrenPerTable.get(tableAlias));
+        subExpr.add(inClause);
+      }
 
-        for (String keyString : columnConstantsMap.keySet()) {
-          final Set valuesExpr =
-              new HashSet(children.size());
+      // 5. Extract new predicates for the columns that were not covered by the
+      // partition-column predicates, when we can reduce the NDV
+      if (possibleReduction) { // More than one column in the original in clause
+        for (String keyString : extractionColumns) {
           final List> partial = columnConstantsMap.get(keyString);
+          final Set valuesExpr =
+              new HashSet(children.size());
           for (int i = 0; i < children.size(); i++) {
-            Pair columnConstant = partial
-                .get(i);
-            valuesExpr
-                .add(new ExprNodeDescEqualityWrapper(columnConstant.right));
+            Pair columnConstant = partial.get(i);
+            valuesExpr.add(new ExprNodeDescEqualityWrapper(columnConstant.right));
           }
-          ExprNodeColumnDesc lookupCol = partial.get(0).left;
-          // generate a partial IN clause, if the column is a partition column
-          if (lookupCol.getIsPartitionColOrVirtualCol()
-              || valuesExpr.size() < children.size()) {
-            // optimize only nDV reductions
+          // Optimize only NDV reductions
+          if (valuesExpr.size() < children.size()) {
             final List inExpr = new ArrayList();
-            inExpr.add(lookupCol);
+            inExpr.add(partial.get(0).left);
             for (ExprNodeDescEqualityWrapper value : sortForTests(valuesExpr)) {
               inExpr.add(value.getExprNodeDesc());
             }
@@ -359,17 +425,18 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx,
                 .getFunctionInfo(IN_UDF).getGenericUDF(), inExpr));
           }
         }
-        // loop complete, inspect the sub expressions generated
-        if (subExpr.size() > 0) {
-          // add the newPredicate to the end & produce an AND clause
-          subExpr.add(newPredicate);
-          newPredicate = new ExprNodeGenericFuncDesc(
-              TypeInfoFactory.booleanTypeInfo, FunctionRegistry
-                  .getFunctionInfo(AND_UDF).getGenericUDF(), subExpr);
-        }
-        // else, newPredicate is unmodified
       }
 
+      // 6. Return the new predicate
+      ExprNodeDesc newPredicate = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo,
+          FunctionRegistry.getFunctionInfo(IN_UDF).getGenericUDF(), newChildren);
+      if (subExpr.size() > 0) {
+        // Add the newPredicate to the end and produce an AND clause
+        subExpr.add(newPredicate);
+        newPredicate = new ExprNodeGenericFuncDesc(
+            TypeInfoFactory.booleanTypeInfo, FunctionRegistry
+                .getFunctionInfo(AND_UDF).getGenericUDF(), subExpr);
+      }
       return newPredicate;
     }
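Editor's note: a standalone sketch (illustrative names only, not the patch code) of the per-table grouping used in step 4 above. Columns referenced by the IN tuple are bucketed by table alias with a Guava multimap, so that a partition-only pre-condition such as (struct(ds1,ds2)) IN (...) can later be emitted per alias next to the full tuple IN.

import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.ListMultimap;

public class PerTableGroupingSketch {

  static final class Column {
    final String tableAlias;
    final String name;
    final boolean partitionCol;

    Column(String tableAlias, String name, boolean partitionCol) {
      this.tableAlias = tableAlias;
      this.name = name;
      this.partitionCol = partitionCol;
    }
  }

  public static void main(String[] args) {
    Column[] inColumns = {
        new Column("t1", "ds1", true),
        new Column("t1", "ds2", true),
        new Column("t2", "key", false)
    };

    // Partition columns are grouped per table alias; non-partition columns would be
    // kept aside for the NDV-reduction step (step 5 in the patch).
    ListMultimap<String, String> partitionColumnsPerTable = ArrayListMultimap.create();
    for (Column c : inColumns) {
      if (c.partitionCol) {
        partitionColumnsPerTable.put(c.tableAlias, c.name);
      }
    }

    for (String alias : partitionColumnsPerTable.keySet()) {
      System.out.println(alias + " -> struct("
          + String.join(",", partitionColumnsPerTable.get(alias)) + ") IN (...)");
    }
  }
}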
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/OpProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/OpProcFactory.java
index 7262164..fd51628 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/OpProcFactory.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/OpProcFactory.java
@@ -55,8 +55,7 @@ protected void generatePredicate(NodeProcessorCtx procCtx, FilterOperator fop,
       TableScanOperator top) throws SemanticException, UDFArgumentException {
     OpWalkerCtx owc = (OpWalkerCtx) procCtx;
     // Otherwise this is not a sampling predicate and we need to
-    ExprNodeDesc predicate = fop.getConf().getOrigPredicate();
-    predicate = predicate == null ? fop.getConf().getPredicate() : predicate;
+    ExprNodeDesc predicate = fop.getConf().getPredicate();
     String alias = top.getConf().getAlias();
 
     // Generate the partition pruning predicate
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java
index 6a31689..ccc4bb4 100644
--- ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java
@@ -79,7 +79,6 @@ public String toString() {
   private static final long serialVersionUID = 1L;
   private org.apache.hadoop.hive.ql.plan.ExprNodeDesc predicate;
-  private transient ExprNodeDesc origPredicate;
   private boolean isSamplingPred;
   private transient SampleDesc sampleDescr;
   //Is this a filter that should perform a comparison for sorted searches
@@ -151,14 +150,6 @@ public void setSortedFilter(boolean isSortedFilter) {
     this.isSortedFilter = isSortedFilter;
   }
 
-  public void setOrigPredicate(ExprNodeDesc origPredicate) {
-    this.origPredicate = origPredicate;
-  }
-
-  public ExprNodeDesc getOrigPredicate() {
-    return origPredicate;
-  }
-
   /**
    * Some filters are generated or implied, which means it is not in the query.
    * It is added by the analyzer. For example, when we do an inner join, we add
diff --git ql/src/test/queries/clientpositive/pointlookup4.q ql/src/test/queries/clientpositive/pointlookup4.q
new file mode 100644
index 0000000..dfaa6b4
--- /dev/null
+++ ql/src/test/queries/clientpositive/pointlookup4.q
@@ -0,0 +1,26 @@
+drop table pcr_t1;
+
+create table pcr_t1 (key int, value string) partitioned by (ds1 string, ds2 string);
+insert overwrite table pcr_t1 partition (ds1='2000-04-08', ds2='2001-04-08') select * from src where key < 20 order by key;
+insert overwrite table pcr_t1 partition (ds1='2000-04-09', ds2='2001-04-09') select * from src where key < 20 order by key;
+insert overwrite table pcr_t1 partition (ds1='2000-04-10', ds2='2001-04-10') select * from src where key < 20 order by key;
+
+set hive.optimize.point.lookup=false;
+
+explain extended
+select key, value, ds1, ds2
+from pcr_t1
+where (ds1='2000-04-08' and ds2='2001-04-08' and key=1) or (ds1='2000-04-09' and ds2='2001-04-09' and key=2)
+order by key, value, ds1, ds2;
+
+set hive.optimize.point.lookup=true;
+set hive.optimize.point.lookup.min=0;
+set hive.optimize.point.lookup.extract=true;
+
+explain extended
+select key, value, ds1, ds2
+from pcr_t1
+where (ds1='2000-04-08' and ds2='2001-04-08' and key=1) or (ds1='2000-04-09' and ds2='2001-04-09' and key=2)
+order by key, value, ds1, ds2;
+
+drop table pcr_t1;
diff --git ql/src/test/results/clientpositive/pointlookup4.q.out ql/src/test/results/clientpositive/pointlookup4.q.out
new file mode 100644
index 0000000..69e8705
--- /dev/null
+++ ql/src/test/results/clientpositive/pointlookup4.q.out
@@ -0,0 +1,528 @@
+PREHOOK: query: drop table pcr_t1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table pcr_t1
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table pcr_t1 (key int, value string) partitioned by (ds1 string, ds2 string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@pcr_t1
+POSTHOOK: query: create table pcr_t1 (key int, value string) partitioned by (ds1 string, ds2 string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@pcr_t1
+PREHOOK: query: insert overwrite table pcr_t1 partition (ds1='2000-04-08', ds2='2001-04-08') select * from src where key < 20 order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: 
default@src +PREHOOK: Output: default@pcr_t1@ds1=2000-04-08/ds2=2001-04-08 +POSTHOOK: query: insert overwrite table pcr_t1 partition (ds1='2000-04-08', ds2='2001-04-08') select * from src where key < 20 order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@pcr_t1@ds1=2000-04-08/ds2=2001-04-08 +POSTHOOK: Lineage: pcr_t1 PARTITION(ds1=2000-04-08,ds2=2001-04-08).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: pcr_t1 PARTITION(ds1=2000-04-08,ds2=2001-04-08).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table pcr_t1 partition (ds1='2000-04-09', ds2='2001-04-09') select * from src where key < 20 order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@pcr_t1@ds1=2000-04-09/ds2=2001-04-09 +POSTHOOK: query: insert overwrite table pcr_t1 partition (ds1='2000-04-09', ds2='2001-04-09') select * from src where key < 20 order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@pcr_t1@ds1=2000-04-09/ds2=2001-04-09 +POSTHOOK: Lineage: pcr_t1 PARTITION(ds1=2000-04-09,ds2=2001-04-09).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: pcr_t1 PARTITION(ds1=2000-04-09,ds2=2001-04-09).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table pcr_t1 partition (ds1='2000-04-10', ds2='2001-04-10') select * from src where key < 20 order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@pcr_t1@ds1=2000-04-10/ds2=2001-04-10 +POSTHOOK: query: insert overwrite table pcr_t1 partition (ds1='2000-04-10', ds2='2001-04-10') select * from src where key < 20 order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@pcr_t1@ds1=2000-04-10/ds2=2001-04-10 +POSTHOOK: Lineage: pcr_t1 PARTITION(ds1=2000-04-10,ds2=2001-04-10).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: pcr_t1 PARTITION(ds1=2000-04-10,ds2=2001-04-10).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: explain extended +select key, value, ds1, ds2 +from pcr_t1 +where (ds1='2000-04-08' and ds2='2001-04-08' and key=1) or (ds1='2000-04-09' and ds2='2001-04-09' and key=2) +order by key, value, ds1, ds2 +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +select key, value, ds1, ds2 +from pcr_t1 +where (ds1='2000-04-08' and ds2='2001-04-08' and key=1) or (ds1='2000-04-09' and ds2='2001-04-09' and key=2) +order by key, value, ds1, ds2 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + pcr_t1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_TABLE_OR_COL + value + TOK_SELEXPR + TOK_TABLE_OR_COL + ds1 + TOK_SELEXPR + TOK_TABLE_OR_COL + ds2 + TOK_WHERE + or + and + and + = + TOK_TABLE_OR_COL + ds1 + '2000-04-08' + = + TOK_TABLE_OR_COL + ds2 + '2001-04-08' + = + TOK_TABLE_OR_COL + key + 1 + and + and + = + TOK_TABLE_OR_COL + ds1 + '2000-04-09' + = + TOK_TABLE_OR_COL + ds2 + '2001-04-09' + = + TOK_TABLE_OR_COL + key + 2 + TOK_ORDERBY + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + key + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + value + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + ds1 + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + ds2 + + +STAGE DEPENDENCIES: 
+ Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: pcr_t1 + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (((ds1 = '2000-04-08') and (ds2 = '2001-04-08') and (key = 1)) or ((ds1 = '2000-04-09') and (ds2 = '2001-04-09') and (key = 2))) (type: boolean) + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string), ds1 (type: string), ds2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string) + sort order: ++++ + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds2=2001-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds1 2000-04-08 + ds2 2001-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t1 + numFiles 1 + numRows 20 + partition_columns ds1/ds2 + partition_columns.types string:string + rawDataSize 160 + serialization.ddl struct pcr_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t1 + partition_columns ds1/ds2 + partition_columns.types string:string + serialization.ddl struct pcr_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcr_t1 + name: default.pcr_t1 +#### A masked pattern was here #### + Partition + base file name: ds2=2001-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds1 2000-04-09 + ds2 2001-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t1 + numFiles 1 + numRows 20 + partition_columns ds1/ds2 + partition_columns.types string:string + rawDataSize 160 + serialization.ddl struct pcr_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t1 + partition_columns ds1/ds2 + partition_columns.types string:string + serialization.ddl struct pcr_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcr_t1 + name: default.pcr_t1 + Truncated Path -> Alias: + /pcr_t1/ds1=2000-04-08/ds2=2001-04-08 [pcr_t1] + /pcr_t1/ds1=2000-04-09/ds2=2001-04-09 [pcr_t1] + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types int:string:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain extended +select key, value, ds1, ds2 +from pcr_t1 +where (ds1='2000-04-08' and ds2='2001-04-08' and key=1) or (ds1='2000-04-09' and ds2='2001-04-09' and key=2) +order by key, value, ds1, ds2 +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +select key, value, ds1, ds2 +from pcr_t1 +where (ds1='2000-04-08' and ds2='2001-04-08' and key=1) or (ds1='2000-04-09' and ds2='2001-04-09' and key=2) +order by key, value, ds1, ds2 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + pcr_t1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_TABLE_OR_COL + value + TOK_SELEXPR + TOK_TABLE_OR_COL + ds1 + TOK_SELEXPR + TOK_TABLE_OR_COL + ds2 + TOK_WHERE + or + and + and + = + TOK_TABLE_OR_COL + ds1 + '2000-04-08' + = + TOK_TABLE_OR_COL + ds2 + '2001-04-08' + = + TOK_TABLE_OR_COL + key + 1 + and + and + = + TOK_TABLE_OR_COL + ds1 + '2000-04-09' + = + TOK_TABLE_OR_COL + ds2 + '2001-04-09' + = + TOK_TABLE_OR_COL + key + 2 + TOK_ORDERBY + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + key + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + value + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + ds1 + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + ds2 + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: pcr_t1 + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((struct(ds1,ds2)) IN 
(const struct('2000-04-08','2001-04-08'), const struct('2000-04-09','2001-04-09')) and (struct(ds1,key,ds2)) IN (const struct('2000-04-08',1,'2001-04-08'), const struct('2000-04-09',2,'2001-04-09'))) (type: boolean) + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string), ds1 (type: string), ds2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string) + sort order: ++++ + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + tag: -1 + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds2=2001-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds1 2000-04-08 + ds2 2001-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t1 + numFiles 1 + numRows 20 + partition_columns ds1/ds2 + partition_columns.types string:string + rawDataSize 160 + serialization.ddl struct pcr_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t1 + partition_columns ds1/ds2 + partition_columns.types string:string + serialization.ddl struct pcr_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcr_t1 + name: default.pcr_t1 +#### A masked pattern was here #### + Partition + base file name: ds2=2001-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds1 2000-04-09 + ds2 2001-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t1 + numFiles 1 + numRows 20 + partition_columns ds1/ds2 + partition_columns.types string:string + rawDataSize 160 + serialization.ddl struct pcr_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t1 + partition_columns ds1/ds2 + partition_columns.types 
string:string + serialization.ddl struct pcr_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcr_t1 + name: default.pcr_t1 + Truncated Path -> Alias: + /pcr_t1/ds1=2000-04-08/ds2=2001-04-08 [pcr_t1] + /pcr_t1/ds1=2000-04-09/ds2=2001-04-09 [pcr_t1] + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types int:string:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: drop table pcr_t1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@pcr_t1 +PREHOOK: Output: default@pcr_t1 +POSTHOOK: query: drop table pcr_t1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@pcr_t1 +POSTHOOK: Output: default@pcr_t1
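Editor's note: in the second plan above, the rewritten filter is the conjunction of the extracted partition-only pre-condition (struct(ds1,ds2)) IN (...) and the full tuple IN over (ds1,key,ds2). Because the pre-condition references only partition columns, the pruner can evaluate it per partition and drop ds1=2000-04-10 even though key is not a partition column. A minimal standalone sketch (hypothetical names, not Hive's pruner code) of that per-partition membership check:

import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

public class PartitionPruneSketch {

  public static void main(String[] args) {
    // Tuples allowed by the partition-only pre-condition.
    Set<List<String>> allowed = new HashSet<>(Arrays.asList(
        Arrays.asList("2000-04-08", "2001-04-08"),
        Arrays.asList("2000-04-09", "2001-04-09")));

    // Candidate partitions of pcr_t1, as (ds1, ds2) pairs.
    List<List<String>> partitions = Arrays.asList(
        Arrays.asList("2000-04-08", "2001-04-08"),
        Arrays.asList("2000-04-09", "2001-04-09"),
        Arrays.asList("2000-04-10", "2001-04-10"));

    for (List<String> partition : partitions) {
      // Keep the partition only if its (ds1, ds2) tuple appears in the IN list.
      System.out.println(partition + " -> " + (allowed.contains(partition) ? "scan" : "prune"));
    }
  }
}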