diff --git a/data/files/filterCard.txt b/data/files/filterCard.txt new file mode 100644 index 0000000000..6246bfb86e --- /dev/null +++ b/data/files/filterCard.txt @@ -0,0 +1,101 @@ +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java index 423913b56b..dfe7540232 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java @@ -340,14 +340,16 @@ protected long evaluateExpression(Statistics stats, ExprNodeDesc pred, aspCtx.setAndExprStats(andStats); // evaluate children + evaluatedRowCount = stats.getNumRows(); for (ExprNodeDesc child : genFunc.getChildren()) { - newNumRows = evaluateChildExpr(aspCtx.getAndExprStats(), child, + evaluatedRowCount = evaluateChildExpr(aspCtx.getAndExprStats(), child, aspCtx, neededCols, op, evaluatedRowCount); - if (satisfyPrecondition(aspCtx.getAndExprStats())) { - updateStats(aspCtx.getAndExprStats(), newNumRows, true, op); - } else { - updateStats(aspCtx.getAndExprStats(), newNumRows, false, op); - } + } + newNumRows = evaluatedRowCount; + if (satisfyPrecondition(aspCtx.getAndExprStats())) { + updateStats(aspCtx.getAndExprStats(), newNumRows, true, op); + } else { + updateStats(aspCtx.getAndExprStats(), newNumRows, false, op); } } else if (udf instanceof GenericUDFOPOr) { // for OR condition independently compute and update stats. @@ -374,7 +376,7 @@ protected long evaluateExpression(Statistics stats, ExprNodeDesc pred, return evaluateNotNullExpr(stats, genFunc); } else { // single predicate condition - newNumRows = evaluateChildExpr(stats, pred, aspCtx, neededCols, op, evaluatedRowCount); + newNumRows = evaluateChildExpr(stats, pred, aspCtx, neededCols, op, stats.getNumRows()); } } else if (pred instanceof ExprNodeColumnDesc) { @@ -551,7 +553,7 @@ private long evaluateNotExpr(Statistics stats, ExprNodeDesc pred, long newNumRows = 0; for (ExprNodeDesc child : genFunc.getChildren()) { newNumRows = evaluateChildExpr(stats, child, aspCtx, neededCols, - op, 0); + op, numRows); } return numRows - newNumRows; } else if (leaf instanceof ExprNodeConstantDesc) { @@ -838,7 +840,7 @@ private long evaluateChildExpr(Statistics stats, ExprNodeDesc child, AnnotateStatsProcCtx aspCtx, List neededCols, Operator op, long evaluatedRowCount) throws CloneNotSupportedException, SemanticException { - long numRows = stats.getNumRows(); + long numRows = evaluatedRowCount; if (child instanceof ExprNodeGenericFuncDesc) { diff --git a/ql/src/test/queries/clientpositive/filter_cardinality.q b/ql/src/test/queries/clientpositive/filter_cardinality.q new file mode 100644 index 0000000000..2dfd2462c6 --- /dev/null +++ b/ql/src/test/queries/clientpositive/filter_cardinality.q @@ -0,0 +1,21 @@ +-- to test cardinality of filter in physical planning +set hive.stats.fetch.column.stats=true; +set hive.explain.user=false; + +CREATE TABLE mytable +( +num1 INT, +num2 INT, +num3 INT +) +ROW FORMAT DELIMITED FIELDS TERMINATED BY ","; + +LOAD DATA LOCAL inpath "../../data/files/filterCard.txt" into table mytable; + +ANALYZE table mytable compute statistics for columns; + +explain select * from mytable where num1=4; +explain select * from mytable where num1=4 and num2=8; +explain select * from mytable where num1=4 and num2=8 and num3=7; + +drop table mytable; diff --git a/ql/src/test/results/clientpositive/filter_cardinality.q.out b/ql/src/test/results/clientpositive/filter_cardinality.q.out new file mode 100644 index 0000000000..de41578adc --- /dev/null +++ b/ql/src/test/results/clientpositive/filter_cardinality.q.out @@ -0,0 +1,152 @@ +PREHOOK: query: CREATE TABLE mytable +( +num1 INT, +num2 INT, +num3 INT +) +ROW FORMAT DELIMITED FIELDS TERMINATED BY "," +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@mytable +POSTHOOK: query: CREATE TABLE mytable +( +num1 INT, +num2 INT, +num3 INT +) +ROW FORMAT DELIMITED FIELDS TERMINATED BY "," +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mytable +PREHOOK: query: LOAD DATA LOCAL inpath "../../data/files/filterCard.txt" into table mytable +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@mytable +POSTHOOK: query: LOAD DATA LOCAL inpath "../../data/files/filterCard.txt" into table mytable +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@mytable +PREHOOK: query: ANALYZE table mytable compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@mytable +#### A masked pattern was here #### +POSTHOOK: query: ANALYZE table mytable compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@mytable +#### A masked pattern was here #### +PREHOOK: query: explain select * from mytable where num1=4 +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from mytable where num1=4 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: mytable + Statistics: Num rows: 75 Data size: 900 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (num1 = 4) (type: boolean) + Statistics: Num rows: 38 Data size: 456 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 4 (type: int), num2 (type: int), num3 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 38 Data size: 456 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 38 Data size: 456 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select * from mytable where num1=4 and num2=8 +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from mytable where num1=4 and num2=8 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: mytable + Statistics: Num rows: 75 Data size: 900 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((num1 = 4) and (num2 = 8)) (type: boolean) + Statistics: Num rows: 19 Data size: 228 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 4 (type: int), 8 (type: int), num3 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 19 Data size: 228 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 19 Data size: 228 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select * from mytable where num1=4 and num2=8 and num3=7 +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from mytable where num1=4 and num2=8 and num3=7 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: mytable + Statistics: Num rows: 75 Data size: 900 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((num1 = 4) and (num2 = 8) and (num3 = 7)) (type: boolean) + Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 4 (type: int), 8 (type: int), 7 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: drop table mytable +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@mytable +PREHOOK: Output: default@mytable +POSTHOOK: query: drop table mytable +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@mytable +POSTHOOK: Output: default@mytable