diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java index 3613784..5ec9e88 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java @@ -64,6 +64,8 @@ import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx; import org.apache.hadoop.hive.ql.plan.LoadTableDesc; import org.apache.hadoop.hive.ql.plan.MapWork; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.ReduceWork; import org.apache.hadoop.hive.ql.plan.SparkWork; import org.apache.hadoop.hive.ql.plan.StatsWork; import org.apache.hadoop.hive.ql.plan.UnionWork; @@ -213,6 +215,20 @@ public String getName() { } return result; } + + @Override + public Operator getReducer(MapWork mapWork) { + List children = getWork().getChildren(mapWork); + if (children.size() != 1) { + return null; + } + + if (!(children.get(0) instanceof ReduceWork)) { + return null; + } + + return ((ReduceWork)children.get(0)).getReducer(); + } public SparkCounters getSparkCounters() { return sparkCounters; diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java index a76cac5..60dcdb5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java @@ -56,6 +56,7 @@ import org.apache.hadoop.hive.ql.lib.TypeRule; import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.optimizer.physical.CrossProductCheck; +import org.apache.hadoop.hive.ql.optimizer.physical.MetadataOnlyOptimizer; import org.apache.hadoop.hive.ql.optimizer.physical.NullScanOptimizer; import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalContext; import org.apache.hadoop.hive.ql.optimizer.physical.StageIDsRearranger; @@ -270,6 +271,12 @@ protected void optimizeTaskPlan(List> rootTasks, Pa } else { LOG.debug("Skipping null scan query optimization"); } + + if (conf.getBoolVar(HiveConf.ConfVars.HIVEMETADATAONLYQUERIES)) { + physicalCtx = new MetadataOnlyOptimizer().resolve(physicalCtx); + } else { + LOG.debug("Skipping metadata only query optimization"); + } if (conf.getBoolVar(HiveConf.ConfVars.HIVE_CHECK_CROSS_PRODUCT)) { physicalCtx = new CrossProductCheck().resolve(physicalCtx); diff --git ql/src/test/results/clientpositive/spark/limit_partition_metadataonly.q.out ql/src/test/results/clientpositive/spark/limit_partition_metadataonly.q.out new file mode 100644 index 0000000..8ab5db2 --- /dev/null +++ ql/src/test/results/clientpositive/spark/limit_partition_metadataonly.q.out @@ -0,0 +1,608 @@ +PREHOOK: query: explain select ds from srcpart where hr=11 and ds='2008-04-08' +PREHOOK: type: QUERY +POSTHOOK: query: explain select ds from srcpart where hr=11 and ds='2008-04-08' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: srcpart + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: '2008-04-08' (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + ListSink + +PREHOOK: query: select ds from srcpart where hr=11 and ds='2008-04-08' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +POSTHOOK: query: select ds from srcpart where hr=11 and ds='2008-04-08' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +2008-04-08 +PREHOOK: query: explain select distinct hr from srcpart +PREHOOK: type: QUERY +POSTHOOK: query: explain select distinct hr from srcpart +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcpart + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hr (type: string) + outputColumnNames: hr + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: hr (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select distinct hr from srcpart +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select distinct hr from srcpart +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +11 +12