diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkUtilities.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkUtilities.java index fac3cea..ca19fd0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkUtilities.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkUtilities.java @@ -191,7 +191,7 @@ public static SparkTask createSparkTask(SparkWork work, HiveConf conf) { } /** - * Recursively find all operators under root, that are of class clazz, and + * Recursively find all operators under root, that are of class clazz or are the sub-class of clazz, and * put them in result. * @param result all operators under root that are of class clazz * @param root the root operator under which all operators will be examined @@ -202,7 +202,7 @@ public static void collectOp(Collection> result, Operator root, C if (root == null) { return; } - if (clazz.equals(root.getClass())) { + if (clazz.isAssignableFrom(root.getClass())) { result.add(root); } for (Operator child : root.getChildOperators()) { diff --git a/ql/src/test/results/clientpositive/spark/spark_vectorized_dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/spark/spark_vectorized_dynamic_partition_pruning.q.out index f2403aa..2ec8f99 100644 --- a/ql/src/test/results/clientpositive/spark/spark_vectorized_dynamic_partition_pruning.q.out +++ b/ql/src/test/results/clientpositive/spark/spark_vectorized_dynamic_partition_pruning.q.out @@ -3584,6 +3584,25 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan + alias: srcpart_hour + filterExpr: (UDFToDouble(hr) = 13.0) (type: boolean) + Statistics: Num rows: 2 Data size: 344 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(hr) = 13.0) (type: boolean) + Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hr (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map 5 + Map Operator Tree: + TableScan alias: srcpart filterExpr: ((UDFToDouble(hr) = 13.0) and ds is not null) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE @@ -3595,13 +3614,13 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: string) + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col1 (type: string) + value expressions: _col0 (type: string) Execution mode: vectorized - Map 5 + Map 6 Map Operator Tree: TableScan alias: srcpart_date @@ -3620,25 +3639,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 6 - Map Operator Tree: - TableScan - alias: srcpart_hour - filterExpr: (UDFToDouble(hr) = 13.0) (type: boolean) - Statistics: Num rows: 2 Data size: 344 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (UDFToDouble(hr) = 13.0) (type: boolean) - Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: hr (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized Reducer 2 Reduce Operator Tree: Join Operator @@ -3646,14 +3646,14 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 _col0 (type: string) - 1 _col0 (type: string) + 1 _col1 (type: string) outputColumnNames: _col1 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 189 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 189 Basic stats: COMPLETE Column stats: NONE Reducer 3 Reduce Operator Tree: Join Operator @@ -3662,7 +3662,7 @@ STAGE PLANS: keys: 0 _col1 (type: string) 1 _col0 (type: string) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 207 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -4353,20 +4353,6 @@ STAGE PLANS: partition key expr: ds Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE target work: Map 1 - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE - Spark Partition Pruning Sink Operator - Target column: ds (string) - partition key expr: ds - Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE - target work: Map 4 Reducer 13 Execution mode: vectorized Reduce Operator Tree: @@ -4397,20 +4383,6 @@ STAGE PLANS: partition key expr: ds Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE target work: Map 1 - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE - Spark Partition Pruning Sink Operator - Target column: ds (string) - partition key expr: ds - Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE - target work: Map 4 Stage: Stage-1 Spark @@ -6149,17 +6121,16 @@ POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcp where srcpart_date.`date` = '2008-04-08' and srcpart.hr = 13 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-2 depends on stages: Stage-3 + Stage-2 is a root stage Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-2 Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: srcpart @@ -6175,16 +6146,11 @@ STAGE PLANS: Spark HashTable Sink Operator keys: 0 _col0 (type: string) - 1 _col0 (type: string) + 1 _col1 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work - - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 + Map 4 Map Operator Tree: TableScan alias: srcpart_date @@ -6197,20 +6163,10 @@ STAGE PLANS: expressions: ds (type: string) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 + Spark HashTable Sink Operator keys: - 0 _col0 (type: string) + 0 _col1 (type: string) 1 _col0 (type: string) - outputColumnNames: _col1 - input vertices: - 0 Map 1 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -6218,10 +6174,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 4 <- Map 3 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: srcpart_hour @@ -6238,24 +6194,34 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col0 (type: string) + 0 _col0 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col1 input vertices: - 0 Map 2 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: + 1 Map 3 + Statistics: Num rows: 1 Data size: 189 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + input vertices: + 1 Map 4 + Statistics: Num rows: 1 Data size: 207 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Execution mode: vectorized Local Work: Map Reduce Local Work - Reducer 4 + Reducer 2 Execution mode: vectorized Reduce Operator Tree: Group By Operator