diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java index 856de61..5135fa0 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java @@ -482,6 +482,11 @@ private boolean checkColEquality(List> grandParentColNames, int colCount = 0; // parent op is guaranteed to have a single list because it is a reduce sink for (String colName : parentColNames.get(0)) { + if (listBucketCols.size() <= colCount) { + // can happen with virtual columns. RS would add the column to its output columns + // but it would not exist in the grandparent output columns or exprMap. + return false; + } // all columns need to be at least a subset of the parentOfParent's bucket cols ExprNodeDesc exprNodeDesc = colExprMap.get(colName); if (exprNodeDesc instanceof ExprNodeColumnDesc) { diff --git ql/src/test/queries/clientpositive/bucket_map_join_tez1.q ql/src/test/queries/clientpositive/bucket_map_join_tez1.q index c9266a5..42e26a8 100644 --- ql/src/test/queries/clientpositive/bucket_map_join_tez1.q +++ ql/src/test/queries/clientpositive/bucket_map_join_tez1.q @@ -82,4 +82,6 @@ from tab1 a join tab_part b on a.key = b.key; explain select a.key, b.key from tab_part a join tab_part c on a.key = c.key join tab_part b on a.value = b.value; - +explain +select a.key, a.value, b.value +from tab a join tab_part b on a.key = b.key and a.ds = b.ds; diff --git ql/src/test/results/clientpositive/tez/bucket_map_join_tez1.q.out ql/src/test/results/clientpositive/tez/bucket_map_join_tez1.q.out index 512349d..706988d 100644 --- ql/src/test/results/clientpositive/tez/bucket_map_join_tez1.q.out +++ ql/src/test/results/clientpositive/tez/bucket_map_join_tez1.q.out @@ -1205,3 +1205,90 @@ STAGE PLANS: Processor Tree: ListSink +PREHOOK: query: explain +select a.key, a.value, b.value +from tab a join tab_part b on a.key = b.key and a.ds = b.ds +PREHOOK: type: QUERY +POSTHOOK: query: explain +select a.key, a.value, b.value +from tab a join tab_part b on a.key = b.key and a.ds = b.ds +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 1 <- Map 2 (BROADCAST_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {value} + keys: + 0 key (type: int), ds (type: string) + 1 key (type: int), ds (type: string) + outputColumnNames: _col0, _col1, _col7 + input vertices: + 0 Map 2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map 2 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int), ds (type: string) + sort order: ++ + Map-reduce partition columns: key (type: int), ds (type: string) + Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Select Operator + expressions: ds (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + Dynamic Partitioning Event Operator + Target Input: b + Partition key expr: ds + Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + Target column: ds + Target Vertex: Map 1 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink +