Description
Bucketmap inner join query between bucketed tables throws following exception when one table contains all the empty buckets while other has all the non-empty buckets.
Vertex failed, vertexName=Map 2, vertexId=vertex_1466710232033_0432_4_01, diagnostics=[Task failed, taskId=task_1466710232033_0432_4_01_000000, diagnostics=[TaskAttempt 0 failed, info=[Error: Error while running task ( failure ) : attempt_1466710232033_0432_4_01_000000_0:java.lang.RuntimeException: java.lang.RuntimeException: Map operator initialization failed at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:211) at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:168) at org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:370) at org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:73) at org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:61) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:422) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1724) at org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:61) at org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:37) at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36) at java.util.concurrent.FutureTask.run(FutureTask.java:266) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) at java.lang.Thread.run(Thread.java:745) Caused by: java.lang.RuntimeException: Map operator initialization failed at org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.init(MapRecordProcessor.java:330) at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:184) ... 14 more Caused by: java.lang.NullPointerException at org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.getKeyValueReader(MapRecordProcessor.java:372) at org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.initializeMapRecordSources(MapRecordProcessor.java:344) at org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.init(MapRecordProcessor.java:292) ... 15 more ], TaskAttempt 1 failed, info=[Error: Error while running task ( failure ) : attempt_1466710232033_0432_4_01_000000_1:java.lang.RuntimeException: java.lang.RuntimeException: Map operator initialization failed at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:211) at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:168) at org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:370) at org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:73) at org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:61) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:422) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1724) at org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:61) at org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:37) at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36) at java.util.concurrent.FutureTask.run(FutureTask.java:266) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) at java.lang.Thread.run(Thread.java:745) Caused by: java.lang.RuntimeException: Map operator initialization failed at org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.init(MapRecordProcessor.java:330) at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:184) ... 14 more Caused by: java.lang.NullPointerException at org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.getKeyValueReader(MapRecordProcessor.java:372) at org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.initializeMapRecordSources(MapRecordProcessor.java:344) at org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.init(MapRecordProcessor.java:292) ... 15 more ], TaskAttempt 2 failed, info=[Error: Error while running task ( failure ) : attempt_1466710232033_0432_4_01_000000_2:java.lang.RuntimeException: java.lang.RuntimeException: Map operator initialization failed at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:211) at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:168) at org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:370) at org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:73) at org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:61) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:422) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1724) at org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:61) at org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:37) at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36) at java.util.concurrent.FutureTask.run(FutureTask.java:266) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) at java.lang.Thread.run(Thread.java:745) Caused by: java.lang.RuntimeException: Map operator initialization failed at org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.init(MapRecordProcessor.java:330) at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:184) ... 14 more Caused by: java.lang.NullPointerException at org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.getKeyValueReader(MapRecordProcessor.java:372) at org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.initializeMapRecordSources(MapRecordProcessor.java:344) at org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.init(MapRecordProcessor.java:292) ... 15 more ], TaskAttempt 3 failed, info=[Error: Error while running task ( failure ) : attempt_1466710232033_0432_4_01_000000_3:java.lang.RuntimeException: java.lang.RuntimeException: Map operator initialization failed at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:211) at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:168) at org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:370) at org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:73) at org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:61) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:422) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1724) at org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:61) at org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:37) at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36) at java.util.concurrent.FutureTask.run(FutureTask.java:266) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) at java.lang.Thread.run(Thread.java:745) Caused by: java.lang.RuntimeException: Map operator initialization failed at org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.init(MapRecordProcessor.java:330) at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:184) ... 14 more Caused by: java.lang.NullPointerException at org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.getKeyValueReader(MapRecordProcessor.java:372) at org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.initializeMapRecordSources(MapRecordProcessor.java:344) at org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.init(MapRecordProcessor.java:292) ... 15 more ]], Vertex did not succeed due to OWN_TASK_FAILURE, failedTasks:1 killedTasks:59, Vertex vertex_1466710232033_0432_4_01 [Map 2] killed/failed due to:OWN_TASK_FAILURE] DAG did not succeed due to VERTEX_FAILURE. failedVertices:1 killedVertices:0 FAILED: Execution Error, return code 2 from org.apache.hadoop.hive.ql.exec.tez.TezTask. Vertex failed, vertexName=Map 2, vertexId=vertex_1466710232033_0432_4_01, diagnostics=[Task failed, taskId=task_1466710232033_0432_4_01_000000, diagnostics=[TaskAttempt 0 failed, info=[Error: Error while running task ( failure ) : attempt_1466710232033_0432_4_01_000000_0:java.lang.RuntimeException: java.lang.RuntimeException: Map operator initialization failed at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:211) at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:168) at org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:370) at org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:73) at org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:61) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:422) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1724) at org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:61) at org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:37) at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36) at java.util.concurrent.FutureTask.run(FutureTask.java:266) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) at java.lang.Thread.run(Thread.java:745) Caused by: java.lang.RuntimeException: Map operator initialization failed at org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.init(MapRecordProcessor.java:330) at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:184) ... 14 more Caused by: java.lang.NullPointerException at org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.getKeyValueReader(MapRecordProcessor.java:372) at org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.initializeMapRecordSources(MapRecordProcessor.java:344) at org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.init(MapRecordProcessor.java:292) ... 15 more ], TaskAttempt 1 failed, info=[Error: Error while running task ( failure ) : attempt_1466710232033_0432_4_01_000000_1:java.lang.RuntimeException: java.lang.RuntimeException: Map operator initialization failed at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:211) at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:168) at org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:370) at org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:73) at org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:61) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:422) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1724) at org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:61) at org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:37) at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36) at java.util.concurrent.FutureTask.run(FutureTask.java:266) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) at java.lang.Thread.run(Thread.java:745) Caused by: java.lang.RuntimeException: Map operator initialization failed at org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.init(MapRecordProcessor.java:330) at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:184) ... 14 more Caused by: java.lang.NullPointerException at org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.getKeyValueReader(MapRecordProcessor.java:372) at org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.initializeMapRecordSources(MapRecordProcessor.java:344) at org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.init(MapRecordProcessor.java:292) ... 15 more ], TaskAttempt 2 failed, info=[Error: Error while running task ( failure ) : attempt_1466710232033_0432_4_01_000000_2:java.lang.RuntimeException: java.lang.RuntimeException: Map operator initialization failed at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:211) at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:168) at org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:370) at org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:73) at org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:61) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:422) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1724) at org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:61) at org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:37) at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36) at java.util.concurrent.FutureTask.run(FutureTask.java:266) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) at java.lang.Thread.run(Thread.java:745) Caused by: java.lang.RuntimeException: Map operator initialization failed at org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.init(MapRecordProcessor.java:330) at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:184) ... 14 more Caused by: java.lang.NullPointerException at org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.getKeyValueReader(MapRecordProcessor.java:372) at org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.initializeMapRecordSources(MapRecordProcessor.java:344) at org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.init(MapRecordProcessor.java:292) ... 15 more ], TaskAttempt 3 failed, info=[Error: Error while running task ( failure ) : attempt_1466710232033_0432_4_01_000000_3:java.lang.RuntimeException: java.lang.RuntimeException: Map operator initialization failed at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:211) at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:168) at org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:370) at org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:73) at org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:61) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:422) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1724) at org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:61) at org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:37) at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36) at java.util.concurrent.FutureTask.run(FutureTask.java:266) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) at java.lang.Thread.run(Thread.java:745) Caused by: java.lang.RuntimeException: Map operator initialization failed at org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.init(MapRecordProcessor.java:330) at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:184) ... 14 more Caused by: java.lang.NullPointerException at org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.getKeyValueReader(MapRecordProcessor.java:372) at org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.initializeMapRecordSources(MapRecordProcessor.java:344) at org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.init(MapRecordProcessor.java:292) ... 15 more
Steps to reproduce the issue:
set hive.execution.engine=tez; set hive.exeucution.mode=container; drop table if exists src_emptybucket; create table src_emptybucket (name string, age int, gpa double) clustered by (age) into 30 buckets stored as orc; insert into table src_emptybucket select * from studenttab10k limit 0; drop table if exists src_nonemptybucket; create table src_nonemptybucket (name varchar(50), age int, gpa decimal(38,18)) clustered by (age) into 60 buckets stored as orc; insert into table src_nonemptybucket select * from studenttab10k where age in ( select distinct(age) from studenttab10k limit 60); set hive.optimize.bucketmapjoin=true; set hive.convert.join.bucket.mapjoin.tez=true; select e1.name as e1_name, e1.age as e1_age, e1.gpa as e1_gpa, e2.name as e2_name, e2.age as e2_age, e2.gpa as e2_gpa from src_emptybucket e1 inner join src_nonemptybucket e2 on e1.age = e2.age;