Details
-
Bug
-
Status: Resolved
-
Critical
-
Resolution: Fixed
-
None
-
None
-
None
-
None
-
hive 0.9.0+158-1.cdh4.1.3.p0.23~squeeze-cdh4.1.3
-
hive emptyFile
Description
Our Hive jobs fail due to the strange error pasted below. Strace showed that the process created this file, accessed it a few times, and then threw an exception saying that it couldn't find the file it had just accessed. In the next step it unlinked it. Yay.
A very similar problem was reported in an already-closed task and has been left unresolved on the mailing lists.
I'll be happy to provide any additional details required.
Stack trace
2013-07-18 12:49:46,109 ERROR security.UserGroupInformation (UserGroupInformation.java:doAs(1335)) - PriviledgedActionException as:username (auth:SIMPLE) cause:java.io.FileNotFoundException: File does not exist: /tmp/username/hive_2013-07-18_12-49-45_218_605775464480014480/-mr-10000/1/emptyFile
2013-07-18 12:49:46,113 ERROR exec.ExecDriver (SessionState.java:printError(403)) - Job Submission failed with exception 'java.io.FileNotFoundException(File does not exist: /tmp/username/hive_2013-07-18_12-49-45_218_605775464480014480/-mr-10000/1/emptyFile)'
java.io.FileNotFoundException: File does not exist: /tmp/username/hive_2013-07-18_12-49-45_218_605775464480014480/-mr-10000/1/emptyFile
at org.apache.hadoop.hdfs.DistributedFileSystem.getFileStatus(DistributedFileSystem.java:787)
at org.apache.hadoop.mapred.lib.CombineFileInputFormat$OneFileInfo.<init>(CombineFileInputFormat.java:462)
at org.apache.hadoop.mapred.lib.CombineFileInputFormat.getMoreSplits(CombineFileInputFormat.java:256)
at org.apache.hadoop.mapred.lib.CombineFileInputFormat.getSplits(CombineFileInputFormat.java:212)
at org.apache.hadoop.hive.shims.HadoopShimsSecure$CombineFileInputFormatShim.getSplits(HadoopShimsSecure.java:392)
at org.apache.hadoop.hive.shims.HadoopShimsSecure$CombineFileInputFormatShim.getSplits(HadoopShimsSecure.java:358)
at org.apache.hadoop.hive.ql.io.CombineHiveInputFormat.getSplits(CombineHiveInputFormat.java:387)
at org.apache.hadoop.mapred.JobClient.writeOldSplits(JobClient.java:1040)
at org.apache.hadoop.mapred.JobClient.writeSplits(JobClient.java:1032)
at org.apache.hadoop.mapred.JobClient.access$600(JobClient.java:172)
at org.apache.hadoop.mapred.JobClient$2.run(JobClient.java:942)
at org.apache.hadoop.mapred.JobClient$2.run(JobClient.java:895)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:396)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1332)
at org.apache.hadoop.mapred.JobClient.submitJobInternal(JobClient.java:895)
at org.apache.hadoop.mapred.JobClient.submitJob(JobClient.java:869)
at org.apache.hadoop.hive.ql.exec.ExecDriver.execute(ExecDriver.java:435)
at org.apache.hadoop.hive.ql.exec.ExecDriver.main(ExecDriver.java:677)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
at java.lang.reflect.Method.invoke(Method.java:597)
at org.apache.hadoop.util.RunJar.main(RunJar.java:208)
strace with grep emptyFile
7385 14:48:02.808096 stat("/tmp/username/hive_2013-07-18_14-48-00_700_8005967322498387476/-mr-10000/1/emptyFile", {st_mode=S_IFREG|0755, st_size=0, ...}) = 0 7385 14:48:02.808201 stat("/tmp/username/hive_2013-07-18_14-48-00_700_8005967322498387476/-mr-10000/1/emptyFile", {st_mode=S_IFREG|0755, st_size=0, ...}) = 0 7385 14:48:02.808277 stat("/tmp/username/hive_2013-07-18_14-48-00_700_8005967322498387476/-mr-10000/1/emptyFile", {st_mode=S_IFREG|0755, st_size=0, ...}) = 0 7385 14:48:02.808348 stat("/tmp/username/hive_2013-07-18_14-48-00_700_8005967322498387476/-mr-10000/1/emptyFile", {st_mode=S_IFREG|0755, st_size=0, ...}) = 0 7385 14:48:02.808506 stat("/tmp/username/hive_2013-07-18_14-48-00_700_8005967322498387476/-mr-10000/1/.emptyFile.crc", {st_mode=S_IFREG|0640, st_size=8, ...}) = 0 7385 14:48:02.808584 stat("/tmp/username/hive_2013-07-18_14-48-00_700_8005967322498387476/-mr-10000/1/.emptyFile.crc", {st_mode=S_IFREG|0640, st_size=8, ...}) = 0 7385 14:48:02.808650 stat("/tmp/username/hive_2013-07-18_14-48-00_700_8005967322498387476/-mr-10000/1/.emptyFile.crc", {st_mode=S_IFREG|0640, st_size=8, ...}) = 0 7385 14:48:02.808708 stat("/tmp/username/hive_2013-07-18_14-48-00_700_8005967322498387476/-mr-10000/1/.emptyFile.crc", {st_mode=S_IFREG|0640, st_size=8, ...}) = 0 7410 14:48:02.831228 write(228, "\0\0\0\236\6\10\2\20\0\30\37\225\1\n\vgetFileInfo\22T\nR/tmp/username/hive_2013-07-18_14-48-00_700_8005967322498387476/-mr-10000/1/emptyFile\32.org.apache.hadoop.hdfs.protocol.ClientProtocol \1", 162) = 162 7385 14:48:02.832550 write(225, "2013-07-18 14:48:02,832 ERROR security.UserGroupInformation (UserGroupInformation.java:doAs(1335)) - PriviledgedActionException as:username (auth:SIMPLE) cause:java.io.FileNotFoundException: File does not exist: /tmp/username/hive_2013-07-18_14-48-00_700_8005967322498387476/-mr-10000/1/emptyFile\n", 293) = 293 7385 14:48:02.832735 write(2, "java.io.FileNotFoundException: File does not exist: 
/tmp/username/hive_2013-07-18_14-48-00_700_8005967322498387476/-mr-10000/1/emptyFile", 134) = 134 7371 14:48:02.832848 <... read resumed> "java.io.FileNotFoundException: File does not exist: /tmp/username/hive_2013-07-18_14-48-00_700_8005967322498387476/-mr-10000/1/emptyFile\n", 8192) = 135 7371 14:48:02.836376 write(2, "java.io.FileNotFoundException: File does not exist: /tmp/username/hive_2013-07-18_14-48-00_700_8005967322498387476/-mr-10000/1/emptyFile", 134) = 134 7385 14:48:02.836575 write(2, "Job Submission failed with exception 'java.io.FileNotFoundException(File does not exist: /tmp/username/hive_2013-07-18_14-48-00_700_8005967322498387476/-mr-10000/1/emptyFile)'", 173 <unfinished ...> 7385 14:48:02.836911 write(225, "2013-07-18 14:48:02,836 ERROR exec.ExecDriver (SessionState.java:printError(403)) - Job Submission failed with exception 'java.io.FileNotFoundException(File does not exist: /tmp/username/hive_2013-07-18_14-48-00_700_8005967322498387476/-mr-10000/1/emptyFile)'\njava.io.FileNotFoundException: File does not exist: /tmp/username/hive_2013-07-18_14-48-00_700_8005967322498387476/-mr-10000/1/emptyFile\n\tat org.apache.hadoop.hdfs.DistributedFileSystem.getFileStatus(DistributedFileSystem.java:787)\n\tat org.apache.hadoop.mapred.lib.CombineFileInputFormat$OneFileInfo.<init>(CombineFileInputFormat.java:462)\n\tat org.apache.hadoop.mapred.lib.CombineFileInputFormat.getMoreSplits(CombineFileInputFormat.java:256)\n\tat org.apache.hadoop.mapred.lib.CombineFileInputFormat.getSplits(CombineFileInputFormat.java:212)\n\tat org.apache.hadoop.hive.shims.HadoopShimsSecure$CombineFileInputFormatShim.getSplits(HadoopShimsSecure.java:392)\n\tat org.apache.hadoop.hive.shims.HadoopShimsSecure$CombineFileInputFormatShim.getSplit"..., 2322 <unfinished ...> 7371 14:48:02.839271 read(237, "Job Submission failed with exception 'java.io.FileNotFoundException(File does not exist: /tmp/username/hive_2013-07-18_14-48-00_700_8005967322498387476/-mr-10000/1/emptyFile)'\n", 8192) 
= 174 7371 14:48:02.839477 write(2, "Job Submission failed with exception 'java.io.FileNotFoundException(File does not exist: /tmp/username/hive_2013-07-18_14-48-00_700_8005967322498387476/-mr-10000/1/emptyFile)'", 173 <unfinished ...> 7385 14:48:02.841022 stat("/tmp/username/hive_2013-07-18_14-48-00_700_8005967322498387476/-mr-10000/1/emptyFile", {st_mode=S_IFREG|0755, st_size=0, ...}) = 0 7385 14:48:02.841078 unlink("/tmp/username/hive_2013-07-18_14-48-00_700_8005967322498387476/-mr-10000/1/emptyFile") = 0 7385 14:48:02.841145 stat("/tmp/username/hive_2013-07-18_14-48-00_700_8005967322498387476/-mr-10000/1/.emptyFile.crc", {st_mode=S_IFREG|0640, st_size=8, ...}) = 0 7385 14:48:02.841198 unlink("/tmp/username/hive_2013-07-18_14-48-00_700_8005967322498387476/-mr-10000/1/.emptyFile.crc") = 0