Details
Description
Whenever, I try to do multiple grouping using data frames my job crashes with the error FileNotFoundException and message = too many open files.
I can do these groupings using RDD easily but when I use the DataFrame operation I see these issues.
The code I am running:
```
df_t = df.filter(df['max_cum_rank'] == 0).select(['col1','col2']).groupby('col1').agg(F.min('col2')).groupby('min(col2)').agg(F.countDistinct('col1')).toPandas()
```
In [151]: df_t = df.filter(df['max_cum_rank'] == 0).select(['col1','col2']).groupby('col1').agg(F.min('col2')).groupby('min(col2)').agg(F.countDistinct('col1')).toPandas()
[Stage 27:=====================================================>(415 + 1) / 416]15/12/09 06:36:36 ERROR DiskBlockObjectWriter: Uncaught exception while reverting partial writes to file /path/tmp/blockmgr-fde0f618-e443-4841-96c4-54c5e5b8fa0f/22/temp_shuffle_1abbf917-842c-41ef-b113-ed60ee22e675
java.io.FileNotFoundException: /path/tmp/blockmgr-fde0f618-e443-4841-96c4-54c5e5b8fa0f/22/temp_shuffle_1abbf917-842c-41ef-b113-ed60ee22e675 (Too many open files)
at java.io.FileOutputStream.open(Native Method)
at java.io.FileOutputStream.<init>(FileOutputStream.java:221)
at org.apache.spark.storage.DiskBlockObjectWriter.revertPartialWritesAndClose(DiskBlockObjectWriter.scala:160)
at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.stop(BypassMergeSortShuffleWriter.java:174)
at org.apache.spark.shuffle.sort.SortShuffleWriter.stop(SortShuffleWriter.scala:104)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:79)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
at org.apache.spark.scheduler.Task.run(Task.scala:88)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
15/12/09 06:36:36 ERROR DiskBlockObjectWriter: Uncaught exception while reverting partial writes to file /path/tmp/blockmgr-fde0f618-e443-4841-96c4-54c5e5b8fa0f/29/temp_shuffle_e35e6e28-fdbf-4775-a32d-d0f5fd882e9e
java.io.FileNotFoundException: /path/tmp/blockmgr-fde0f618-e443-4841-96c4-54c5e5b8fa0f/29/temp_shuffle_e35e6e28-fdbf-4775-a32d-d0f5fd882e9e (Too many open files)
at java.io.FileOutputStream.open(Native Method)
at java.io.FileOutputStream.<init>(FileOutputStream.java:221)
at org.apache.spark.storage.DiskBlockObjectWriter.revertPartialWritesAndClose(DiskBlockObjectWriter.scala:160)
at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.stop(BypassMergeSortShuffleWriter.java:174)
at org.apache.spark.shuffle.sort.SortShuffleWriter.stop(SortShuffleWriter.scala:104)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:79)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
at org.apache.spark.scheduler.Task.run(Task.scala:88)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
15/12/09 06:36:36 ERROR DiskBlockObjectWriter: Uncaught exception while reverting partial writes to file /path/tmp/blockmgr-fde0f618-e443-4841-96c4-54c5e5b8fa0f/18/temp_shuffle_2d26adcb-e3bb-4a01-8998-7428ebe5544d
java.io.FileNotFoundException: /path/tmp/blockmgr-fde0f618-e443-4841-96c4-54c5e5b8fa0f/18/temp_shuffle_2d26adcb-e3bb-4a01-8998-7428ebe5544d (Too many open files)
at java.io.FileOutputStream.open(Native Method)
at java.io.FileOutputStream.<init>(FileOutputStream.java:221)
at org.apache.spark.storage.DiskBlockObjectWriter.revertPartialWritesAndClose(DiskBlockObjectWriter.scala:160)
at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.stop(BypassMergeSortShuffleWriter.java:174)
at org.apache.spark.shuffle.sort.SortShuffleWriter.stop(SortShuffleWriter.scala:104)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:79)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
at org.apache.spark.scheduler.Task.run(Task.scala:88)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
15/12/09 06:36:36 ERROR DiskBlockObjectWriter: Uncaught exception while reverting partial writes to file /path/tmp/blockmgr-fde0f618-e443-4841-96c4-54c5e5b8fa0f/33/temp_shuffle_e82d6779-eb0e-465f-9bbc-8375f5dc1030
java.io.FileNotFoundException: /path/tmp/blockmgr-fde0f618-e443-4841-96c4-54c5e5b8fa0f/33/temp_shuffle_e82d6779-eb0e-465f-9bbc-8375f5dc1030 (Too many open files)
at java.io.FileOutputStream.open(Native Method)
at java.io.FileOutputStream.<init>(FileOutputStream.java:221)
at org.apache.spark.storage.DiskBlockObjectWriter.revertPartialWritesAndClose(DiskBlockObjectWriter.scala:160)
at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.stop(BypassMergeSortShuffleWriter.java:174)
at org.apache.spark.shuffle.sort.SortShuffleWriter.stop(SortShuffleWriter.scala:104)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:79)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
at org.apache.spark.scheduler.Task.run(Task.scala:88)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
15/12/09 06:36:36 ERROR DiskBlockObjectWriter: Uncaught exception while reverting partial writes to file /path/tmp/blockmgr-fde0f618-e443-4841-96c4-54c5e5b8fa0f/0d/temp_shuffle_5de10ef5-7709-4c2e-a3c1-5e58b3dcef33
java.io.FileNotFoundException: /path/tmp/blockmgr-fde0f618-e443-4841-96c4-54c5e5b8fa0f/0d/temp_shuffle_5de10ef5-7709-4c2e-a3c1-5e58b3dcef33 (Too many open files)
at java.io.FileOutputStream.open(Native Method)
at java.io.FileOutputStream.<init>(FileOutputStream.java:221)
at org.apache.spark.storage.DiskBlockObjectWriter.revertPartialWritesAndClose(DiskBlockObjectWriter.scala:160)
at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.stop(BypassMergeSortShuffleWriter.java:174)
at org.apache.spark.shuffle.sort.SortShuffleWriter.stop(SortShuffleWriter.scala:104)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:79)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
at org.apache.spark.scheduler.Task.run(Task.scala:88)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
15/12/09 06:36:36 ERROR Executor: Managed memory leak detected; size = 16777216 bytes, TID = 6060
15/12/09 06:36:36 ERROR Executor: Exception in task 14.0 in stage 28.0 (TID 6060)
java.io.FileNotFoundException: /path/tmp/blockmgr-fde0f618-e443-4841-96c4-54c5e5b8fa0f/06/temp_shuffle_4d7ca6f5-c2b0-4b6e-8054-df53f0cbc54e (Too many open files)
at java.io.FileOutputStream.open(Native Method)
at java.io.FileOutputStream.<init>(FileOutputStream.java:221)
at org.apache.spark.storage.DiskBlockObjectWriter.open(DiskBlockObjectWriter.scala:88)
at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.insertAll(BypassMergeSortShuffleWriter.java:110)
at org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
at org.apache.spark.scheduler.Task.run(Task.scala:88)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
15/12/09 06:36:36 ERROR Executor: Managed memory leak detected; size = 16777216 bytes, TID = 6047
15/12/09 06:36:36 ERROR Executor: Exception in task 1.0 in stage 28.0 (TID 6047)
java.io.FileNotFoundException: /path/tmp/blockmgr-fde0f618-e443-4841-96c4-54c5e5b8fa0f/30/temp_shuffle_7db91926-013b-4a1e-b640-8d34049336cc (Too many open files)
at java.io.FileOutputStream.open(Native Method)
at java.io.FileOutputStream.<init>(FileOutputStream.java:221)
at org.apache.spark.storage.DiskBlockObjectWriter.open(DiskBlockObjectWriter.scala:88)
at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.insertAll(BypassMergeSortShuffleWriter.java:110)
at org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
at org.apache.spark.scheduler.Task.run(Task.scala:88)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
15/12/09 06:36:36 ERROR Executor: Managed memory leak detected; size = 16777216 bytes, TID = 6061
15/12/09 06:36:36 ERROR Executor: Exception in task 15.0 in stage 28.0 (TID 6061)
java.io.FileNotFoundException: /path/tmp/blockmgr-fde0f618-e443-4841-96c4-54c5e5b8fa0f/28/temp_shuffle_41e174db-d29d-4129-a652-2a9f739f1b59 (Too many open files)
at java.io.FileOutputStream.open(Native Method)
at java.io.FileOutputStream.<init>(FileOutputStream.java:221)
at org.apache.spark.storage.DiskBlockObjectWriter.open(DiskBlockObjectWriter.scala:88)
at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.insertAll(BypassMergeSortShuffleWriter.java:110)
at org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
at org.apache.spark.scheduler.Task.run(Task.scala:88)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
15/12/09 06:36:36 ERROR Executor: Managed memory leak detected; size = 16777216 bytes, TID = 6063
15/12/09 06:36:36 ERROR Executor: Exception in task 17.0 in stage 28.0 (TID 6063)
java.io.FileNotFoundException: /path/tmp/blockmgr-fde0f618-e443-4841-96c4-54c5e5b8fa0f/23/temp_shuffle_38a8bbc9-5094-48c1-a96d-502478d887d7 (Too many open files)
at java.io.FileOutputStream.open(Native Method)
at java.io.FileOutputStream.<init>(FileOutputStream.java:221)
at org.apache.spark.storage.DiskBlockObjectWriter.open(DiskBlockObjectWriter.scala:88)
at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.insertAll(BypassMergeSortShuffleWriter.java:110)
at org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
at org.apache.spark.scheduler.Task.run(Task.scala:88)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
15/12/09 06:36:36 ERROR Executor: Managed memory leak detected; size = 16777216 bytes, TID = 6062
15/12/09 06:36:36 ERROR Executor: Exception in task 16.0 in stage 28.0 (TID 6062)
java.io.FileNotFoundException: /path/tmp/blockmgr-fde0f618-e443-4841-96c4-54c5e5b8fa0f/13/temp_shuffle_36e1cb5a-cd8d-446e-bc3f-0b39da891aa5 (Too many open files)
at java.io.FileOutputStream.open(Native Method)
at java.io.FileOutputStream.<init>(FileOutputStream.java:221)
at org.apache.spark.storage.DiskBlockObjectWriter.open(DiskBlockObjectWriter.scala:88)
at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.insertAll(BypassMergeSortShuffleWriter.java:110)
at org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
at org.apache.spark.scheduler.Task.run(Task.scala:88)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
15/12/09 06:36:36 ERROR Executor: Managed memory leak detected; size = 16777216 bytes, TID = 6053
15/12/09 06:36:36 ERROR Executor: Exception in task 7.0 in stage 28.0 (TID 6053)
java.io.FileNotFoundException: /path/tmp/blockmgr-fde0f618-e443-4841-96c4-54c5e5b8fa0f/00/temp_shuffle_0f9c515e-d4fd-43b7-9868-f4a18c092608 (Too many open files)
at java.io.FileOutputStream.open(Native Method)
at java.io.FileOutputStream.<init>(FileOutputStream.java:221)
at org.apache.spark.storage.DiskBlockObjectWriter.open(DiskBlockObjectWriter.scala:88)
at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.insertAll(BypassMergeSortShuffleWriter.java:110)
at org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
at org.apache.spark.scheduler.Task.run(Task.scala:88)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
15/12/09 06:36:36 ERROR TaskSetManager: Task 16 in stage 28.0 failed 1 times; aborting job
15/12/09 06:36:36 ERROR Executor: Managed memory leak detected; size = 16777216 bytes, TID = 6049
15/12/09 06:36:36 ERROR Executor: Managed memory leak detected; size = 16777216 bytes, TID = 6074
15/12/09 06:36:36 ERROR Executor: Exception in task 3.0 in stage 28.0 (TID 6049)
java.io.FileNotFoundException: /path/tmp/blockmgr-fde0f618-e443-4841-96c4-54c5e5b8fa0f/21/temp_shuffle_150f2b2e-e262-45b3-8660-5cc347747405 (Too many open files)
at java.io.FileOutputStream.open(Native Method)
at java.io.FileOutputStream.<init>(FileOutputStream.java:221)
at org.apache.spark.storage.DiskBlockObjectWriter.open(DiskBlockObjectWriter.scala:88)
at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.insertAll(BypassMergeSortShuffleWriter.java:110)
at org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
at org.apache.spark.scheduler.Task.run(Task.scala:88)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
15/12/09 06:36:36 ERROR Executor: Managed memory leak detected; size = 16777216 bytes, TID = 6070
15/12/09 06:36:36 ERROR Executor: Exception in task 28.0 in stage 28.0 (TID 6074)
java.io.FileNotFoundException: /path/tmp/blockmgr-fde0f618-e443-4841-96c4-54c5e5b8fa0f/3b/temp_shuffle_5e59f4d6-3be9-43a6-a54f-8c035d057acb (Too many open files)
at java.io.FileOutputStream.open(Native Method)
at java.io.FileOutputStream.<init>(FileOutputStream.java:221)
at org.apache.spark.storage.DiskBlockObjectWriter.open(DiskBlockObjectWriter.scala:88)
at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.insertAll(BypassMergeSortShuffleWriter.java:110)
at org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
at org.apache.spark.scheduler.Task.run(Task.scala:88)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
15/12/09 06:36:36 ERROR Executor: Exception in task 24.0 in stage 28.0 (TID 6070)
java.io.FileNotFoundException: /path/tmp/blockmgr-fde0f618-e443-4841-96c4-54c5e5b8fa0f/00/temp_shuffle_fed61d01-6e05-40f6-8378-936db6e5d22f (Too many open files)
at java.io.FileOutputStream.open(Native Method)
at java.io.FileOutputStream.<init>(FileOutputStream.java:221)
at org.apache.spark.storage.DiskBlockObjectWriter.open(DiskBlockObjectWriter.scala:88)
at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.insertAll(BypassMergeSortShuffleWriter.java:110)
at org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
at org.apache.spark.scheduler.Task.run(Task.scala:88)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
15/12/09 06:36:36 ERROR Executor: Managed memory leak detected; size = 16777216 bytes, TID = 6051
15/12/09 06:36:36 ERROR Executor: Exception in task 5.0 in stage 28.0 (TID 6051)
java.io.FileNotFoundException: /path/tmp/blockmgr-fde0f618-e443-4841-96c4-54c5e5b8fa0f/2d/temp_shuffle_3b9f5d64-7cf2-4106-90c7-0b4dc99a8287 (Too many open files)
at java.io.FileOutputStream.open(Native Method)
at java.io.FileOutputStream.<init>(FileOutputStream.java:221)
at org.apache.spark.storage.DiskBlockObjectWriter.open(DiskBlockObjectWriter.scala:88)
at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.insertAll(BypassMergeSortShuffleWriter.java:110)
at org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
at org.apache.spark.scheduler.Task.run(Task.scala:88)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
15/12/09 06:36:36 ERROR Executor: Managed memory leak detected; size = 16777216 bytes, TID = 6056
15/12/09 06:36:36 ERROR Executor: Managed memory leak detected; size = 16777216 bytes, TID = 6067
15/12/09 06:36:36 ERROR Executor: Managed memory leak detected; size = 16777216 bytes, TID = 6052
15/12/09 06:36:36 ERROR Executor: Exception in task 6.0 in stage 28.0 (TID 6052)
java.io.FileNotFoundException: /path/tmp/blockmgr-fde0f618-e443-4841-96c4-54c5e5b8fa0f/05/temp_shuffle_9f0996ab-d960-4091-be1d-82e6598dd901 (Too many open files)
at java.io.FileOutputStream.open(Native Method)
at java.io.FileOutputStream.<init>(FileOutputStream.java:221)
at org.apache.spark.storage.DiskBlockObjectWriter.open(DiskBlockObjectWriter.scala:88)
at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.insertAll(BypassMergeSortShuffleWriter.java:110)
at org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
at org.apache.spark.scheduler.Task.run(Task.scala:88)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
15/12/09 06:36:36 ERROR Executor: Managed memory leak detected; size = 16777216 bytes, TID = 6046
15/12/09 06:36:36 ERROR Executor: Exception in task 21.0 in stage 28.0 (TID 6067)
java.io.FileNotFoundException: /path/tmp/blockmgr-fde0f618-e443-4841-96c4-54c5e5b8fa0f/07/temp_shuffle_cef223a8-42fe-4f31-be3e-3e00825848d5 (Too many open files)
at java.io.FileOutputStream.open(Native Method)
at java.io.FileOutputStream.<init>(FileOutputStream.java:221)
at org.apache.spark.storage.DiskBlockObjectWriter.open(DiskBlockObjectWriter.scala:88)
at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.insertAll(BypassMergeSortShuffleWriter.java:110)
at org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
at org.apache.spark.scheduler.Task.run(Task.scala:88)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
15/12/09 06:36:36 ERROR Executor: Exception in task 10.0 in stage 28.0 (TID 6056)
java.io.FileNotFoundException: /path/tmp/blockmgr-fde0f618-e443-4841-96c4-54c5e5b8fa0f/1f/temp_shuffle_ab66c3d0-3770-4018-8bb3-ae22e67ebfcd (Too many open files)
at java.io.FileOutputStream.open(Native Method)
at java.io.FileOutputStream.<init>(FileOutputStream.java:221)
at org.apache.spark.storage.DiskBlockObjectWriter.open(DiskBlockObjectWriter.scala:88)
at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.insertAll(BypassMergeSortShuffleWriter.java:110)
at org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
at org.apache.spark.scheduler.Task.run(Task.scala:88)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
15/12/09 06:36:36 ERROR Executor: Managed memory leak detected; size = 16777216 bytes, TID = 6075
15/12/09 06:36:36 ERROR Executor: Managed memory leak detected; size = 16777216 bytes, TID = 6069
15/12/09 06:36:36 ERROR Executor: Exception in task 0.0 in stage 28.0 (TID 6046)
java.io.FileNotFoundException: /path/tmp/blockmgr-fde0f618-e443-4841-96c4-54c5e5b8fa0f/08/temp_shuffle_5d5e9a47-4d2b-4b40-8ecf-1bae5e97e500 (Too many open files)
at java.io.FileOutputStream.open(Native Method)
at java.io.FileOutputStream.<init>(FileOutputStream.java:221)
at org.apache.spark.storage.DiskBlockObjectWriter.open(DiskBlockObjectWriter.scala:88)
at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.insertAll(BypassMergeSortShuffleWriter.java:110)
at org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
at org.apache.spark.scheduler.Task.run(Task.scala:88)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
15/12/09 06:36:36 ERROR Executor: Exception in task 23.0 in stage 28.0 (TID 6069)
java.io.FileNotFoundException: /path/tmp/blockmgr-fde0f618-e443-4841-96c4-54c5e5b8fa0f/33/temp_shuffle_0fecf52c-b590-4764-b7fc-5953e08bd044 (Too many open files)
at java.io.FileOutputStream.open(Native Method)
at java.io.FileOutputStream.<init>(FileOutputStream.java:221)
at org.apache.spark.storage.DiskBlockObjectWriter.open(DiskBlockObjectWriter.scala:88)
at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.insertAll(BypassMergeSortShuffleWriter.java:110)
at org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
at org.apache.spark.scheduler.Task.run(Task.scala:88)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
15/12/09 06:36:36 ERROR Executor: Exception in task 29.0 in stage 28.0 (TID 6075)
java.io.FileNotFoundException: /path/tmp/blockmgr-fde0f618-e443-4841-96c4-54c5e5b8fa0f/07/temp_shuffle_c0c6e399-7143-4224-a5ab-6f6259f4837f (Too many open files)
at java.io.FileOutputStream.open(Native Method)
at java.io.FileOutputStream.<init>(FileOutputStream.java:221)
at org.apache.spark.storage.DiskBlockObjectWriter.open(DiskBlockObjectWriter.scala:88)
at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.insertAll(BypassMergeSortShuffleWriter.java:110)
at org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
at org.apache.spark.scheduler.Task.run(Task.scala:88)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
15/12/09 06:36:36 ERROR Executor: Managed memory leak detected; size = 16777216 bytes, TID = 6057
15/12/09 06:36:36 ERROR Executor: Managed memory leak detected; size = 16777216 bytes, TID = 6048
15/12/09 06:36:36 ERROR Executor: Exception in task 11.0 in stage 28.0 (TID 6057)
java.io.FileNotFoundException: /path/tmp/blockmgr-fde0f618-e443-4841-96c4-54c5e5b8fa0f/35/temp_shuffle_56640593-78bd-4764-89ae-19c6d6f05490 (Too many open files)
at java.io.FileOutputStream.open(Native Method)
at java.io.FileOutputStream.<init>(FileOutputStream.java:221)
at org.apache.spark.storage.DiskBlockObjectWriter.open(DiskBlockObjectWriter.scala:88)
at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.insertAll(BypassMergeSortShuffleWriter.java:110)
at org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
at org.apache.spark.scheduler.Task.run(Task.scala:88)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
15/12/09 06:36:36 ERROR Executor: Exception in task 2.0 in stage 28.0 (TID 6048)
java.io.FileNotFoundException: /path/tmp/blockmgr-fde0f618-e443-4841-96c4-54c5e5b8fa0f/05/temp_shuffle_ea528ebe-a201-4e2b-87fd-9391e29411ed (Too many open files)
at java.io.FileOutputStream.open(Native Method)
at java.io.FileOutputStream.<init>(FileOutputStream.java:221)
at org.apache.spark.storage.DiskBlockObjectWriter.open(DiskBlockObjectWriter.scala:88)
at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.insertAll(BypassMergeSortShuffleWriter.java:110)
at org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
at org.apache.spark.scheduler.Task.run(Task.scala:88)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
15/12/09 06:36:36 ERROR Executor: Managed memory leak detected; size = 16777216 bytes, TID = 6066
15/12/09 06:36:36 ERROR Executor: Managed memory leak detected; size = 16777216 bytes, TID = 6058
15/12/09 06:36:36 ERROR Executor: Exception in task 20.0 in stage 28.0 (TID 6066)
java.io.FileNotFoundException: /path/tmp/blockmgr-fde0f618-e443-4841-96c4-54c5e5b8fa0f/2b/temp_shuffle_5618ff6c-86f1-4116-811a-0ba663b9b0d0 (Too many open files)
at java.io.FileOutputStream.open(Native Method)
at java.io.FileOutputStream.<init>(FileOutputStream.java:221)
at org.apache.spark.storage.DiskBlockObjectWriter.open(DiskBlockObjectWriter.scala:88)
at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.insertAll(BypassMergeSortShuffleWriter.java:110)
at org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
at org.apache.spark.scheduler.Task.run(Task.scala:88)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
15/12/09 06:36:36 ERROR Executor: Managed memory leak detected; size = 16777216 bytes, TID = 6059
15/12/09 06:36:36 ERROR Executor: Managed memory leak detected; size = 16777216 bytes, TID = 6065
15/12/09 06:36:36 ERROR Executor: Exception in task 12.0 in stage 28.0 (TID 6058)
java.io.FileNotFoundException: /path/tmp/blockmgr-fde0f618-e443-4841-96c4-54c5e5b8fa0f/02/temp_shuffle_83491db1-9489-499f-b700-7d336d4935d0 (Too many open files)
at java.io.FileOutputStream.open(Native Method)
at java.io.FileOutputStream.<init>(FileOutputStream.java:221)
at org.apache.spark.storage.DiskBlockObjectWriter.open(DiskBlockObjectWriter.scala:88)
at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.insertAll(BypassMergeSortShuffleWriter.java:110)
at org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
at org.apache.spark.scheduler.Task.run(Task.scala:88)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
15/12/09 06:36:36 ERROR Executor: Managed memory leak detected; size = 33554432 bytes, TID = 6077
15/12/09 06:36:36 ERROR Executor: Managed memory leak detected; size = 16777216 bytes, TID = 6064
15/12/09 06:36:36 ERROR Executor: Exception in task 19.0 in stage 28.0 (TID 6065)
java.io.FileNotFoundException: /path/tmp/blockmgr-fde0f618-e443-4841-96c4-54c5e5b8fa0f/00/temp_shuffle_c8ae819f-2352-4a5f-901b-24d8f0902e6d (Too many open files)
at java.io.FileOutputStream.open(Native Method)
at java.io.FileOutputStream.<init>(FileOutputStream.java:221)
at org.apache.spark.storage.DiskBlockObjectWriter.open(DiskBlockObjectWriter.scala:88)
at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.insertAll(BypassMergeSortShuffleWriter.java:110)
at org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
at org.apache.spark.scheduler.Task.run(Task.scala:88)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
15/12/09 06:36:36 ERROR Executor: Exception in task 13.0 in stage 28.0 (TID 6059)
java.io.FileNotFoundException: /path/tmp/blockmgr-fde0f618-e443-4841-96c4-54c5e5b8fa0f/0a/temp_shuffle_e4f1a947-1d61-429c-8dfb-dde7b2732b37 (Too many open files)
at java.io.FileOutputStream.open(Native Method)
at java.io.FileOutputStream.<init>(FileOutputStream.java:221)
at org.apache.spark.storage.DiskBlockObjectWriter.open(DiskBlockObjectWriter.scala:88)
at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.insertAll(BypassMergeSortShuffleWriter.java:110)
at org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
at org.apache.spark.scheduler.Task.run(Task.scala:88)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
15/12/09 06:36:36 ERROR Executor: Managed memory leak detected; size = 16777216 bytes, TID = 6073
15/12/09 06:36:36 ERROR Executor: Managed memory leak detected; size = 16777216 bytes, TID = 6050
15/12/09 06:36:36 ERROR Executor: Exception in task 18.0 in stage 28.0 (TID 6064)
java.io.FileNotFoundException: /path/tmp/blockmgr-fde0f618-e443-4841-96c4-54c5e5b8fa0f/0a/temp_shuffle_5e4c5ffc-8168-4b33-8b3e-3ae57bdf9cee (Too many open files)
at java.io.FileOutputStream.open(Native Method)
at java.io.FileOutputStream.<init>(FileOutputStream.java:221)
at org.apache.spark.storage.DiskBlockObjectWriter.open(DiskBlockObjectWriter.scala:88)
at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.insertAll(BypassMergeSortShuffleWriter.java:110)
at org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
at org.apache.spark.scheduler.Task.run(Task.scala:88)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
15/12/09 06:36:36 ERROR Executor: Managed memory leak detected; size = 16777216 bytes, TID = 6071
15/12/09 06:36:36 ERROR Executor: Managed memory leak detected; size = 16777216 bytes, TID = 6068
15/12/09 06:36:36 ERROR Executor: Exception in task 25.0 in stage 28.0 (TID 6071)
java.io.FileNotFoundException: /path/tmp/blockmgr-fde0f618-e443-4841-96c4-54c5e5b8fa0f/34/temp_shuffle_fcea2161-b7e9-4ce0-af5e-6147cd2b3a4c (Too many open files)
at java.io.FileOutputStream.open(Native Method)
at java.io.FileOutputStream.<init>(FileOutputStream.java:221)
at org.apache.spark.storage.DiskBlockObjectWriter.open(DiskBlockObjectWriter.scala:88)
at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.insertAll(BypassMergeSortShuffleWriter.java:110)
at org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
at org.apache.spark.scheduler.Task.run(Task.scala:88)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
15/12/09 06:36:36 ERROR Executor: Managed memory leak detected; size = 16777216 bytes, TID = 6072
15/12/09 06:36:36 ERROR Executor: Managed memory leak detected; size = 16777216 bytes, TID = 6055
15/12/09 06:36:36 ERROR Executor: Exception in task 4.0 in stage 28.0 (TID 6050)
java.io.FileNotFoundException: /path/tmp/blockmgr-fde0f618-e443-4841-96c4-54c5e5b8fa0f/38/temp_shuffle_a7275df0-f59a-49a0-b2b2-29b1e30d6d3c (Too many open files)
at java.io.FileOutputStream.open(Native Method)
at java.io.FileOutputStream.<init>(FileOutputStream.java:221)
at org.apache.spark.storage.DiskBlockObjectWriter.open(DiskBlockObjectWriter.scala:88)
at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.insertAll(BypassMergeSortShuffleWriter.java:110)
at org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
at org.apache.spark.scheduler.Task.run(Task.scala:88)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
15/12/09 06:36:36 ERROR Executor: Exception in task 27.0 in stage 28.0 (TID 6073)
java.io.FileNotFoundException: /path/tmp/blockmgr-fde0f618-e443-4841-96c4-54c5e5b8fa0f/19/temp_shuffle_4e719ea3-2bbb-4bde-9238-2aaf988f64ed (Too many open files)
at java.io.FileOutputStream.open(Native Method)
at java.io.FileOutputStream.<init>(FileOutputStream.java:221)
at org.apache.spark.storage.DiskBlockObjectWriter.open(DiskBlockObjectWriter.scala:88)
at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.insertAll(BypassMergeSortShuffleWriter.java:110)
at org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
at org.apache.spark.scheduler.Task.run(Task.scala:88)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
15/12/09 06:36:36 ERROR Executor: Managed memory leak detected; size = 33554432 bytes, TID = 6080
15/12/09 06:36:36 ERROR Executor: Managed memory leak detected; size = 16777216 bytes, TID = 6054
15/12/09 06:36:36 ERROR Executor: Managed memory leak detected; size = 33554432 bytes, TID = 6076
15/12/09 06:36:36 ERROR Executor: Managed memory leak detected; size = 33554432 bytes, TID = 6078
15/12/09 06:36:36 ERROR Executor: Exception in task 9.0 in stage 28.0 (TID 6055)
java.io.FileNotFoundException: /path/tmp/blockmgr-fde0f618-e443-4841-96c4-54c5e5b8fa0f/09/temp_shuffle_8e4e3189-ae82-48ef-b86d-caeef7706741 (Too many open files)
at java.io.FileOutputStream.open(Native Method)
at java.io.FileOutputStream.<init>(FileOutputStream.java:221)
at org.apache.spark.storage.DiskBlockObjectWriter.open(DiskBlockObjectWriter.scala:88)
at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.insertAll(BypassMergeSortShuffleWriter.java:110)
at org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
at org.apache.spark.scheduler.Task.run(Task.scala:88)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
15/12/09 06:36:36 ERROR Executor: Exception in task 26.0 in stage 28.0 (TID 6072)
java.io.FileNotFoundException: /path/tmp/blockmgr-fde0f618-e443-4841-96c4-54c5e5b8fa0f/32/temp_shuffle_36fbaa0d-82e2-4328-90ad-50a9b3e0ee83 (Too many open files)
at java.io.FileOutputStream.open(Native Method)
at java.io.FileOutputStream.<init>(FileOutputStream.java:221)
at org.apache.spark.storage.DiskBlockObjectWriter.open(DiskBlockObjectWriter.scala:88)
at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.insertAll(BypassMergeSortShuffleWriter.java:110)
at org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
at org.apache.spark.scheduler.Task.run(Task.scala:88)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
15/12/09 06:36:36 ERROR Executor: Managed memory leak detected; size = 33554432 bytes, TID = 6079
15/12/09 06:36:36 ERROR Executor: Exception in task 22.0 in stage 28.0 (TID 6068)
java.io.FileNotFoundException: /path/tmp/blockmgr-fde0f618-e443-4841-96c4-54c5e5b8fa0f/2e/temp_shuffle_9d1e1d71-7aac-4ab9-ada7-7020471623a3 (Too many open files)
at java.io.FileOutputStream.open(Native Method)
at java.io.FileOutputStream.<init>(FileOutputStream.java:221)
at org.apache.spark.storage.DiskBlockObjectWriter.open(DiskBlockObjectWriter.scala:88)
at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.insertAll(BypassMergeSortShuffleWriter.java:110)
at org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
at org.apache.spark.scheduler.Task.run(Task.scala:88)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
15/12/09 06:36:36 ERROR Executor: Exception in task 8.0 in stage 28.0 (TID 6054)
java.io.FileNotFoundException: /path/tmp/blockmgr-fde0f618-e443-4841-96c4-54c5e5b8fa0f/25/temp_shuffle_4896feb2-f011-49ce-bdaa-4a6d20b1b306 (Too many open files)
at java.io.FileOutputStream.open(Native Method)
at java.io.FileOutputStream.<init>(FileOutputStream.java:221)
at org.apache.spark.storage.DiskBlockObjectWriter.open(DiskBlockObjectWriter.scala:88)
at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.insertAll(BypassMergeSortShuffleWriter.java:110)
at org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
at org.apache.spark.scheduler.Task.run(Task.scala:88)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
---------------------------------------------------------------------------
Py4JJavaError Traceback (most recent call last)
<ipython-input-151-a63780470b3c> in <module>()
----> 1 df_t = df.filter(df['max_cum_rank'] == 0).select(['col1','col2']).groupby('col1').agg(F.min('col2')).groupby('min(col2)').agg(F.countDistinct('col1')).toPandas()
/pathSW/SOFTWARE/spark-1.5.0/python/pyspark/sql/dataframe.pyc in toPandas(self)
1269 """
1270 import pandas as pd
-> 1271 return pd.DataFrame.from_records(self.collect(), columns=self.columns)
1272
1273 ##########################################################################################
/pathSW/SOFTWARE/spark-1.5.0/python/pyspark/sql/dataframe.pyc in collect(self)
277 """
278 with SCCallSiteSync(self._sc) as css:
--> 279 port = self._sc._jvm.PythonRDD.collectAndServe(self._jdf.javaToPython().rdd())
280 return list(_load_from_socket(port, BatchedSerializer(PickleSerializer())))
281
/pathSW/SOFTWARE/spark-1.5.0/python/lib/py4j-0.8.2.1-src.zip/py4j/java_gateway.py in _call_(self, *args)
536 answer = self.gateway_client.send_command(command)
537 return_value = get_return_value(answer, self.gateway_client,
--> 538 self.target_id, self.name)
539
540 for temp_arg in temp_args:
/pathSW/SOFTWARE/spark-1.5.0/python/pyspark/sql/utils.pyc in deco(*a, **kw)
34 def deco(*a, **kw):
35 try:
---> 36 return f(*a, **kw)
37 except py4j.protocol.Py4JJavaError as e:
38 s = e.java_exception.toString()
/pathSW/SOFTWARE/spark-1.5.0/python/lib/py4j-0.8.2.1-src.zip/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
298 raise Py4JJavaError(
299 'An error occurred while calling
.\n'.
--> 300 format(target_id, '.', name), value)
301 else:
302 raise Py4JError(
Py4JJavaError: An error occurred while calling z:org.apache.spark.api.python.PythonRDD.collectAndServe.
: org.apache.spark.SparkException: Job aborted due to stage failure: Task 16 in stage 28.0 failed 1 times, most recent failure: Lost task 16.0 in stage 28.0 (TID 6062, localhost): java.io.FileNotFoundException: /path/tmp/blockmgr-fde0f618-e443-4841-96c4-54c5e5b8fa0f/13/temp_shuffle_36e1cb5a-cd8d-446e-bc3f-0b39da891aa5 (Too many open files)
at java.io.FileOutputStream.open(Native Method)
at java.io.FileOutputStream.<init>(FileOutputStream.java:221)
at org.apache.spark.storage.DiskBlockObjectWriter.open(DiskBlockObjectWriter.scala:88)
at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.insertAll(BypassMergeSortShuffleWriter.java:110)
at org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
at org.apache.spark.scheduler.Task.run(Task.scala:88)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1280)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1268)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1267)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1267)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:697)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:697)
at scala.Option.foreach(Option.scala:236)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:697)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1493)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1455)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1444)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:567)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1813)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1826)
sqlContext = SQLContext(sc)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1839)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1910)
at org.apache.spark.rdd.RDD$$anonfun$collect$1.apply(RDD.scala:905)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:147)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:108)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:306)
at org.apache.spark.rdd.RDD.collect(RDD.scala:904)
at org.apache.spark.api.python.PythonRDD$.collectAndServe(PythonRDD.scala:373)
at org.apache.spark.api.python.PythonRDD.collectAndServe(PythonRDD.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:231)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:379)
at py4j.Gateway.invoke(Gateway.java:259)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:133)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:207)
at java.lang.Thread.run(Thread.java:745)
Caused by: java.io.FileNotFoundException: /path/tmp/blockmgr-fde0f618-e443-4841-96c4-54c5e5b8fa0f/13/temp_shuffle_36e1cb5a-cd8d-446e-bc3f-0b39da891aa5 (Too many open files)
at java.io.FileOutputStream.open(Native Method)
at java.io.FileOutputStream.<init>(FileOutputStream.java:221)
at org.apache.spark.storage.DiskBlockObjectWriter.open(DiskBlockObjectWriter.scala:88)
at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.insertAll(BypassMergeSortShuffleWriter.java:110)
at org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
at org.apache.spark.scheduler.Task.run(Task.scala:88)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
... 1 more