Uploaded image for project: 'Apache Ozone'
  1. Apache Ozone
  2. HDDS-5756

SIGSEGV RocksDB crash in Datanode

Log work | Agile Board | Rank to Top | Rank to Bottom | Attach files | Attach Screenshot | Bulk Copy Attachments | Bulk Move Attachments | Voters | Watch issue | Watchers | Create sub-task | Link | Clone | Labels | Update Comment Author | Replace String in Comment | Update Comment Visibility | Delete Comments
    XML | Word | Printable | JSON

Details

    Description

      During load testing, the following crash was observed.

      Current thread (0x00007f4e7b083800): JavaThread "grpc-default-executor-1243" daemon [_thread_in_native, id=30646, stack(0x00007f4de67b6000,0x00007f4de68b7000)]

      Stack: [0x00007f4de67b6000,0x00007f4de68b7000], sp=0x00007f4de68b4c90, free space=1019k
      Native frames: (J=compiled Java code, A=aot compiled Java code, j=interpreted, Vv=VM code, C=native code)
      C [librocksdbjni7210046854294290562.so+0x470f9e] rocksdb::ReadFileToString(rocksdb::FileSystem*, std::string const&, std::string*)+0x8e
      C [librocksdbjni7210046854294290562.so+0x41a500] rocksdb::VersionSet::GetCurrentManifestPath(std::string const&, rocksdb::FileSystem*, std::string*, unsigned long*)+0x60
      C [librocksdbjni7210046854294290562.so+0x431776] rocksdb::VersionSet::ListColumnFamilies(std::vector<std::string, std::allocator<std::string> >*, std::string const&, rocksdb::FileSystem*)+0x96
      C [librocksdbjni7210046854294290562.so+0x300d90] rocksdb::DB::ListColumnFamilies(rocksdb::DBOptions const&, std::string const&, std::vector<std::string, std::allocator<std::string> >*)+0x30
      C [librocksdbjni7210046854294290562.so+0x241e49] Java_org_rocksdb_RocksDB_listColumnFamilies+0x89
      J 4802 org.rocksdb.RocksDB.listColumnFamilies(JLjava/lang/String;)[[B (0 bytes) @ 0x00007f4e8485dda2 [0x00007f4e8485dcc0+0x00000000000000e2]
      J 13730 c2 org.apache.hadoop.hdds.utils.db.RDBStore.<init>(Ljava/io/File;Lorg/rocksdb/DBOptions;Lorg/rocksdb/WriteOptions;Ljava/util/Set;Lorg/apache/hadoop/hdds/utils/db/CodecRegistry;Z)V (717 bytes) @ 0x00007f4e85557484 [0x00007f4e85556b80+0x0000000000000904]
      J 14818 c2 org.apache.hadoop.ozone.container.keyvalue.helpers.BlockUtils.getUncachedDatanodeStore(JLjava/lang/String;Ljava/lang/String;Lorg/apache/hadoop/hdds/conf/ConfigurationSource;Z)Lorg/apache/hadoop/ozone/container/metadata/DatanodeStore; (84 bytes) @ 0x00007f4e8530cbac [0x00007f4e85309960+0x000000000000324c]
      J 21714 c2 org.apache.hadoop.ozone.container.common.utils.ContainerCache.getDB(JLjava/lang/String;Ljava/lang/String;Ljava/lang/String;Lorg/apache/hadoop/hdds/conf/ConfigurationSource;)Lorg/apache/hadoop/ozone/container/common/utils/ReferenceCountedDB; (339 bytes) @ 0x00007f4e85f200fc [0x00007f4e85f1ec00+0x00000000000014fc]
      J 21274 c2 org.apache.hadoop.ozone.container.keyvalue.impl.BlockManagerImpl.getBlock(Lorg/apache/hadoop/ozone/container/common/interfaces/Container;Lorg/apache/hadoop/hdds/client/BlockID;)Lorg/apache/hadoop/ozone/container/common/helpers/BlockData; (299 bytes) @ 0x00007f4e85d95cf8 [0x00007f4e85d95b80+0x0000000000000178]
      J 21490 c2 org.apache.hadoop.ozone.container.common.impl.HddsDispatcher.dispatchRequest(Lorg/apache/hadoop/hdds/protocol/datanode/proto/ContainerProtos$ContainerCommandRequestProto;Lorg/apache/hadoop/ozone/container/common/transport/server/ratis/DispatcherContext;)Lorg/apache/hadoop/hdds/protocol/datanode/proto/ContainerProtos$ContainerCommandResponseProto; (1105 bytes) @ 0x00007f4e85ebb89c [0x00007f4e85eb9100+0x000000000000279c]
      J 20711 c2 org.apache.hadoop.ozone.container.common.impl.HddsDispatcher.dispatch(Lorg/apache/hadoop/hdds/protocol/datanode/proto/ContainerProtos$ContainerCommandRequestProto;Lorg/apache/hadoop/ozone/container/common/transport/server/ratis/DispatcherContext;)Lorg/apache/hadoop/hdds/protocol/datanode/proto/ContainerProtos$ContainerCommandResponseProto; (38 bytes) @ 0x00007f4e85bacf00 [0x00007f4e85baccc0+0x0000000000000240]
      J 20718 c2 org.apache.hadoop.ozone.container.common.transport.server.GrpcXceiverService$1.onNext(Lorg/apache/hadoop/hdds/protocol/datanode/proto/ContainerProtos$ContainerCommandRequestProto;)V (52 bytes) @ 0x00007f4e85bb71c8 [0x00007f4e85bb7160+0x0000000000000068]
      J 20363 c2 org.apache.ratis.thirdparty.io.grpc.internal.ServerImpl$JumpToApplicationThreadServerStreamListener$1MessagesAvailable.runInContext()V (77 bytes) @ 0x00007f4e8583e290 [0x00007f4e8583cda0+0x00000000000014f0]
      J 16086 c2 org.apache.ratis.thirdparty.io.grpc.internal.ContextRunnable.run()V (35 bytes) @ 0x00007f4e855d0fb0 [0x00007f4e855d0b00+0x00000000000004b0]
      J 21355 c2 org.apache.ratis.thirdparty.io.grpc.internal.SerializingExecutor.run()V (99 bytes) @ 0x00007f4e85e7469c [0x00007f4e85e745a0+0x00000000000000fc]
      J 20042 c2 java.util.concurrent.ThreadPoolExecutor.runWorker(Ljava/util/concurrent/ThreadPoolExecutor$Worker;)V java.base@11.0.5 (187 bytes) @ 0x00007f4e855b7780 [0x00007f4e855b75a0+0x00000000000001e0]
      J 17003 c1 java.util.concurrent.ThreadPoolExecutor$Worker.run()V java.base@11.0.5 (9 bytes) @ 0x00007f4e7d27e9e4 [0x00007f4e7d27e940+0x00000000000000a4]
      J 11466 c1 java.lang.Thread.run()V java.base@11.0.5 (17 bytes) @ 0x00007f4e7e77fcd4 [0x00007f4e7e77fb60+0x0000000000000174]
      v ~StubRoutines::call_stub
      V [libjvm.so+0x885ac9] JavaCalls::call_helper(JavaValue*, methodHandle const&, JavaCallArguments*, Thread*)+0x3b9
      V [libjvm.so+0x883a6d] JavaCalls::call_virtual(JavaValue*, Handle, Klass*, Symbol*, Symbol*, Thread*)+0x1ed
      V [libjvm.so+0x92cd4c] thread_entry(JavaThread*, Thread*)+0x6c
      V [libjvm.so+0xdbea53] JavaThread::thread_main_inner()+0x103
      V [libjvm.so+0xdbed25] JavaThread::run()+0x2a5
      V [libjvm.so+0xdbaa3a] Thread::call_run()+0x13a
      V [libjvm.so+0xc0ad2e] thread_native_entry(Thread*)+0xee

      Java frames: (J=compiled Java code, j=interpreted, Vv=VM code)
      J 4802 org.rocksdb.RocksDB.listColumnFamilies(JLjava/lang/String;)[[B (0 bytes) @ 0x00007f4e8485dd2d [0x00007f4e8485dcc0+0x000000000000006d]
      J 13730 c2 org.apache.hadoop.hdds.utils.db.RDBStore.<init>(Ljava/io/File;Lorg/rocksdb/DBOptions;Lorg/rocksdb/WriteOptions;Ljava/util/Set;Lorg/apache/hadoop/hdds/utils/db/CodecRegistry;Z)V (717 bytes) @ 0x00007f4e85557484 [0x00007f4e85556b80+0x0000000000000904]
      J 14818 c2 org.apache.hadoop.ozone.container.keyvalue.helpers.BlockUtils.getUncachedDatanodeStore(JLjava/lang/String;Ljava/lang/String;Lorg/apache/hadoop/hdds/conf/ConfigurationSource;Z)Lorg/apache/hadoop/ozone/container/metadata/DatanodeStore; (84 bytes) @ 0x00007f4e8530cbac [0x00007f4e85309960+0x000000000000324c]
      J 21714 c2 org.apache.hadoop.ozone.container.common.utils.ContainerCache.getDB(JLjava/lang/String;Ljava/lang/String;Ljava/lang/String;Lorg/apache/hadoop/hdds/conf/ConfigurationSource;)Lorg/apache/hadoop/ozone/container/common/utils/ReferenceCountedDB; (339 bytes) @ 0x00007f4e85f200fc [0x00007f4e85f1ec00+0x00000000000014fc]
      J 21274 c2 org.apache.hadoop.ozone.container.keyvalue.impl.BlockManagerImpl.getBlock(Lorg/apache/hadoop/ozone/container/common/interfaces/Container;Lorg/apache/hadoop/hdds/client/BlockID;)Lorg/apache/hadoop/ozone/container/common/helpers/BlockData; (299 bytes) @ 0x00007f4e85d95cf8 [0x00007f4e85d95b80+0x0000000000000178]
      J 21490 c2 org.apache.hadoop.ozone.container.common.impl.HddsDispatcher.dispatchRequest(Lorg/apache/hadoop/hdds/protocol/datanode/proto/ContainerProtos$ContainerCommandRequestProto;Lorg/apache/hadoop/ozone/container/common/transport/server/ratis/DispatcherContext;)Lorg/apache/hadoop/hdds/protocol/datanode/proto/ContainerProtos$ContainerCommandResponseProto; (1105 bytes) @ 0x00007f4e85ebb89c [0x00007f4e85eb9100+0x000000000000279c]
      J 20711 c2 org.apache.hadoop.ozone.container.common.impl.HddsDispatcher.dispatch(Lorg/apache/hadoop/hdds/protocol/datanode/proto/ContainerProtos$ContainerCommandRequestProto;Lorg/apache/hadoop/ozone/container/common/transport/server/ratis/DispatcherContext;)Lorg/apache/hadoop/hdds/protocol/datanode/proto/ContainerProtos$ContainerCommandResponseProto; (38 bytes) @ 0x00007f4e85bacf00 [0x00007f4e85baccc0+0x0000000000000240]
      J 20718 c2 org.apache.hadoop.ozone.container.common.transport.server.GrpcXceiverService$1.onNext(Lorg/apache/hadoop/hdds/protocol/datanode/proto/ContainerProtos$ContainerCommandRequestProto;)V (52 bytes) @ 0x00007f4e85bb71c8 [0x00007f4e85bb7160+0x0000000000000068]
      J 20363 c2 org.apache.ratis.thirdparty.io.grpc.internal.ServerImpl$JumpToApplicationThreadServerStreamListener$1MessagesAvailable.runInContext()V (77 bytes) @ 0x00007f4e8583e290 [0x00007f4e8583cda0+0x00000000000014f0]
      J 16086 c2 org.apache.ratis.thirdparty.io.grpc.internal.ContextRunnable.run()V (35 bytes) @ 0x00007f4e855d0fb0 [0x00007f4e855d0b00+0x00000000000004b0]
      J 21355 c2 org.apache.ratis.thirdparty.io.grpc.internal.SerializingExecutor.run()V (99 bytes) @ 0x00007f4e85e7469c [0x00007f4e85e745a0+0x00000000000000fc]
      J 20042 c2 java.util.concurrent.ThreadPoolExecutor.runWorker(Ljava/util/concurrent/ThreadPoolExecutor$Worker;)V java.base@11.0.5 (187 bytes) @ 0x00007f4e855b7780 [0x00007f4e855b75a0+0x00000000000001e0]
      J 17003 c1 java.util.concurrent.ThreadPoolExecutor$Worker.run()V java.base@11.0.5 (9 bytes) @ 0x00007f4e7d27e9e4 [0x00007f4e7d27e940+0x00000000000000a4]
      J 11466 c1 java.lang.Thread.run()V java.base@11.0.5 (17 bytes) @ 0x00007f4e7e77fcd4 [0x00007f4e7e77fb60+0x0000000000000174]
      v ~StubRoutines::call_stub

      siginfo: si_signo: 11 (SIGSEGV), si_code: 1 (SEGV_MAPERR), si_addr: 0x0000000000000068

      Attachments

        Activity

          This comment will be viewable by all users.
          Cancel

          People

            ritesh Ritesh Shukla Assign to me
            ritesh Ritesh Shukla
            Votes:
            0 Vote for this issue
            Watchers:
            7 Start watching this issue

            Dates

              Created:
              Updated:
              Resolved:

              Slack

                Issue deployment