Details
-
Bug
-
Status: To Do
-
Major
-
Resolution: Unresolved
-
None
-
None
Description
Setting ``MXNET_MKLDNN_DEBUG=1`` as environment variable will produce the following error in tests. This happens across all configurations and seeds. I do not think that this is a test failure.
====================================================================== ERROR: test_gluon_model_zoo.test_models ---------------------------------------------------------------------- Traceback (most recent call last): File "/usr/local/lib/python2.7/dist-packages/nose/case.py", line 197, in runTest self.test(*self.arg) File "/work/mxnet/tests/python/unittest/common.py", line 157, in test_new orig_test(*args, **kwargs) File "/work/mxnet/tests/python/unittest/test_gluon_model_zoo.py", line 50, in test_models model(mx.nd.random.uniform(shape=data_shape)).wait_to_read() File "/work/mxnet/python/mxnet/ndarray/ndarray.py", line 1650, in wait_to_read check_call(_LIB.MXNDArrayWaitToRead(self.handle)) File "/work/mxnet/python/mxnet/base.py", line 149, in check_call raise MXNetError(py_str(_LIB.MXGetLastError())) MXNetError: [17:10:12] src/operator/nn/mkldnn/mkldnn_base.cc:395: Check failed: similar Stack trace returned 10 entries: [bt] (0) /work/mxnet/python/mxnet/../../lib/libmxnet.so(dmlc::StackTrace[abi:cxx11]()+0x5b) [0x7f06ccf3745b] [bt] (1) /work/mxnet/python/mxnet/../../lib/libmxnet.so(dmlc::LogMessageFatal::~LogMessageFatal()+0x28) [0x7f06ccf38478] [bt] (2) /work/mxnet/python/mxnet/../../lib/libmxnet.so(mxnet::OpCheck::Run(std::function<void (nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&)>, nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::NDArray, std::allocator<mxnet::NDArray> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::NDArray, std::allocator<mxnet::NDArray> > const&)+0x3ca8) [0x7f06ccf54198] [bt] (3) /work/mxnet/python/mxnet/../../lib/libmxnet.so(+0x2a910d9) [0x7f06cf55a0d9] [bt] (4) /work/mxnet/python/mxnet/../../lib/libmxnet.so(std::_Function_handler<void (mxnet::RunContext), mxnet::imperative::PushFComputeEx(std::function<void (nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::NDArray, std::allocator<mxnet::NDArray> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::NDArray, std::allocator<mxnet::NDArray> > const&)> const&, nnvm::Op const*, nnvm::NodeAttrs const&, mxnet::Context const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::Resource, std::allocator<mxnet::Resource> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&):: {lambda(mxnet::RunContext)#1} >::_M_invoke(std::_Any_data const&, mxnet::RunContext&&)+0x7c) [0x7f06cf77608c] [bt] (5) /work/mxnet/python/mxnet/../../lib/libmxnet.so(+0x3148fdb) [0x7f06cfc11fdb] [bt] (6) /work/mxnet/python/mxnet/../../lib/libmxnet.so(mxnet::engine::ThreadedEngine::ExecuteOprBlock(mxnet::RunContext, mxnet::engine::OprBlock*)+0xcb5) [0x7f06cfc0b1a5] [bt] (7) /work/mxnet/python/mxnet/../../lib/libmxnet.so(std::_Function_handler<void (std::shared_ptr<dmlc::ManualEvent>), mxnet::engine::ThreadedEnginePerDevice::PushToExecute(mxnet::engine::OprBlock*, bool):: {lambda()#1} ::operator()() const:: {lambda(std::shared_ptr<dmlc::ManualEvent>)#1} >::_M_invoke(std::_Any_data const&, std::shared_ptr<dmlc::ManualEvent>&&)+0xd9) [0x7f06cfc1d309] [bt] (8) /work/mxnet/python/mxnet/../../lib/libmxnet.so(std::thread::_Impl<std::_Bind_simple<std::function<void (std::shared_ptr<dmlc::ManualEvent>)> (std::shared_ptr<dmlc::ManualEvent>)> >::_M_run()+0x4a) [0x7f06cfc1c43a] [bt] (9) /usr/lib/x86_64-linux-gnu/libstdc++.so.6(+0xb8c80) [0x7f06d7ca4c80] -------------------- >> begin captured stdout << --------------------- ResNetV1( (features): HybridSequential( (0): Conv2D(None -> 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False) (1): BatchNorm(fix_gamma=False, use_global_stats=False, eps=1e-05, momentum=0.9, axis=1, in_channels=None) (2): Activation(relu) (3): MaxPool2D(size=(3, 3), stride=(2, 2), padding=(1, 1), ceil_mode=False) (4): HybridSequential( (0): BasicBlockV1( (body): HybridSequential( (0): Conv2D(64 -> 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (1): BatchNorm(fix_gamma=False, use_global_stats=False, eps=1e-05, momentum=0.9, axis=1, in_channels=None) (2): Activation(relu) (3): Conv2D(64 -> 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (4): BatchNorm(fix_gamma=False, use_global_stats=False, eps=1e-05, momentum=0.9, axis=1, in_channels=None) ) ) (1): BasicBlockV1( (body): HybridSequential( (0): Conv2D(64 -> 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (1): BatchNorm(fix_gamma=False, use_global_stats=False, eps=1e-05, momentum=0.9, axis=1, in_channels=None) (2): Activation(relu) (3): Conv2D(64 -> 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (4): BatchNorm(fix_gamma=False, use_global_stats=False, eps=1e-05, momentum=0.9, axis=1, in_channels=None) ) ) ) (5): HybridSequential( (0): BasicBlockV1( (body): HybridSequential( (0): Conv2D(64 -> 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False) (1): BatchNorm(fix_gamma=False, use_global_stats=False, eps=1e-05, momentum=0.9, axis=1, in_channels=None) (2): Activation(relu) (3): Conv2D(128 -> 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (4): BatchNorm(fix_gamma=False, use_global_stats=False, eps=1e-05, momentum=0.9, axis=1, in_channels=None) ) (downsample): HybridSequential( (0): Conv2D(64 -> 128, kernel_size=(1, 1), stride=(2, 2), bias=False) (1): BatchNorm(fix_gamma=False, use_global_stats=False, eps=1e-05, momentum=0.9, axis=1, in_channels=None) ) ) (1): BasicBlockV1( (body): HybridSequential( (0): Conv2D(128 -> 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (1): BatchNorm(fix_gamma=False, use_global_stats=False, eps=1e-05, momentum=0.9, axis=1, in_channels=None) (2): Activation(relu) (3): Conv2D(128 -> 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (4): BatchNorm(fix_gamma=False, use_global_stats=False, eps=1e-05, momentum=0.9, axis=1, in_channels=None) ) ) ) (6): HybridSequential( (0): BasicBlockV1( (body): HybridSequential( (0): Conv2D(128 -> 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False) (1): BatchNorm(fix_gamma=False, use_global_stats=False, eps=1e-05, momentum=0.9, axis=1, in_channels=None) (2): Activation(relu) (3): Conv2D(256 -> 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (4): BatchNorm(fix_gamma=False, use_global_stats=False, eps=1e-05, momentum=0.9, axis=1, in_channels=None) ) (downsample): HybridSequential( (0): Conv2D(128 -> 256, kernel_size=(1, 1), stride=(2, 2), bias=False) (1): BatchNorm(fix_gamma=False, use_global_stats=False, eps=1e-05, momentum=0.9, axis=1, in_channels=None) ) ) (1): BasicBlockV1( (body): HybridSequential( (0): Conv2D(256 -> 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (1): BatchNorm(fix_gamma=False, use_global_stats=False, eps=1e-05, momentum=0.9, axis=1, in_channels=None) (2): Activation(relu) (3): Conv2D(256 -> 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (4): BatchNorm(fix_gamma=False, use_global_stats=False, eps=1e-05, momentum=0.9, axis=1, in_channels=None) ) ) ) (7): HybridSequential( (0): BasicBlockV1( (body): HybridSequential( (0): Conv2D(256 -> 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False) (1): BatchNorm(fix_gamma=False, use_global_stats=False, eps=1e-05, momentum=0.9, axis=1, in_channels=None) (2): Activation(relu) (3): Conv2D(512 -> 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (4): BatchNorm(fix_gamma=False, use_global_stats=False, eps=1e-05, momentum=0.9, axis=1, in_channels=None) ) (downsample): HybridSequential( (0): Conv2D(256 -> 512, kernel_size=(1, 1), stride=(2, 2), bias=False) (1): BatchNorm(fix_gamma=False, use_global_stats=False, eps=1e-05, momentum=0.9, axis=1, in_channels=None) ) ) (1): BasicBlockV1( (body): HybridSequential( (0): Conv2D(512 -> 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (1): BatchNorm(fix_gamma=False, use_global_stats=False, eps=1e-05, momentum=0.9, axis=1, in_channels=None) (2): Activation(relu) (3): Conv2D(512 -> 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (4): BatchNorm(fix_gamma=False, use_global_stats=False, eps=1e-05, momentum=0.9, axis=1, in_channels=None) ) ) ) (8): GlobalAvgPool2D(size=(1, 1), stride=(1, 1), padding=(0, 0), ceil_mode=True) ) (output): Dense(512 -> 1000, linear) ) ResNetV1( (features): HybridSequential( (0): Conv2D(None -> 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False) (1): BatchNorm(fix_gamma=False, use_global_stats=False, eps=1e-05, momentum=0.9, axis=1, in_channels=None) (2): Activation(relu) (3): MaxPool2D(size=(3, 3), stride=(2, 2), padding=(1, 1), ceil_mode=False) (4): HybridSequential( (0): BasicBlockV1( (body): HybridSequential( (0): Conv2D(64 -> 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (1): BatchNorm(fix_gamma=False, use_global_stats=False, eps=1e-05, momentum=0.9, axis=1, in_channels=None) (2): Activation(relu) (3): Conv2D(64 -> 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (4): BatchNorm(fix_gamma=False, use_global_stats=False, eps=1e-05, momentum=0.9, axis=1, in_channels=None) ) ) (1): BasicBlockV1( (body): HybridSequential( (0): Conv2D(64 -> 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (1): BatchNorm(fix_gamma=False, use_global_stats=False, eps=1e-05, momentum=0.9, axis=1, in_channels=None) (2): Activation(relu) (3): Conv2D(64 -> 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (4): BatchNorm(fix_gamma=False, use_global_stats=False, eps=1e-05, momentum=0.9, axis=1, in_channels=None) ) ) (2): BasicBlockV1( (body): HybridSequential( (0): Conv2D(64 -> 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (1): BatchNorm(fix_gamma=False, use_global_stats=False, eps=1e-05, momentum=0.9, axis=1, in_channels=None) (2): Activation(relu) (3): Conv2D(64 -> 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (4): BatchNorm(fix_gamma=False, use_global_stats=False, eps=1e-05, momentum=0.9, axis=1, in_channels=None) ) ) ) (5): HybridSequential( (0): BasicBlockV1( (body): HybridSequential( (0): Conv2D(64 -> 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False) (1): BatchNorm(fix_gamma=False, use_global_stats=False, eps=1e-05, momentum=0.9, axis=1, in_channels=None) (2): Activation(relu) (3): Conv2D(128 -> 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (4): BatchNorm(fix_gamma=False, use_global_stats=False, eps=1e-05, momentum=0.9, axis=1, in_channels=None) ) (downsample): HybridSequential( (0): Conv2D(64 -> 128, kernel_size=(1, 1), stride=(2, 2), bias=False) (1): BatchNorm(fix_gamma=False, use_global_stats=False, eps=1e-05, momentum=0.9, axis=1, in_channels=None) ) ) (1): BasicBlockV1( (body): HybridSequential( (0): Conv2D(128 -> 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (1): BatchNorm(fix_gamma=False, use_global_stats=False, eps=1e-05, momentum=0.9, axis=1, in_channels=None) (2): Activation(relu) (3): Conv2D(128 -> 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (4): BatchNorm(fix_gamma=False, use_global_stats=False, eps=1e-05, momentum=0.9, axis=1, in_channels=None) ) ) (2): BasicBlockV1( (body): HybridSequential( (0): Conv2D(128 -> 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (1): BatchNorm(fix_gamma=False, use_global_stats=False, eps=1e-05, momentum=0.9, axis=1, in_channels=None) (2): Activation(relu) (3): Conv2D(128 -> 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (4): BatchNorm(fix_gamma=False, use_global_stats=False, eps=1e-05, momentum=0.9, axis=1, in_channels=None) ) ) (3): BasicBlockV1( (body): HybridSequential( (0): Conv2D(128 -> 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (1): BatchNorm(fix_gamma=False, use_global_stats=False, eps=1e-05, momentum=0.9, axis=1, in_channels=None) (2): Activation(relu) (3): Conv2D(128 -> 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (4): BatchNorm(fix_gamma=False, use_global_stats=False, eps=1e-05, momentum=0.9, axis=1, in_channels=None) ) ) ) (6): HybridSequential( (0): BasicBlockV1( (body): HybridSequential( (0): Conv2D(128 -> 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False) (1): BatchNorm(fix_gamma=False, use_global_stats=False, eps=1e-05, momentum=0.9, axis=1, in_channels=None) (2): Activation(relu) (3): Conv2D(256 -> 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (4): BatchNorm(fix_gamma=False, use_global_stats=False, eps=1e-05, momentum=0.9, axis=1, in_channels=None) ) (downsample): HybridSequential( (0): Conv2D(128 -> 256, kernel_size=(1, 1), stride=(2, 2), bias=False) (1): BatchNorm(fix_gamma=False, use_global_stats=False, eps=1e-05, momentum=0.9, axis=1, in_channels=None) ) ) (1): BasicBlockV1( (body): HybridSequential( (0): Conv2D(256 -> 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (1): BatchNorm(fix_gamma=False, use_global_stats=False, eps=1e-05, momentum=0.9, axis=1, in_channels=None) (2): Activation(relu) (3): Conv2D(256 -> 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (4): BatchNorm(fix_gamma=False, use_global_stats=False, eps=1e-05, momentum=0.9, axis=1, in_channels=None) ) ) (2): BasicBlockV1( (body): HybridSequential( (0): Conv2D(256 -> 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (1): BatchNorm(fix_gamma=False, use_global_stats=False, eps=1e-05, momentum=0.9, axis=1, in_channels=None) (2): Activation(relu) (3): Conv2D(256 -> 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (4): BatchNorm(fix_gamma=False, use_global_stats=False, eps=1e-05, momentum=0.9, axis=1, in_channels=None) ) ) (3): BasicBlockV1( (body): HybridSequential( (0): Conv2D(256 -> 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (1): BatchNorm(fix_gamma=False, use_global_stats=False, eps=1e-05, momentum=0.9, axis=1, in_channels=None) (2): Activation(relu) (3): Conv2D(256 -> 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (4): BatchNorm(fix_gamma=False, use_global_stats=False, eps=1e-05, momentum=0.9, axis=1, in_channels=None) ) ) (4): BasicBlockV1( (body): HybridSequential( (0): Conv2D(256 -> 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (1): BatchNorm(fix_gamma=False, use_global_stats=False, eps=1e-05, momentum=0.9, axis=1, in_channels=None) (2): Activation(relu) (3): Conv2D(256 -> 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (4): BatchNorm(fix_gamma=False, use_global_stats=False, eps=1e-05, momentum=0.9, axis=1, in_channels=None) ) ) (5): BasicBlockV1( (body): HybridSequential( (0): Conv2D(256 -> 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (1): BatchNorm(fix_gamma=False, use_global_stats=False, eps=1e-05, momentum=0.9, axis=1, in_channels=None) (2): Activation(relu) (3): Conv2D(256 -> 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (4): BatchNorm(fix_gamma=False, use_global_stats=False, eps=1e-05, momentum=0.9, axis=1, in_channels=None) ) ) ) (7): HybridSequential( (0): BasicBlockV1( (body): HybridSequential( (0): Conv2D(256 -> 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False) (1): BatchNorm(fix_gamma=False, use_global_stats=False, eps=1e-05, momentum=0.9, axis=1, in_channels=None) (2): Activation(relu) (3): Conv2D(512 -> 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (4): BatchNorm(fix_gamma=False, use_global_stats=False, eps=1e-05, momentum=0.9, axis=1, in_channels=None) ) (downsample): HybridSequential( (0): Conv2D(256 -> 512, kernel_size=(1, 1), stride=(2, 2), bias=False) (1): BatchNorm(fix_gamma=False, use_global_stats=False, eps=1e-05, momentum=0.9, axis=1, in_channels=None) ) ) (1): BasicBlockV1( (body): HybridSequential( (0): Conv2D(512 -> 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (1): BatchNorm(fix_gamma=False, use_global_stats=False, eps=1e-05, momentum=0.9, axis=1, in_channels=None) (2): Activation(relu) (3): Conv2D(512 -> 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (4): BatchNorm(fix_gamma=False, use_global_stats=False, eps=1e-05, momentum=0.9, axis=1, in_channels=None) ) ) (2): BasicBlockV1( (body): HybridSequential( (0): Conv2D(512 -> 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (1): BatchNorm(fix_gamma=False, use_global_stats=False, eps=1e-05, momentum=0.9, axis=1, in_channels=None) (2): Activation(relu) (3): Conv2D(512 -> 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (4): BatchNorm(fix_gamma=False, use_global_stats=False, eps=1e-05, momentum=0.9, axis=1, in_channels=None) ) ) ) (8): GlobalAvgPool2D(size=(1, 1), stride=(1, 1), padding=(0, 0), ceil_mode=True) ) (output): Dense(512 -> 1000, linear) ) --------------------- >> end captured stdout << ---------------------- -------------------- >> begin captured logging << -------------------- common: INFO: Setting module np/mx/python random seeds, use MXNET_MODULE_SEED=1825457337 to reproduce. common: INFO: Setting test np/mx/python random seeds, use MXNET_TEST_SEED=1579343143 to reproduce. --------------------- >> end captured logging << ---------------------
Attachments
Issue Links
- links to