Uploaded image for project: 'Ignite'
  1. Ignite
  2. IGNITE-11620

GridDhtInvalidPartitionException stops the cluster

    XMLWordPrintableJSON

    Details

    • Type: Bug
    • Status: Resolved
    • Priority: Critical
    • Resolution: Duplicate
    • Affects Version/s: 2.6, 2.7
    • Fix Version/s: None
    • Component/s: None
    • Labels:
      None

      Description

      When data is being put into a cache and is expiring while rebalancing is in progress,
      a GridDhtInvalidPartitionException is thrown and triggers a SYSTEM_WORKER_TERMINATION failure.

      This can cause cascading failures in the cluster and take the whole cluster down.

      Simple test case:

      import org.apache.ignite.IgniteCache;
      import org.apache.ignite.configuration.CacheConfiguration;
      import org.apache.ignite.configuration.IgniteConfiguration;
      import org.apache.ignite.failure.StopNodeOrHaltFailureHandler;
      import org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi;
      import org.apache.ignite.spi.discovery.tcp.ipfinder.TcpDiscoveryIpFinder;
      import org.apache.ignite.spi.discovery.tcp.ipfinder.vm.TcpDiscoveryVmIpFinder;
      import org.apache.ignite.testframework.junits.common.GridCommonAbstractTest;
      
      import javax.cache.expiry.CreatedExpiryPolicy;
      import javax.cache.expiry.Duration;
      import java.util.concurrent.CountDownLatch;
      import java.util.concurrent.TimeUnit;
      
      import static org.apache.ignite.cache.CacheAtomicityMode.ATOMIC;
      import static org.apache.ignite.cache.CacheMode.PARTITIONED;
      
      /**
       * Reproducer for the scenario where cache entries are put with a one-second
       * created-expiry policy while a node leaves and rejoins the topology.
       * <p>
       * Every node is configured with {@link StopNodeOrHaltFailureHandler}, an
       * {@code ATOMIC}, {@code PARTITIONED} default cache and a {@link CreatedExpiryPolicy}
       * of one second.
       */
      public class ExpireWhileRebalanceTest extends GridCommonAbstractTest {
          /** Number of entries the loader thread puts into the cache. */
          private static final int ENTRIES = 500000;

          /** IP finder; static so that every node configured by this test uses the same instance. */
          protected static final TcpDiscoveryIpFinder IP_FINDER = new TcpDiscoveryVmIpFinder(true);

          /**
           * {@inheritDoc}
           */
          @Override
          protected IgniteConfiguration getConfiguration(String gridName) throws Exception {
              IgniteConfiguration cfg = super.getConfiguration(gridName);

              ((TcpDiscoverySpi) cfg.getDiscoverySpi()).setIpFinder(IP_FINDER);

              cfg.setFailureHandler(new StopNodeOrHaltFailureHandler());

              CacheConfiguration<Object, Object> ccfg = new CacheConfiguration<>(DEFAULT_CACHE_NAME);

              ccfg.setAtomicityMode(ATOMIC);
              ccfg.setCacheMode(PARTITIONED);

              // Entries expire one second after creation, so expiration overlaps with the restart below.
              ccfg.setExpiryPolicyFactory(CreatedExpiryPolicy.factoryOf(new Duration(TimeUnit.SECONDS, 1)));

              cfg.setCacheConfiguration(ccfg);

              return cfg;
          }

          /**
           * Starts four nodes, puts {@link #ENTRIES} entries through node 0 from a background
           * thread, and meanwhile stops node 3, awaits partition map exchange and starts
           * node 3 again. Finally waits up to ten seconds for the loader thread to finish.
           *
           * @throws Exception If failed.
           */
          public void testExpireWhileRebalancing() throws Exception {
              startGridsMultiThreaded(4);

              IgniteCache<Object, Object> cache = ignite(0).cache(DEFAULT_CACHE_NAME);

              CountDownLatch latch = new CountDownLatch(1);

              Thread loader = new Thread(() -> {
                  try {
                      for (int i = 1; i <= ENTRIES; i++) {
                          cache.put(i, i);

                          // Progress report after every tenth of the entries.
                          if (i % (ENTRIES / 10) == 0)
                              System.out.println(">>> Entries put: " + i);
                      }
                  }
                  finally {
                      // Release the waiter even when a put throws; otherwise the test
                      // thread would always sit out the full await timeout below.
                      latch.countDown();
                  }
              }, "expire-while-rebalance-loader");

              loader.start();

              // stopping 0 has no effect
              stopGrid(3);

              awaitPartitionMapExchange();

              startGrid(3);

              // NOTE(review): the await result is ignored, so a timeout does not fail the
              // test - presumably intentional for a reproducer; confirm before asserting on it.
              latch.await(10, TimeUnit.SECONDS);
          }

          /**
           * {@inheritDoc}
           */
          @Override
          protected void afterTest() throws Exception {
              stopAllGrids();
          }
      }
      

        Attachments

          Issue Links

            Activity

              People

              • Assignee:
                Unassigned
                Reporter:
                shroman Roman Shtykh
              • Votes:
                0 Vote for this issue
                Watchers:
                4 Start watching this issue

                Dates

                • Created:
                  Updated:
                  Resolved: