Uploaded image for project: 'Flink'
  1. Flink
  2. FLINK-22932

RocksDBStateBackendWindowITCase fails with savepoint timeout

    XML | Word | Printable | JSON

    Details

      Description

      Initially reported in FLINK-22067

      https://dev.azure.com/apache-flink/apache-flink/_build/results?buildId=18709&view=logs&j=a8bc9173-2af6-5ba8-775c-12063b4f1d54&t=46a16c18-c679-5905-432b-9be5d8e27bc6&l=10183

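      The savepoint is triggered but does not complete in time: the triggerSavepoint call to the dispatcher times out after 10000 ms (see the AskTimeoutException at the end of the trace below).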

      2021-06-06T22:27:46.4845045Z Jun 06 22:27:46 java.lang.RuntimeException: Failed to take savepoint
      2021-06-06T22:27:46.4846088Z Jun 06 22:27:46 	at org.apache.flink.state.api.utils.SavepointTestBase.takeSavepoint(SavepointTestBase.java:71)
      2021-06-06T22:27:46.4847049Z Jun 06 22:27:46 	at org.apache.flink.state.api.utils.SavepointTestBase.takeSavepoint(SavepointTestBase.java:46)
      2021-06-06T22:27:46.4848262Z Jun 06 22:27:46 	at org.apache.flink.state.api.SavepointWindowReaderITCase.testApplyEvictorWindowStateReader(SavepointWindowReaderITCase.java:350)
      2021-06-06T22:27:46.4854133Z Jun 06 22:27:46 	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
      2021-06-06T22:27:46.4855430Z Jun 06 22:27:46 	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
      2021-06-06T22:27:46.4856528Z Jun 06 22:27:46 	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
      2021-06-06T22:27:46.4857487Z Jun 06 22:27:46 	at java.lang.reflect.Method.invoke(Method.java:498)
      2021-06-06T22:27:46.4858685Z Jun 06 22:27:46 	at org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:50)
      2021-06-06T22:27:46.4859773Z Jun 06 22:27:46 	at org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12)
      2021-06-06T22:27:46.4860964Z Jun 06 22:27:46 	at org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:47)
      2021-06-06T22:27:46.4862306Z Jun 06 22:27:46 	at org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17)
      2021-06-06T22:27:46.4863756Z Jun 06 22:27:46 	at org.junit.internal.runners.statements.RunAfters.evaluate(RunAfters.java:27)
      2021-06-06T22:27:46.4864993Z Jun 06 22:27:46 	at org.apache.flink.util.TestNameProvider$1.evaluate(TestNameProvider.java:45)
      2021-06-06T22:27:46.4866179Z Jun 06 22:27:46 	at org.junit.rules.TestWatcher$1.evaluate(TestWatcher.java:55)
      2021-06-06T22:27:46.4867272Z Jun 06 22:27:46 	at org.junit.rules.RunRules.evaluate(RunRules.java:20)
      2021-06-06T22:27:46.4868255Z Jun 06 22:27:46 	at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:325)
      2021-06-06T22:27:46.4869045Z Jun 06 22:27:46 	at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:78)
      2021-06-06T22:27:46.4869902Z Jun 06 22:27:46 	at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:57)
      2021-06-06T22:27:46.4871038Z Jun 06 22:27:46 	at org.junit.runners.ParentRunner$3.run(ParentRunner.java:290)
      2021-06-06T22:27:46.4871756Z Jun 06 22:27:46 	at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:71)
      2021-06-06T22:27:46.4872502Z Jun 06 22:27:46 	at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:288)
      2021-06-06T22:27:46.4873389Z Jun 06 22:27:46 	at org.junit.runners.ParentRunner.access$000(ParentRunner.java:58)
      2021-06-06T22:27:46.4874150Z Jun 06 22:27:46 	at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:268)
      2021-06-06T22:27:46.4874914Z Jun 06 22:27:46 	at org.junit.rules.ExternalResource$1.evaluate(ExternalResource.java:48)
      2021-06-06T22:27:46.4875661Z Jun 06 22:27:46 	at org.junit.rules.ExternalResource$1.evaluate(ExternalResource.java:48)
      2021-06-06T22:27:46.4876382Z Jun 06 22:27:46 	at org.junit.rules.RunRules.evaluate(RunRules.java:20)
      2021-06-06T22:27:46.4877018Z Jun 06 22:27:46 	at org.junit.runners.ParentRunner.run(ParentRunner.java:363)
      2021-06-06T22:27:46.4877661Z Jun 06 22:27:46 	at org.apache.maven.surefire.junit4.JUnit4Provider.execute(JUnit4Provider.java:365)
      2021-06-06T22:27:46.4878522Z Jun 06 22:27:46 	at org.apache.maven.surefire.junit4.JUnit4Provider.executeWithRerun(JUnit4Provider.java:273)
      2021-06-06T22:27:46.4879506Z Jun 06 22:27:46 	at org.apache.maven.surefire.junit4.JUnit4Provider.executeTestSet(JUnit4Provider.java:238)
      2021-06-06T22:27:46.4880246Z Jun 06 22:27:46 	at org.apache.maven.surefire.junit4.JUnit4Provider.invoke(JUnit4Provider.java:159)
      2021-06-06T22:27:46.4881025Z Jun 06 22:27:46 	at org.apache.maven.surefire.booter.ForkedBooter.invokeProviderInSameClassLoader(ForkedBooter.java:384)
      2021-06-06T22:27:46.4881839Z Jun 06 22:27:46 	at org.apache.maven.surefire.booter.ForkedBooter.runSuitesInProcess(ForkedBooter.java:345)
      2021-06-06T22:27:46.4882650Z Jun 06 22:27:46 	at org.apache.maven.surefire.booter.ForkedBooter.execute(ForkedBooter.java:126)
      2021-06-06T22:27:46.4883596Z Jun 06 22:27:46 	at org.apache.maven.surefire.booter.ForkedBooter.main(ForkedBooter.java:418)
      2021-06-06T22:27:46.4884971Z Jun 06 22:27:46 Caused by: java.util.concurrent.ExecutionException: java.util.concurrent.TimeoutException: Invocation of public default java.util.concurrent.CompletableFuture org.apache.flink.runtime.webmonitor.RestfulGateway.triggerSavepoint(org.apache.flink.api.common.JobID,java.lang.String,boolean,org.apache.flink.api.common.time.Time) timed out.
      2021-06-06T22:27:46.4886218Z Jun 06 22:27:46 	at java.util.concurrent.CompletableFuture.reportGet(CompletableFuture.java:357)
      2021-06-06T22:27:46.4887018Z Jun 06 22:27:46 	at java.util.concurrent.CompletableFuture.get(CompletableFuture.java:1928)
      2021-06-06T22:27:46.4887787Z Jun 06 22:27:46 	at org.apache.flink.state.api.utils.SavepointTestBase.takeSavepoint(SavepointTestBase.java:69)
      2021-06-06T22:27:46.4888521Z Jun 06 22:27:46 	... 34 more
      2021-06-06T22:27:46.4889560Z Jun 06 22:27:46 Caused by: java.util.concurrent.TimeoutException: Invocation of public default java.util.concurrent.CompletableFuture org.apache.flink.runtime.webmonitor.RestfulGateway.triggerSavepoint(org.apache.flink.api.common.JobID,java.lang.String,boolean,org.apache.flink.api.common.time.Time) timed out.
      2021-06-06T22:27:46.4890708Z Jun 06 22:27:46 	at com.sun.proxy.$Proxy32.triggerSavepoint(Unknown Source)
      2021-06-06T22:27:46.4891470Z Jun 06 22:27:46 	at org.apache.flink.runtime.minicluster.MiniCluster.lambda$triggerSavepoint$8(MiniCluster.java:716)
      2021-06-06T22:27:46.4892292Z Jun 06 22:27:46 	at java.util.concurrent.CompletableFuture.uniApply(CompletableFuture.java:616)
      2021-06-06T22:27:46.4893139Z Jun 06 22:27:46 	at java.util.concurrent.CompletableFuture.uniApplyStage(CompletableFuture.java:628)
      2021-06-06T22:27:46.4894022Z Jun 06 22:27:46 	at java.util.concurrent.CompletableFuture.thenApply(CompletableFuture.java:1996)
      2021-06-06T22:27:46.4894810Z Jun 06 22:27:46 	at org.apache.flink.runtime.minicluster.MiniCluster.runDispatcherCommand(MiniCluster.java:751)
      2021-06-06T22:27:46.4895876Z Jun 06 22:27:46 	at org.apache.flink.runtime.minicluster.MiniCluster.triggerSavepoint(MiniCluster.java:714)
      2021-06-06T22:27:46.4896736Z Jun 06 22:27:46 	at org.apache.flink.client.program.MiniClusterClient.triggerSavepoint(MiniClusterClient.java:101)
      2021-06-06T22:27:46.4897610Z Jun 06 22:27:46 	at org.apache.flink.state.api.utils.SavepointTestBase.triggerSavepoint(SavepointTestBase.java:93)
      2021-06-06T22:27:46.4898651Z Jun 06 22:27:46 	at org.apache.flink.state.api.utils.SavepointTestBase.lambda$takeSavepoint$0(SavepointTestBase.java:68)
      2021-06-06T22:27:46.4899492Z Jun 06 22:27:46 	at java.util.concurrent.CompletableFuture.uniCompose(CompletableFuture.java:966)
      2021-06-06T22:27:46.4900311Z Jun 06 22:27:46 	at java.util.concurrent.CompletableFuture$UniCompose.tryFire(CompletableFuture.java:940)
      2021-06-06T22:27:46.4901105Z Jun 06 22:27:46 	at java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:488)
      2021-06-06T22:27:46.4901882Z Jun 06 22:27:46 	at java.util.concurrent.CompletableFuture$AsyncRun.run(CompletableFuture.java:1646)
      2021-06-06T22:27:46.4902703Z Jun 06 22:27:46 	at java.util.concurrent.CompletableFuture$AsyncRun.exec(CompletableFuture.java:1632)
      2021-06-06T22:27:46.4903544Z Jun 06 22:27:46 	at java.util.concurrent.ForkJoinTask.doExec(ForkJoinTask.java:289)
      2021-06-06T22:27:46.4904457Z Jun 06 22:27:46 	at java.util.concurrent.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1056)
      2021-06-06T22:27:46.4905221Z Jun 06 22:27:46 	at java.util.concurrent.ForkJoinPool.runWorker(ForkJoinPool.java:1692)
      2021-06-06T22:27:46.4905948Z Jun 06 22:27:46 	at java.util.concurrent.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:175)
      2021-06-06T22:27:46.4908488Z Jun 06 22:27:46 Caused by: akka.pattern.AskTimeoutException: Ask timed out on [Actor[akka://flink/user/rpc/dispatcher_2#1085446192]] after [10000 ms]. Message of type [org.apache.flink.runtime.rpc.messages.LocalFencedMessage]. A typical reason for `AskTimeoutException` is that the recipient actor didn't send a reply.
      2021-06-06T22:27:46.4909806Z Jun 06 22:27:46 	at akka.pattern.PromiseActorRef$.$anonfun$defaultOnTimeout$1(AskSupport.scala:635)
      2021-06-06T22:27:46.4910572Z Jun 06 22:27:46 	at akka.pattern.PromiseActorRef$.$anonfun$apply$1(AskSupport.scala:650)
      2021-06-06T22:27:46.4911233Z Jun 06 22:27:46 	at akka.actor.Scheduler$$anon$4.run(Scheduler.scala:205)
      2021-06-06T22:27:46.4911980Z Jun 06 22:27:46 	at scala.concurrent.Future$InternalCallbackExecutor$.unbatchedExecute(Future.scala:870)
      2021-06-06T22:27:46.4912770Z Jun 06 22:27:46 	at scala.concurrent.BatchingExecutor.execute(BatchingExecutor.scala:109)
      2021-06-06T22:27:46.4913636Z Jun 06 22:27:46 	at scala.concurrent.BatchingExecutor.execute$(BatchingExecutor.scala:103)
      2021-06-06T22:27:46.4914406Z Jun 06 22:27:46 	at scala.concurrent.Future$InternalCallbackExecutor$.execute(Future.scala:868)
      2021-06-06T22:27:46.4915259Z Jun 06 22:27:46 	at akka.actor.LightArrayRevolverScheduler$TaskHolder.executeTask(LightArrayRevolverScheduler.scala:328)
      2021-06-06T22:27:46.4916164Z Jun 06 22:27:46 	at akka.actor.LightArrayRevolverScheduler$$anon$3.executeBucket$1(LightArrayRevolverScheduler.scala:279)
      2021-06-06T22:27:46.4917078Z Jun 06 22:27:46 	at akka.actor.LightArrayRevolverScheduler$$anon$3.nextTick(LightArrayRevolverScheduler.scala:283)
      2021-06-06T22:27:46.4917924Z Jun 06 22:27:46 	at akka.actor.LightArrayRevolverScheduler$$anon$3.run(LightArrayRevolverScheduler.scala:235)
      2021-06-06T22:27:46.4918737Z Jun 06 22:27:46 	at java.lang.Thread.run(Thread.java:748)
      

        Attachments

          Issue Links

            Activity

              People

              • Assignee:
                trohrmann Till Rohrmann
                Reporter:
                roman Roman Khachatryan
              • Votes:
                0 Vote for this issue
                Watchers:
                5 Start watching this issue

                Dates

                • Created:
                  Updated:
                  Resolved: