Uploaded image for project: 'Flink'
  1. Flink
  2. FLINK-17663

CheckpointBarrierUnaligner.getFlattenedChannelIndex can throw ArrayIndexOutOfBoundsException

    XML | Word | Printable | JSON

Details

    Description

      KeyedStateCheckpointingITCase with unaligned checkpoints enabled always livelocks because of an infinite failure loop caused by the following exception:

      java.io.IOException: java.lang.ArrayIndexOutOfBoundsException: 1
      	at org.apache.flink.runtime.io.network.partition.consumer.InputChannel.checkError(InputChannel.java:224) ~[classes/:?]
      	at org.apache.flink.runtime.io.network.partition.consumer.RemoteInputChannel.getNextBuffer(RemoteInputChannel.java:173) ~[classes/:?]
      	at org.apache.flink.runtime.io.network.partition.consumer.SingleInputGate.waitAndGetNextData(SingleInputGate.java:637) ~[classes/:?]
      	at org.apache.flink.runtime.io.network.partition.consumer.SingleInputGate.getNextBufferOrEvent(SingleInputGate.java:615) ~[classes/:?]
      	at org.apache.flink.runtime.io.network.partition.consumer.SingleInputGate.pollNext(SingleInputGate.java:603) ~[classes/:?]
      	at org.apache.flink.runtime.taskmanager.InputGateWithMetrics.pollNext(InputGateWithMetrics.java:105) ~[classes/:?]
      	at org.apache.flink.runtime.io.network.partition.consumer.UnionInputGate.waitAndGetNextData(UnionInputGate.java:193) ~[classes/:?]
      	at org.apache.flink.runtime.io.network.partition.consumer.UnionInputGate.getNextBufferOrEvent(UnionInputGate.java:168) ~[classes/:?]
      	at org.apache.flink.runtime.io.network.partition.consumer.UnionInputGate.pollNext(UnionInputGate.java:160) ~[classes/:?]
      	at org.apache.flink.streaming.runtime.io.CheckpointedInputGate.pollNext(CheckpointedInputGate.java:110) ~[classes/:?]
      	at org.apache.flink.streaming.runtime.io.StreamTaskNetworkInput.emitNext(StreamTaskNetworkInput.java:136) ~[classes/:?]
      	at org.apache.flink.streaming.runtime.io.StreamOneInputProcessor.processInput(StreamOneInputProcessor.java:66) ~[classes/:?]
      	at org.apache.flink.streaming.runtime.tasks.StreamTask.processInput(StreamTask.java:340) ~[classes/:?]
      	at org.apache.flink.streaming.runtime.tasks.mailbox.MailboxProcessor.runMailboxStep(MailboxProcessor.java:206) ~[classes/:?]
      	at org.apache.flink.streaming.runtime.tasks.mailbox.MailboxProcessor.runMailboxLoop(MailboxProcessor.java:196) ~[classes/:?]
      	at org.apache.flink.streaming.runtime.tasks.StreamTask.runMailboxLoop(StreamTask.java:553) ~[classes/:?]
      	at org.apache.flink.streaming.runtime.tasks.StreamTask.invoke(StreamTask.java:525) ~[classes/:?]
      	at org.apache.flink.runtime.taskmanager.Task.doRun(Task.java:713) [classes/:?]
      	at org.apache.flink.runtime.taskmanager.Task.run(Task.java:539) [classes/:?]
      	at java.lang.Thread.run(Thread.java:748) [?:1.8.0_131]
      	Suppressed: java.lang.AssertionError: Test ineffective: Function cleanly finished without ever failing.
      		at org.junit.Assert.fail(Assert.java:88) ~[junit-4.12.jar:4.12]
      		at org.apache.flink.test.checkpointing.KeyedStateCheckpointingITCase$OnceFailingPartitionedSum.close(KeyedStateCheckpointingITCase.java:325) ~[test-classes/:?]
      		at org.apache.flink.api.common.functions.util.FunctionUtils.closeFunction(FunctionUtils.java:43) ~[classes/:?]
      		at org.apache.flink.streaming.api.operators.AbstractUdfStreamOperator.dispose(AbstractUdfStreamOperator.java:117) ~[classes/:?]
      		at org.apache.flink.streaming.runtime.tasks.StreamTask.disposeAllOperators(StreamTask.java:698) ~[classes/:?]
      		at org.apache.flink.streaming.runtime.tasks.StreamTask.cleanUpInvoke(StreamTask.java:630) ~[classes/:?]
      		at org.apache.flink.streaming.runtime.tasks.StreamTask.invoke(StreamTask.java:537) ~[classes/:?]
      		at org.apache.flink.runtime.taskmanager.Task.doRun(Task.java:713) [classes/:?]
      		at org.apache.flink.runtime.taskmanager.Task.run(Task.java:539) [classes/:?]
      		at java.lang.Thread.run(Thread.java:748) [?:1.8.0_131]
      Caused by: java.lang.ArrayIndexOutOfBoundsException: 1
      	at org.apache.flink.streaming.runtime.io.CheckpointBarrierUnaligner.getFlattenedChannelIndex(CheckpointBarrierUnaligner.java:256) ~[classes/:?]
      	at org.apache.flink.streaming.runtime.io.CheckpointBarrierUnaligner.access$000(CheckpointBarrierUnaligner.java:55) ~[classes/:?]
      	at org.apache.flink.streaming.runtime.io.CheckpointBarrierUnaligner$ThreadSafeUnaligner.notifyBarrierReceived(CheckpointBarrierUnaligner.java:311) ~[classes/:?]
      	at org.apache.flink.runtime.io.network.partition.consumer.RemoteInputChannel.onBuffer(RemoteInputChannel.java:472) ~[classes/:?]
      	at org.apache.flink.runtime.io.network.netty.CreditBasedPartitionRequestClientHandler.decodeBufferOrEvent(CreditBasedPartitionRequestClientHandler.java:303) ~[classes/:?]
      	at org.apache.flink.runtime.io.network.netty.CreditBasedPartitionRequestClientHandler.decodeMsg(CreditBasedPartitionRequestClientHandler.java:267) ~[classes/:?]
      	at org.apache.flink.runtime.io.network.netty.CreditBasedPartitionRequestClientHandler.channelRead(CreditBasedPartitionRequestClientHandler.java:182) ~[classes/:?]
      	at org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:374) ~[flink-shaded-netty-4.1.39.Final-10.0.jar:?]
      	at org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:360) ~[flink-shaded-netty-4.1.39.Final-10.0.jar:?]
      	at org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:352) ~[flink-shaded-netty-4.1.39.Final-10.0.jar:?]
      	at org.apache.flink.runtime.io.network.netty.NettyMessageClientDecoderDelegate.channelRead(NettyMessageClientDecoderDelegate.java:115) ~[classes/:?]
      	at org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:374) ~[flink-shaded-netty-4.1.39.Final-10.0.jar:?]
      	at org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:360) ~[flink-shaded-netty-4.1.39.Final-10.0.jar:?]
      	at org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:352) ~[flink-shaded-netty-4.1.39.Final-10.0.jar:?]
      	at org.apache.flink.shaded.netty4.io.netty.channel.DefaultChannelPipeline$HeadContext.channelRead(DefaultChannelPipeline.java:1421) ~[flink-shaded-netty-4.1.39.Final-10.0.jar:?]
      	at org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:374) ~[flink-shaded-netty-4.1.39.Final-10.0.jar:?]
      	at org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:360) ~[flink-shaded-netty-4.1.39.Final-10.0.jar:?]
      	at org.apache.flink.shaded.netty4.io.netty.channel.DefaultChannelPipeline.fireChannelRead(DefaultChannelPipeline.java:930) ~[flink-shaded-netty-4.1.39.Final-10.0.jar:?]
      	at org.apache.flink.shaded.netty4.io.netty.channel.nio.AbstractNioByteChannel$NioByteUnsafe.read(AbstractNioByteChannel.java:163) ~[flink-shaded-netty-4.1.39.Final-10.0.jar:?]
      	at org.apache.flink.shaded.netty4.io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:697) ~[flink-shaded-netty-4.1.39.Final-10.0.jar:?]
      	at org.apache.flink.shaded.netty4.io.netty.channel.nio.NioEventLoop.processSelectedKeysOptimized(NioEventLoop.java:632) ~[flink-shaded-netty-4.1.39.Final-10.0.jar:?]
      	at org.apache.flink.shaded.netty4.io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:549) ~[flink-shaded-netty-4.1.39.Final-10.0.jar:?]
      	at org.apache.flink.shaded.netty4.io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:511) ~[flink-shaded-netty-4.1.39.Final-10.0.jar:?]
      	at org.apache.flink.shaded.netty4.io.netty.util.concurrent.SingleThreadEventExecutor$5.run(SingleThreadEventExecutor.java:918) ~[flink-shaded-netty-4.1.39.Final-10.0.jar:?]
      	at org.apache.flink.shaded.netty4.io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74) ~[flink-shaded-netty-4.1.39.Final-10.0.jar:?]
      	... 1 more
      

      Attachments

        Issue Links

          Activity

            People

              pnowojski Piotr Nowojski
              pnowojski Piotr Nowojski
              Votes:
              0 Vote for this issue
              Watchers:
              1 Start watching this issue

              Dates

                Created:
                Updated:
                Resolved: