Uploaded image for project: 'Kafka'
  1. Kafka
  2. KAFKA-1850

Failed reassignment leads to additional replica

    XML | Word | Printable | JSON

Details

    • Bug
    • Status: Open
    • Minor
    • Resolution: Unresolved
    • 0.8.1
    • None
    • controller
    • CentOS (Linux Kernel 2.6.32-71.el6.x86_64 )

    Description

      When I started a reassignment for a topic (36 partitions in total) in my Kafka cluster, 24 partitions succeeded and 12 failed. However, the 12 failed partitions ended up with more replicas than before. I think the reason is that the assigned replicas (AR) still consist of both the reassigned replicas (RAR) and the original assigned replicas (OAR), even though the reassignment for the partition failed. Could we regard this problem as a bug? Apologies for any mistakes in my question; I am a beginner with Kafka.

      This is the output from operation:

      1. alex-topics-to-move.json:
      {"topics": [

      {"topic": "testingTopic"}

      ],
      "version":1
      }

      2. Generate a reassignment plan
      $./kafka-reassign-partitions.sh --generate --broker-list 0,1,2,3,4 --topics-to-move-json-file ./alex-topics-to-move.json --zookeeper 192.168.112.95:2181,192.168.112.96:2181,192.168.112.97:2181,192.168.112.98:2181,192.168.112.99:2181
      Current partition replica assignment
      {"version":1,
      "partitions":[

      {"topic":"testingTopic","partition":27,"replicas":[0,2]}

      ,

      {"topic":"testingTopic","partition":1,"replicas":[1,2]}

      ,

      {"topic":"testingTopic","partition":12,"replicas":[0,1]}

      ,

      {"topic":"testingTopic","partition":6,"replicas":[0,1]}

      ,

      {"topic":"testingTopic","partition":16,"replicas":[1,0]}

      ,

      {"topic":"testingTopic","partition":32,"replicas":[2,0]}

      ,

      {"topic":"testingTopic","partition":18,"replicas":[0,1]}

      ,

      {"topic":"testingTopic","partition":31,"replicas":[1,2]}

      ,

      {"topic":"testingTopic","partition":9,"replicas":[0,2]}

      ,

      {"topic":"testingTopic","partition":23,"replicas":[2,1]}

      ,

      {"topic":"testingTopic","partition":19,"replicas":[1,2]}

      ,

      {"topic":"testingTopic","partition":34,"replicas":[1,0]}

      ,

      {"topic":"testingTopic","partition":17,"replicas":[2,1]}

      ,

      {"topic":"testingTopic","partition":7,"replicas":[1,2]}

      ,

      {"topic":"testingTopic","partition":20,"replicas":[2,0]}

      ,

      {"topic":"testingTopic","partition":8,"replicas":[2,0]}

      ,

      {"topic":"testingTopic","partition":11,"replicas":[2,1]}

      ,

      {"topic":"testingTopic","partition":3,"replicas":[0,2]}

      ,

      {"topic":"testingTopic","partition":30,"replicas":[0,1]}

      ,

      {"topic":"testingTopic","partition":35,"replicas":[2,1]}

      ,

      {"topic":"testingTopic","partition":26,"replicas":[2,0]}

      ,

      {"topic":"testingTopic","partition":22,"replicas":[1,0]}

      ,

      {"topic":"testingTopic","partition":10,"replicas":[1,0]}

      ,

      {"topic":"testingTopic","partition":24,"replicas":[0,1]}

      ,

      {"topic":"testingTopic","partition":21,"replicas":[0,2]}

      ,

      {"topic":"testingTopic","partition":15,"replicas":[0,2]}

      ,

      {"topic":"testingTopic","partition":4,"replicas":[1,0]}

      ,

      {"topic":"testingTopic","partition":28,"replicas":[1,0]}

      ,

      {"topic":"testingTopic","partition":25,"replicas":[1,2]}

      ,

      {"topic":"testingTopic","partition":14,"replicas":[2,0]}

      ,

      {"topic":"testingTopic","partition":2,"replicas":[2,0]}

      ,

      {"topic":"testingTopic","partition":13,"replicas":[1,2]}

      ,

      {"topic":"testingTopic","partition":5,"replicas":[2,1]}

      ,

      {"topic":"testingTopic","partition":29,"replicas":[2,1]}

      ,

      {"topic":"testingTopic","partition":33,"replicas":[0,2]}

      ,

      {"topic":"testingTopic","partition":0,"replicas":[0,1]}

      ]}

      Proposed partition reassignment configuration ( alex-expand-cluster-reassignment.json )
      {"version":1,
      "partitions":[

      {"topic":"testingTopic","partition":27,"replicas":[0,4]}

      ,

      {"topic":"testingTopic","partition":1,"replicas":[4,2]}

      ,

      {"topic":"testingTopic","partition":12,"replicas":[0,1]}

      ,

      {"topic":"testingTopic","partition":6,"replicas":[4,3]}

      ,

      {"topic":"testingTopic","partition":16,"replicas":[4,1]}

      ,

      {"topic":"testingTopic","partition":32,"replicas":[0,1]}

      ,

      {"topic":"testingTopic","partition":18,"replicas":[1,3]}

      ,

      {"topic":"testingTopic","partition":31,"replicas":[4,0]}

      ,

      {"topic":"testingTopic","partition":23,"replicas":[1,4]}

      ,

      {"topic":"testingTopic","partition":9,"replicas":[2,1]}

      ,

      {"topic":"testingTopic","partition":19,"replicas":[2,4]}

      ,

      {"topic":"testingTopic","partition":34,"replicas":[2,3]}

      ,

      {"topic":"testingTopic","partition":17,"replicas":[0,2]}

      ,

      {"topic":"testingTopic","partition":20,"replicas":[3,1]}

      ,

      {"topic":"testingTopic","partition":7,"replicas":[0,4]}

      ,

      {"topic":"testingTopic","partition":8,"replicas":[1,0]}

      ,

      {"topic":"testingTopic","partition":11,"replicas":[4,0]}

      ,

      {"topic":"testingTopic","partition":3,"replicas":[1,4]}

      ,

      {"topic":"testingTopic","partition":35,"replicas":[3,0]}

      ,

      {"topic":"testingTopic","partition":30,"replicas":[3,4]}

      ,

      {"topic":"testingTopic","partition":26,"replicas":[4,3]}

      ,

      {"topic":"testingTopic","partition":22,"replicas":[0,3]}

      ,

      {"topic":"testingTopic","partition":10,"replicas":[3,4]}

      ,

      {"topic":"testingTopic","partition":24,"replicas":[2,0]}

      ,

      {"topic":"testingTopic","partition":21,"replicas":[4,2]}

      ,

      {"topic":"testingTopic","partition":15,"replicas":[3,0]}

      ,

      {"topic":"testingTopic","partition":4,"replicas":[2,0]}

      ,

      {"topic":"testingTopic","partition":25,"replicas":[3,2]}

      ,

      {"topic":"testingTopic","partition":28,"replicas":[1,0]}

      ,

      {"topic":"testingTopic","partition":14,"replicas":[2,3]}

      ,

      {"topic":"testingTopic","partition":2,"replicas":[0,3]}

      ,

      {"topic":"testingTopic","partition":13,"replicas":[1,2]}

      ,

      {"topic":"testingTopic","partition":5,"replicas":[3,2]}

      ,

      {"topic":"testingTopic","partition":29,"replicas":[2,1]}

      ,

      {"topic":"testingTopic","partition":33,"replicas":[1,2]}

      ,

      {"topic":"testingTopic","partition":0,"replicas":[3,1]}

      ]}

      3. Start the reassignment
      $./kafka-reassign-partitions.sh --execute --broker-list 0,1,2,3,4 --reassignment-json-file ./alex-expand-cluster-reassignment.json --zookeeper 192.168.112.85:2181,192.168.112.86:2181,192.168.112.87:2181,192.168.112.88:2181,192.168.112.89:2181
      Current partition replica assignment

      {"version":1,"partitions":[

      {"topic":"testingTopic","partition":27,"replicas":[0,2]}

      ,

      {"topic":"testingTopic","partition":1,"replicas":[1,2]}

      ,

      {"topic":"testingTopic","partition":12,"replicas":[0,1]}

      ,

      {"topic":"testingTopic","partition":6,"replicas":[0,1]}

      ,

      {"topic":"testingTopic","partition":16,"replicas":[1,0]}

      ,

      {"topic":"testingTopic","partition":32,"replicas":[2,0]}

      ,

      {"topic":"testingTopic","partition":18,"replicas":[0,1]}

      ,

      {"topic":"testingTopic","partition":31,"replicas":[1,2]}

      ,

      {"topic":"testingTopic","partition":9,"replicas":[0,2]}

      ,

      {"topic":"testingTopic","partition":23,"replicas":[2,1]}

      ,

      {"topic":"testingTopic","partition":19,"replicas":[1,2]}

      ,

      {"topic":"testingTopic","partition":34,"replicas":[1,0]}

      ,

      {"topic":"testingTopic","partition":17,"replicas":[2,1]}

      ,

      {"topic":"testingTopic","partition":7,"replicas":[1,2]}

      ,

      {"topic":"testingTopic","partition":20,"replicas":[2,0]}

      ,

      {"topic":"testingTopic","partition":8,"replicas":[2,0]}

      ,

      {"topic":"testingTopic","partition":11,"replicas":[2,1]}

      ,

      {"topic":"testingTopic","partition":3,"replicas":[0,2]}

      ,

      {"topic":"testingTopic","partition":30,"replicas":[0,1]}

      ,

      {"topic":"testingTopic","partition":35,"replicas":[2,1]}

      ,

      {"topic":"testingTopic","partition":26,"replicas":[2,0]}

      ,

      {"topic":"testingTopic","partition":22,"replicas":[1,0]}

      ,

      {"topic":"testingTopic","partition":10,"replicas":[1,0]}

      ,

      {"topic":"testingTopic","partition":24,"replicas":[0,1]}

      ,

      {"topic":"testingTopic","partition":21,"replicas":[0,2]}

      ,

      {"topic":"testingTopic","partition":15,"replicas":[0,2]}

      ,

      {"topic":"testingTopic","partition":4,"replicas":[1,0]}

      ,

      {"topic":"testingTopic","partition":28,"replicas":[1,0]}

      ,

      {"topic":"testingTopic","partition":25,"replicas":[1,2]}

      ,

      {"topic":"testingTopic","partition":14,"replicas":[2,0]}

      ,

      {"topic":"testingTopic","partition":2,"replicas":[2,0]}

      ,

      {"topic":"testingTopic","partition":13,"replicas":[1,2]}

      ,

      {"topic":"testingTopic","partition":5,"replicas":[2,1]}

      ,

      {"topic":"testingTopic","partition":29,"replicas":[2,1]}

      ,

      {"topic":"testingTopic","partition":33,"replicas":[0,2]}

      ,

      {"topic":"testingTopic","partition":0,"replicas":[0,1]}

      ]}

      Save this to use as the --reassignment-json-file option during rollback
      Successfully started reassignment of partitions {"version":1,"partitions":[

      {"topic":"testingTopic","partition":27,"replicas":[0,4]}

      ,

      {"topic":"testingTopic","partition":1,"replicas":[4,2]}

      ,

      {"topic":"testingTopic","partition":12,"replicas":[0,1]}

      ,

      {"topic":"testingTopic","partition":6,"replicas":[4,3]}

      ,

      {"topic":"testingTopic","partition":16,"replicas":[4,1]}

      ,

      {"topic":"testingTopic","partition":32,"replicas":[0,1]}

      ,

      {"topic":"testingTopic","partition":31,"replicas":[4,0]}

      ,

      {"topic":"testingTopic","partition":18,"replicas":[1,3]}

      ,

      {"topic":"testingTopic","partition":9,"replicas":[2,1]}

      ,

      {"topic":"testingTopic","partition":23,"replicas":[1,4]}

      ,

      {"topic":"testingTopic","partition":19,"replicas":[2,4]}

      ,

      {"topic":"testingTopic","partition":17,"replicas":[0,2]}

      ,

      {"topic":"testingTopic","partition":34,"replicas":[2,3]}

      ,

      {"topic":"testingTopic","partition":20,"replicas":[3,1]}

      ,

      {"topic":"testingTopic","partition":7,"replicas":[0,4]}

      ,

      {"topic":"testingTopic","partition":8,"replicas":[1,0]}

      ,

      {"topic":"testingTopic","partition":11,"replicas":[4,0]}

      ,

      {"topic":"testingTopic","partition":3,"replicas":[1,4]}

      ,

      {"topic":"testingTopic","partition":35,"replicas":[3,0]}

      ,

      {"topic":"testingTopic","partition":30,"replicas":[3,4]}

      ,

      {"topic":"testingTopic","partition":26,"replicas":[4,3]}

      ,

      {"topic":"testingTopic","partition":22,"replicas":[0,3]}

      ,

      {"topic":"testingTopic","partition":10,"replicas":[3,4]}

      ,

      {"topic":"testingTopic","partition":24,"replicas":[2,0]}

      ,

      {"topic":"testingTopic","partition":21,"replicas":[4,2]}

      ,

      {"topic":"testingTopic","partition":15,"replicas":[3,0]}

      ,

      {"topic":"testingTopic","partition":4,"replicas":[2,0]}

      ,

      {"topic":"testingTopic","partition":28,"replicas":[1,0]}

      ,

      {"topic":"testingTopic","partition":25,"replicas":[3,2]}

      ,

      {"topic":"testingTopic","partition":14,"replicas":[2,3]}

      ,

      {"topic":"testingTopic","partition":2,"replicas":[0,3]}

      ,

      {"topic":"testingTopic","partition":13,"replicas":[1,2]}

      ,

      {"topic":"testingTopic","partition":5,"replicas":[3,2]}

      ,

      {"topic":"testingTopic","partition":29,"replicas":[2,1]}

      ,

      {"topic":"testingTopic","partition":33,"replicas":[1,2]}

      ,

      {"topic":"testingTopic","partition":0,"replicas":[3,1]}

      ]}

      4. The result of my Topic reassignment (More than 4 days so far)

      $./kafka-reassign-partitions.sh --verify --reassignment-json-file ./alex-expand-cluster-reassignment.json --zookeeper 192.168.112.85:2181,192.168.112.86:2181,192.168.112.87:2181,192.168.112.88:2181,192.168.112.89:2181
      Status of partition reassignment:
      ERROR: Assigned replicas (4,2,1) don't match the list of replicas for reassignment (4,2) for partition [testingTopic,1]
      ERROR: Assigned replicas (2,1,0) don't match the list of replicas for reassignment (2,1) for partition [testingTopic,9]
      ERROR: Assigned replicas (2,4,1) don't match the list of replicas for reassignment (2,4) for partition [testingTopic,19]
      ERROR: Assigned replicas (0,2,1) don't match the list of replicas for reassignment (0,2) for partition [testingTopic,17]
      ERROR: Assigned replicas (2,3,1,0) don't match the list of replicas for reassignment (2,3) for partition [testingTopic,34]
      ERROR: Assigned replicas (2,0,1) don't match the list of replicas for reassignment (2,0) for partition [testingTopic,24]
      ERROR: Assigned replicas (4,2,0) don't match the list of replicas for reassignment (4,2) for partition [testingTopic,21]
      ERROR: Assigned replicas (2,0,1) don't match the list of replicas for reassignment (2,0) for partition [testingTopic,4]
      ERROR: Assigned replicas (3,2,1) don't match the list of replicas for reassignment (3,2) for partition [testingTopic,25]
      ERROR: Assigned replicas (2,3,0) don't match the list of replicas for reassignment (2,3) for partition [testingTopic,14]
      ERROR: Assigned replicas (3,2,1) don't match the list of replicas for reassignment (3,2) for partition [testingTopic,5]
      ERROR: Assigned replicas (1,2,0) don't match the list of replicas for reassignment (1,2) for partition [testingTopic,33]
      Reassignment of partition [testingTopic,10] completed successfully
      Reassignment of partition [testingTopic,27] completed successfully
      Reassignment of partition [testingTopic,13] completed successfully
      Reassignment of partition [testingTopic,34] failed
      Reassignment of partition [testingTopic,8] completed successfully
      Reassignment of partition [testingTopic,25] failed
      Reassignment of partition [testingTopic,35] completed successfully
      Reassignment of partition [testingTopic,31] completed successfully
      Reassignment of partition [testingTopic,18] completed successfully
      Reassignment of partition [testingTopic,19] failed
      Reassignment of partition [testingTopic,7] completed successfully
      Reassignment of partition [testingTopic,9] failed
      Reassignment of partition [testingTopic,0] completed successfully
      Reassignment of partition [testingTopic,3] completed successfully
      Reassignment of partition [testingTopic,2] completed successfully
      Reassignment of partition [testingTopic,26] completed successfully
      Reassignment of partition [testingTopic,30] completed successfully
      Reassignment of partition [testingTopic,11] completed successfully
      Reassignment of partition [testingTopic,4] failed
      Reassignment of partition [testingTopic,24] failed
      Reassignment of partition [testingTopic,32] completed successfully
      Reassignment of partition [testingTopic,15] completed successfully
      Reassignment of partition [testingTopic,6] completed successfully
      Reassignment of partition [testingTopic,28] completed successfully
      Reassignment of partition [testingTopic,17] failed
      Reassignment of partition [testingTopic,20] completed successfully
      Reassignment of partition [testingTopic,21] failed
      Reassignment of partition [testingTopic,16] completed successfully
      Reassignment of partition [testingTopic,22] completed successfully
      Reassignment of partition [testingTopic,23] completed successfully
      Reassignment of partition [testingTopic,1] failed
      Reassignment of partition [testingTopic,5] failed
      Reassignment of partition [testingTopic,12] completed successfully
      Reassignment of partition [testingTopic,33] failed
      Reassignment of partition [testingTopic,14] failed
      Reassignment of partition [testingTopic,29] completed successfully

      5. Current Topic Status
      $./kafka-topics.sh --describe --topic testingTopic --zookeeper 192.168.112.95:2181,192.168.112.96:2181,192.168.112.97:2181,192.168.112.98:2181,192.168.112.99:2181
      Topic:halog PartitionCount:36 ReplicationFactor:2 Configs:
      Topic: halog Partition: 0 Leader: 3 Replicas: 3,1 Isr: 3,1
      Topic: halog Partition: 1 Leader: 2 Replicas: 4,2,1 Isr: 2,4,1 <====
      Topic: halog Partition: 2 Leader: 0 Replicas: 0,3 Isr: 0,3
      Topic: halog Partition: 3 Leader: 4 Replicas: 1,4 Isr: 4,1
      Topic: halog Partition: 4 Leader: 2 Replicas: 2,0,1 Isr: 2,0,1 <====
      Topic: halog Partition: 5 Leader: 2 Replicas: 3,2,1 Isr: 2,3,1 <====
      Topic: halog Partition: 6 Leader: 4 Replicas: 4,3 Isr: 4,3
      Topic: halog Partition: 7 Leader: 0 Replicas: 0,4 Isr: 4,0
      Topic: halog Partition: 8 Leader: 0 Replicas: 1,0 Isr: 0,1
      Topic: halog Partition: 9 Leader: 0 Replicas: 2,1,0 Isr: 0,2,1 <====
      Topic: halog Partition: 10 Leader: 3 Replicas: 3,4 Isr: 4,3
      Topic: halog Partition: 11 Leader: 4 Replicas: 4,0 Isr: 4,0
      Topic: halog Partition: 12 Leader: 0 Replicas: 0,1 Isr: 0,1
      Topic: halog Partition: 13 Leader: 2 Replicas: 1,2 Isr: 2,1
      Topic: halog Partition: 14 Leader: 3 Replicas: 2,3,0 Isr: 3,0,2 <====
      Topic: halog Partition: 15 Leader: 3 Replicas: 3,0 Isr: 3,0
      Topic: halog Partition: 16 Leader: 4 Replicas: 4,1 Isr: 4,1
      Topic: halog Partition: 17 Leader: 2 Replicas: 0,2,1 Isr: 2,0,1 <====
      Topic: halog Partition: 18 Leader: 1 Replicas: 1,3 Isr: 3,1
      Topic: halog Partition: 19 Leader: 2 Replicas: 2,4,1 Isr: 2,4,1 <====
      Topic: halog Partition: 20 Leader: 3 Replicas: 3,1 Isr: 3,1
      Topic: halog Partition: 21 Leader: 4 Replicas: 4,2,0 Isr: 4,0,2 <====
      Topic: halog Partition: 22 Leader: 0 Replicas: 0,3 Isr: 0,3
      Topic: halog Partition: 23 Leader: 1 Replicas: 1,4 Isr: 4,1
      Topic: halog Partition: 24 Leader: 2 Replicas: 2,0,1 Isr: 2,0,1 <====
      Topic: halog Partition: 25 Leader: 2 Replicas: 3,2,1 Isr: 2,3,1 <====
      Topic: halog Partition: 26 Leader: 4 Replicas: 4,3 Isr: 4,3
      Topic: halog Partition: 27 Leader: 0 Replicas: 0,4 Isr: 0,4
      Topic: halog Partition: 28 Leader: 0 Replicas: 1,0 Isr: 0,1
      Topic: halog Partition: 29 Leader: 2 Replicas: 2,1 Isr: 2,1
      Topic: halog Partition: 30 Leader: 3 Replicas: 3,4 Isr: 4,3
      Topic: halog Partition: 31 Leader: 4 Replicas: 4,0 Isr: 4,0
      Topic: halog Partition: 32 Leader: 0 Replicas: 0,1 Isr: 0,1
      Topic: halog Partition: 33 Leader: 0 Replicas: 1,2,0 Isr: 0,2,1 <====
      Topic: halog Partition: 34 Leader: 2 Replicas: 2,3,1,0 Isr: 2,0,3,1 <====
      Topic: halog Partition: 35 Leader: 3 Replicas: 3,0 Isr: 3,0

      Attachments

        Activity

          People

            Unassigned Unassigned
            qdutj Alex Tian
            Votes:
            0 Vote for this issue
            Watchers:
            3 Start watching this issue

            Dates

              Created:
              Updated:

              Time Tracking

                Estimated:
                Original Estimate - 504h
                504h
                Remaining:
                Remaining Estimate - 504h
                504h
                Logged:
                Time Spent - Not Specified
                Not Specified