Background: when Kafka is hit by an unexpected burst of traffic, the cluster machines end up under high load with disk I/O saturated. Replica synchronization then falls badly behind and cannot catch up. Adding brokers does not help on its own: replicas cannot be migrated while the cluster is in this state, and newly added partitions are by default spread across all brokers, so they do not noticeably offload the traffic. The cluster is stuck in a vicious circle with no way out.
However, if we specify which brokers the new partitions' replicas go to, all of the new partitions can be placed on the newly added brokers. No data has to be migrated and the traffic gets spread out, which solves the problem neatly.
The tool adapts Kafka's original replica-assignment algorithm: the assignment logic itself is unchanged, but the partition range and the brokers can be specified explicitly. It computes the replica lists for the new partitions and then creates them through the admin API with those explicit assignments.
Usage: go run AssignReplicasToBrokers.go -broker=xxx:9092 -broker_list=3,4,5 -topic=test1 -partitions=7 -factor=3 (a worked example of the computed assignment follows the flag descriptions below)
-startPartition can be omitted; the id of the first new partition is computed automatically from the topic's existing partitions.
-partitions is the target total number of partitions for the topic; the number of new partitions is derived from it, so it does not need to be given separately.
-factor is the replication factor and can be set to 2 or 3; if not specified it defaults to 3.
-broker_list is the list of broker ids that the new replicas should be placed on; -topic is the topic to expand.
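To make the assignment concrete, here is a minimal sketch of the same round-robin replica placement the tool performs. The inputs are hypothetical (broker ids 3, 4 and 5, replication factor 3, two new partitions with ids 5 and 6) and the start index is fixed at 0 for readability, whereas the tool picks it at random from -broker_list:

package main

import "fmt"

// replicaIndex mirrors the tool's shift formula: the k-th follower is placed
// 1 + (shift+k) % (n-1) slots after the leader's slot.
func replicaIndex(first, shift, k, n int32) int32 {
	return (first + 1 + (shift+k)%(n-1)) % n
}

func main() {
	brokers := []int32{3, 4, 5} // hypothetical broker ids from -broker_list
	factor := 3                 // replication factor
	n := int32(len(brokers))
	var startIndex, shift int32     // fixed at 0 here; the tool chooses them randomly
	for p := int32(5); p < 7; p++ { // pretend partitions 5 and 6 are the new ones
		// Rotate the follower shift each time the partition id wraps around the broker list.
		if p > 0 && p%n == 0 {
			shift++
		}
		first := (p + startIndex) % n
		replicas := []int32{brokers[first]} // leader first
		for k := int32(0); k < int32(factor-1); k++ {
			replicas = append(replicas, brokers[replicaIndex(first, shift, k, n)])
		}
		fmt.Printf("partition %d -> replicas %v\n", p, replicas)
	}
}

With these inputs the sketch prints partition 5 -> replicas [5 3 4] and partition 6 -> replicas [3 5 4]: every replica of the new partitions sits on one of the specified brokers.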
package main

import (
	"flag"
	"fmt"
	"math/rand"
	"strconv"
	"strings"

	"github.com/Shopify/sarama"
)

// conf holds the command-line configuration for the partition expansion.
type conf struct {
	broker          []string // bootstrap broker addresses
	broker_list     []int32  // broker ids that the new replicas are assigned to
	topic           string
	startPartition  int32 // id of the first new partition, derived from the topic
	partitions      int32 // target total number of partitions
	factor          int   // replication factor, 2 or 3
	fixedStartIndex int32 // >= 0 makes the assignment deterministic, -1 means random
}

func main() {
	broker := flag.String("broker", "", "bootstrap broker address, e.g. host:9092")
	broker_list := flag.String("broker_list", "", "comma-separated broker ids for the new replicas")
	topic := flag.String("topic", "", "topic to expand")
	startPartition := flag.Int("startPartition", 0, "first new partition id (computed automatically if omitted)")
	partitions := flag.Int("partitions", 0, "target total number of partitions")
	factor := flag.Int("factor", 0, "replication factor, 2 or 3 (default 3)")
	flag.Parse()

	if *factor > 3 || *factor < 2 {
		fmt.Println("factor must be 2 or 3, falling back to the default of 3")
		*factor = 3
	}

	br_list := stringToIntSlice(*broker_list, ",")
	c := conf{
		broker:          strings.Split(*broker, ","),
		broker_list:     br_list,
		topic:           *topic,
		startPartition:  int32(*startPartition),
		partitions:      int32(*partitions),
		factor:          *factor,
		fixedStartIndex: -1,
	}

	client, err := sarama.NewClient(c.broker, nil)
	if err != nil {
		fmt.Println("client get err", err)
		return
	}
	defer client.Close()

	ntopics, err := client.Topics()
	if err != nil {
		fmt.Println("get cluster topics err", err)
		return
	}
	if !strInSlice(ntopics, c.topic) {
		panic("topic not in this cluster")
	}

	pars, err := client.Partitions(c.topic)
	if err != nil {
		fmt.Println("get topic partition error", err)
		return
	}
	topic_pars := len(pars)

	// The first new partition id is always the current partition count.
	c.startPartition = int32(topic_pars)
	if c.partitions <= int32(topic_pars) {
		panic("target partition count must be greater than the current partition count")
	}
	new_pars := c.partitions - int32(topic_pars)

	// Compute the replica assignment for the new partitions only.
	newReplicas := assignReplicas(c.broker_list, c.startPartition, new_pars, &c)
	fmt.Println(newReplicas)

	admin, err := sarama.NewClusterAdmin(c.broker, nil)
	if err != nil {
		fmt.Println("clusteradmin connect error", err)
		return
	}
	defer admin.Close()

	err = admin.CreatePartitions(c.topic, c.partitions, newReplicas, false)
	if err != nil {
		fmt.Println("create partition failed", err)
	} else {
		fmt.Println("create partition success")
	}
}

// assignReplicas follows Kafka's round-robin replica assignment, restricted to
// the brokers in broker_list, and returns one replica list per new partition.
func assignReplicas(broker_list []int32, startPartition int32, newpartition int32, c *conf) [][]int32 {
	ret := [][]int32{}
	var startIndex, nextReplicaShift, currentPartitonId int32
	if c.fixedStartIndex >= 0 {
		startIndex = c.fixedStartIndex
		nextReplicaShift = c.fixedStartIndex
	} else {
		startIndex = broker_list[rand.Intn(len(broker_list))]
		nextReplicaShift = broker_list[rand.Intn(len(broker_list))]
	}
	if startPartition > 0 {
		currentPartitonId = startPartition
	}
	fmt.Println(startIndex, nextReplicaShift, currentPartitonId)

	for i := int32(0); i < newpartition; i++ {
		replicaBuff := []int32{}
		// Rotate the follower shift each time the partition id wraps around the broker list.
		if currentPartitonId > 0 && currentPartitonId%int32(len(broker_list)) == 0 {
			nextReplicaShift++
		}
		// The leader goes to the slot determined by the partition id and the start index.
		firstReplicaIndex := (currentPartitonId + startIndex) % int32(len(broker_list))
		replicaBuff = append(replicaBuff, broker_list[firstReplicaIndex])
		// Followers are spread over the remaining brokers.
		for j := 0; j < c.factor-1; j++ {
			replicaBuff = append(replicaBuff, broker_list[replicaIndex(firstReplicaIndex, nextReplicaShift, int32(j), int32(len(broker_list)))])
		}
		ret = append(ret, replicaBuff)
		currentPartitonId++
	}
	return ret
}

// replicaIndex places the j-th follower 1 + (shift+j) % (n-1) slots after the leader.
func replicaIndex(firstReplicaIndex int32, secondReplicaShift int32, replicaIndex int32, nBrokers int32) int32 {
	shift := 1 + (secondReplicaShift+replicaIndex)%(nBrokers-1)
	return (firstReplicaIndex + shift) % nBrokers
}

// stringToIntSlice parses a comma-separated id list, skipping empty and invalid entries.
func stringToIntSlice(str string, sep string) (res []int32) {
	for _, i := range strings.Split(str, sep) {
		if i == "" {
			continue
		}
		val, err := strconv.ParseInt(i, 10, 32)
		if err != nil {
			continue
		}
		res = append(res, int32(val))
	}
	return
}

// strInSlice reports whether m is one of the strings in sl.
func strInSlice(sl []string, m string) bool {
	set := make(map[string]struct{}, len(sl))
	for _, v := range sl {
		set[v] = struct{}{}
	}
	_, ok := set[m]
	return ok
}
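Once the new partitions are created, it is easy to double-check that their replicas really sit on the brokers passed via -broker_list. A minimal verification sketch with the same sarama admin client follows; the broker address and topic name are placeholders taken from the usage example:

package main

import (
	"fmt"

	"github.com/Shopify/sarama"
)

func main() {
	// Placeholder address; point this at the cluster that was just expanded.
	admin, err := sarama.NewClusterAdmin([]string{"xxx:9092"}, nil)
	if err != nil {
		panic(err)
	}
	defer admin.Close()

	// DescribeTopics returns per-partition metadata, including the replica lists.
	metas, err := admin.DescribeTopics([]string{"test1"})
	if err != nil {
		panic(err)
	}
	for _, meta := range metas {
		for _, p := range meta.Partitions {
			// The new partitions should list only the brokers given via -broker_list.
			fmt.Printf("partition %d leader %d replicas %v\n", p.ID, p.Leader, p.Replicas)
		}
	}
}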
Usage: python create_new_assign_partitions.py --broker=xxx:9092 --broker_list=3,4,5 --topic=test1 --partitions=10
#!/usr/bin/env python
# -*- coding:UTF-8 -*-
import random
import argparse

from kafka.admin import KafkaAdminClient, NewPartitions
from kafka import KafkaConsumer


class AssignReplicasToBrokers:
    """Round-robin replica assignment restricted to the given broker ids."""

    def __init__(self, nPartitions, brokerlist, replicationFactor, startPartition=0):
        self.Partitions = nPartitions    # number of new partitions to assign
        self.brokers = brokerlist        # broker ids for the new replicas
        self.factor = replicationFactor  # replication factor, 2 or 3
        self.fixedStartIndex = -1        # >= 0 makes the assignment deterministic
        self.startPartitionId = startPartition if startPartition else -1
        self.ret = {}

    def assignReplicas(self):
        startIndex = self.fixedStartIndex if self.fixedStartIndex >= 0 else random.choice(self.brokers)
        nextReplicaShift = self.fixedStartIndex if self.fixedStartIndex >= 0 else random.choice(self.brokers)
        currentPartitionId = max(0, self.startPartitionId)
        for i in range(self.Partitions):
            replicaBuffer = []
            # Rotate the follower shift each time the partition id wraps around the broker list.
            if currentPartitionId > 0 and (currentPartitionId % len(self.brokers) == 0):
                nextReplicaShift += 1
            firstReplicaIndex = (currentPartitionId + startIndex) % len(self.brokers)
            replicaBuffer.append(self.brokers[firstReplicaIndex])
            for j in range(self.factor - 1):
                replicaBuffer.append(self.brokers[self.replicaIndex(firstReplicaIndex, nextReplicaShift, j, len(self.brokers))])
            self.ret[currentPartitionId] = replicaBuffer
            currentPartitionId += 1
        return self.ret

    def replicaIndex(self, firstReplicaIndex, secondReplicaShift, replicaIndex, nBrokers):
        # The j-th follower is placed 1 + (shift + j) % (n - 1) slots after the leader.
        shift = 1 + (secondReplicaShift + replicaIndex) % (nBrokers - 1)
        return (firstReplicaIndex + shift) % nBrokers


parser = argparse.ArgumentParser(description='broker,broker_list,topic,partitions')
parser.add_argument('--broker', type=str, required=True, default='')
parser.add_argument('--broker_list', type=str, required=True)
parser.add_argument('--partitions', type=int, required=True)
parser.add_argument('--topic', type=str, required=True)
parser.add_argument('--factor', type=int, default=3)
args = parser.parse_args()

broker = args.broker
broker_list = [int(i) for i in args.broker_list.split(',') if i]
topic = args.topic
partitions = args.partitions
factor = args.factor
if factor > 3 or factor < 2:
    print("resetting factor to 3")
    factor = 3

admin = KafkaAdminClient(bootstrap_servers=broker)
consumer = KafkaConsumer(bootstrap_servers=broker)

# Current partition count; the new partitions start right after the existing ones.
pars = consumer.partitions_for_topic(topic)
num_pars = len(pars)
print(num_pars)
startPartition = num_pars
newPartition = partitions - num_pars

assigns = AssignReplicasToBrokers(newPartition, broker_list, factor, startPartition)
reps = list(assigns.assignReplicas().values())

# NewPartitions takes the new total count plus one replica list per new partition.
new_partitions = {topic: NewPartitions(partitions, reps)}
res = admin.create_partitions(new_partitions)
print(reps)
print(res)
With this approach you can, in a single step, assign the replicas of a topic's new partitions to brokers you choose, which takes the pressure off when a sudden traffic spike drives the machines to high load and blocks both replica synchronization and data migration. The Go version can be compiled with go build into a standalone binary and kept in a package so it is ready whenever it is needed; the Python version works the same way and is run with python plus the script name and arguments. It effectively becomes another stock Kafka tool and solves this class of problem cleanly.
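For example, one possible way to package and run the Go tool (the binary name assign_replicas is arbitrary):
go build -o assign_replicas AssignReplicasToBrokers.go
./assign_replicas -broker=xxx:9092 -broker_list=3,4,5 -topic=test1 -partitions=7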