Kafka's official RoundRobinPartitioner distributes data unevenly across odd/even partitions

With the RoundRobinPartitioner shipped in the Kafka client, keyless records can end up on only the even-numbered (or only the odd-numbered) partitions. The cause is the interaction with sticky partitioning (KIP-480): when a record triggers the creation of a new batch, the producer calls partition() a second time for that same record, so the round-robin counter advances by two and every other partition is skipped. The modified class below works around this by having onNewBatch() enqueue the previously chosen partition and having partition() return that queued value first, so the extra call no longer consumes a counter increment.

Reference:

https://www.cnblogs.com/cbc-onne/p/18140043
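
The skew is easy to observe. Below is a minimal reproduction sketch, run against the unmodified kafka-clients jar (i.e. without the replacement class from this post on the classpath); the broker address localhost:9092, the topic name test-topic and the record count are assumptions for illustration. Keyless records are sent with the stock RoundRobinPartitioner and per-partition counts are printed; when records regularly trigger new batches, roughly half of the partitions stay (almost) empty.

import java.util.Properties;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.clients.producer.RoundRobinPartitioner;
import org.apache.kafka.common.serialization.StringSerializer;

public class RoundRobinSkewDemo {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); // assumption
        props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
        props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
        // Stock partitioner shipped with the Kafka client
        props.put(ProducerConfig.PARTITIONER_CLASS_CONFIG, RoundRobinPartitioner.class.getName());

        ConcurrentMap<Integer, AtomicInteger> countsPerPartition = new ConcurrentHashMap<>();
        try (KafkaProducer<String, String> producer = new KafkaProducer<>(props)) {
            for (int i = 0; i < 10_000; i++) {
                // Keyless records, so the partitioner alone decides the target partition
                producer.send(new ProducerRecord<>("test-topic", null, "msg-" + i),
                        (metadata, exception) -> {
                            if (exception == null) {
                                countsPerPartition
                                        .computeIfAbsent(metadata.partition(), p -> new AtomicInteger())
                                        .incrementAndGet();
                            }
                        });
            }
            producer.flush();
        }
        // With the stock partitioner, the counts are heavily skewed toward every other partition
        countsPerPartition.forEach((partition, count) ->
                System.out.println("partition " + partition + " -> " + count.get() + " records"));
    }
}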

  1. Use the modified RoundRobinPartitioner below
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.kafka.clients.producer;

import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.kafka.common.Cluster;
import org.apache.kafka.common.PartitionInfo;
import org.apache.kafka.common.utils.Utils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * The "Round-Robin" partitioner - MODIFIED TO WORK PROPERLY WITH STICKY PARTITIONING (KIP-480)
 * <p>
 * This partitioning strategy can be used when user wants to distribute the writes to all
 * partitions equally. This is the behaviour regardless of record key hash.
 */
public class RoundRobinPartitioner implements Partitioner {
    private static final Logger LOGGER = LoggerFactory.getLogger(RoundRobinPartitioner.class);
    private final ConcurrentMap<String, AtomicInteger> topicCounterMap = new ConcurrentHashMap<>();
    private final ConcurrentMap<String, Queue<Integer>> topicPartitionQueueMap = new ConcurrentHashMap<>();

    public void configure(Map<String, ?> configs) {
    }

    /**
     * Compute the partition for the given record.
     *
     * @param topic      The topic name
     * @param key        The key to partition on (or null if no key)
     * @param keyBytes   serialized key to partition on (or null if no key)
     * @param value      The value to partition on or null
     * @param valueBytes serialized value to partition on or null
     * @param cluster    The current cluster metadata
     */
    @Override
    public int partition(
        String topic, Object key, byte[] keyBytes, Object value, byte[] valueBytes, Cluster cluster) {
        // Prefer a partition re-offered by onNewBatch(): the producer calls partition()
        // again for the record that triggered the new batch, and without this reuse the
        // counter would advance twice and skip every other partition.
        Queue<Integer> partitionQueue = partitionQueueComputeIfAbsent(topic);
        Integer queuedPartition = partitionQueue.poll();
        if (queuedPartition != null) {
            LOGGER.trace("Partition chosen from queue: {}", queuedPartition);
            return queuedPartition;
        } else {
            List<PartitionInfo> partitions = cluster.partitionsForTopic(topic);
            int numPartitions = partitions.size();
            int nextValue = nextValue(topic);
            List<PartitionInfo> availablePartitions = cluster.availablePartitionsForTopic(topic);
            if (!availablePartitions.isEmpty()) {
                int part = Utils.toPositive(nextValue) % availablePartitions.size();
                int partition = availablePartitions.get(part).partition();
                LOGGER.trace("Partition chosen: {}", partition);
                return partition;
            } else {
                // no partitions are available, give a non-available partition
                return Utils.toPositive(nextValue) % numPartitions;
            }
        }
    }

    // Per-topic counter used for round-robin selection when nothing is queued.
    private int nextValue(String topic) {
        AtomicInteger counter = topicCounterMap.computeIfAbsent(topic, k -> new AtomicInteger(0));
        return counter.getAndIncrement();
    }

    // Per-topic queue of partitions re-offered by onNewBatch(); consumed before the counter.
    private Queue<Integer> partitionQueueComputeIfAbsent(String topic) {
        return topicPartitionQueueMap.computeIfAbsent(topic, k -> new ConcurrentLinkedQueue<>());
    }

    public void close() {
    }

    /**
     * Notifies the partitioner a new batch is about to be created. When using the sticky partitioner,
     * this method can change the chosen sticky partition for the new batch.
     *
     * @param topic         The topic name
     * @param cluster       The current cluster metadata
     * @param prevPartition The partition previously selected for the record that triggered a new
     *                      batch
     */
    @Override
    public void onNewBatch(String topic, Cluster cluster, int prevPartition) {
        LOGGER.trace("New batch so enqueuing partition {} for topic {}", prevPartition, topic);
        Queue<Integer> partitionQueue = partitionQueueComputeIfAbsent(topic);
        partitionQueue.add(prevPartition);
    }
}
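
A minimal usage sketch (bootstrap address and topic name are assumptions): the producer is pointed at the partitioner through partitioner.class. Because the class above keeps the original package and name org.apache.kafka.clients.producer.RoundRobinPartitioner, placing it in your own source tree typically shadows the copy inside kafka-clients; alternatively, move it to your own package and reference that fully qualified name in the config.

import java.util.Properties;

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.serialization.StringSerializer;

public class ModifiedRoundRobinUsage {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); // assumption
        props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
        props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
        // Point partitioner.class at the modified partitioner defined above
        props.put(ProducerConfig.PARTITIONER_CLASS_CONFIG,
                "org.apache.kafka.clients.producer.RoundRobinPartitioner");

        try (KafkaProducer<String, String> producer = new KafkaProducer<>(props)) {
            for (int i = 0; i < 100; i++) {
                // Keyless records now round-robin across all partitions, odd and even alike
                producer.send(new ProducerRecord<>("test-topic", null, "msg-" + i));
            }
            producer.flush();
        }
    }
}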