Kafka官方提供的RoundRobinPartitioner出现奇偶数据不均匀

Kafka官方提供的RoundRobinPartitioner出现奇偶数据不均匀

参考:

https://www.cnblogs.com/cbc-onne/p/18140043

  1. 使用RoundRobinPartitioner
bash 复制代码
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.kafka.clients.producer;

import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.kafka.common.Cluster;
import org.apache.kafka.common.PartitionInfo;
import org.apache.kafka.common.utils.Utils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * The "Round-Robin" partitioner - MODIFIED TO WORK PROPERLY WITH STICKY PARTITIONING (KIP-480)
 * <p>
 * This partitioning strategy can be used when user wants to distribute the writes to all
 * partitions equally. This is the behaviour regardless of record key hash.
 */
public class RoundRobinPartitioner implements Partitioner {
    private static final Logger LOGGER = LoggerFactory.getLogger(RoundRobinPartitioner.class);
    private final ConcurrentMap<String, AtomicInteger> topicCounterMap = new ConcurrentHashMap<>();
    private final ConcurrentMap<String, Queue<Integer>> topicPartitionQueueMap = new ConcurrentHashMap<>();

    public void configure(Map<String, ?> configs) {
    }

    /**
     * Compute the partition for the given record.
     *
     * @param topic      The topic name
     * @param key        The key to partition on (or null if no key)
     * @param keyBytes   serialized key to partition on (or null if no key)
     * @param value      The value to partition on or null
     * @param valueBytes serialized value to partition on or null
     * @param cluster    The current cluster metadata
     */
    @Override
    public int partition(
        String topic, Object key, byte[] keyBytes, Object value, byte[] valueBytes, Cluster cluster) {
        Queue<Integer> partitionQueue = partitionQueueComputeIfAbsent(topic);
        Integer queuedPartition = partitionQueue.poll();
        if (queuedPartition != null) {
            LOGGER.trace("Partition chosen from queue: {}", queuedPartition);
            return queuedPartition;
        } else {
            List<PartitionInfo> partitions = cluster.partitionsForTopic(topic);
            int numPartitions = partitions.size();
            int nextValue = nextValue(topic);
            List<PartitionInfo> availablePartitions = cluster.availablePartitionsForTopic(topic);
            if (!availablePartitions.isEmpty()) {
                int part = Utils.toPositive(nextValue) % availablePartitions.size();
                int partition = availablePartitions.get(part).partition();
                LOGGER.trace("Partition chosen: {}", partition);
                return partition;
            } else {
                // no partitions are available, give a non-available partition
                return Utils.toPositive(nextValue) % numPartitions;
            }
        }
    }

    private int nextValue(String topic) {
        AtomicInteger counter =
            topicCounterMap.computeIfAbsent(
                topic,
                k -> {
                    return new AtomicInteger(0);
                });
        return counter.getAndIncrement();
    }

    private Queue<Integer> partitionQueueComputeIfAbsent(String topic) {
        return topicPartitionQueueMap.computeIfAbsent(topic, k -> {
            return new ConcurrentLinkedQueue<>();
        });
    }

    public void close() {
    }

    /**
     * Notifies the partitioner a new batch is about to be created. When using the sticky partitioner,
     * this method can change the chosen sticky partition for the new batch.
     *
     * @param topic         The topic name
     * @param cluster       The current cluster metadata
     * @param prevPartition The partition previously selected for the record that triggered a new
     *                      batch
     */
    @Override
    public void onNewBatch(String topic, Cluster cluster, int prevPartition) {
        LOGGER.trace("New batch so enqueuing partition {} for topic {}", prevPartition, topic);
        Queue<Integer> partitionQueue = partitionQueueComputeIfAbsent(topic);
        partitionQueue.add(prevPartition);
    }
}
相关推荐
weisian1514 小时前
Redis篇--常见问题篇7--缓存一致性2(分布式事务框架Seata)
redis·分布式·缓存
不能只会打代码4 小时前
Java并发编程框架之综合案例—— 分布式日志分析系统(七)
java·开发语言·分布式·java并发框架
Elastic 中国社区官方博客4 小时前
如何通过 Kafka 将数据导入 Elasticsearch
大数据·数据库·分布式·elasticsearch·搜索引擎·kafka·全文检索
马剑威(威哥爱编程)5 小时前
分布式Python计算服务MaxFrame使用心得
开发语言·分布式·python·阿里云
学计算机的睿智大学生6 小时前
Hadoop的生态系统所包含的组件
大数据·hadoop·分布式
神秘打工猴8 小时前
Kafka 监控都有哪些?
分布式·kafka
Kobebryant-Manba9 小时前
kafka基本概念
分布式·学习·kafka
rainoway10 小时前
CRDT宝典 - yata算法
前端·分布式·算法
hanbarger11 小时前
分布式通信,微服务协调组件,zookeeper
分布式·zookeeper·中间件