一、分布式排序架构概览
1. 分层排序架构
graph TB
A[原始数据] --> B[数据分片]
B --> C[局部排序]
B --> D[局部排序]
B --> E[局部排序]
C --> F[归并节点]
D --> F
E --> F
F --> G[全局排序结果]
subgraph "排序优化层"
H[采样预估]
I[动态分区]
J[负载均衡]
end
H --> B
I --> B
J --> C
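上面的分层架构可以用一个单机小例子直观地串起来:先抽样估计分区边界,再把数据按范围分片、各分片独立局部排序,最后按分区顺序拼接即为全局有序结果。下面是一个演示用的最小示意(类名、采样方式、分区数均为简化假设,不是生产实现)。
java
import java.util.*;
import java.util.stream.Collectors;

public class LayeredSortSketch {
    public static void main(String[] args) {
        // 原始数据:演示用随机整数
        List<Integer> data = new Random(42).ints(1000, 0, 10_000)
                .boxed().collect(Collectors.toList());

        // 采样预估:简化为取前100条样本,按分位点得到分区边界
        int numPartitions = 4;
        List<Integer> sample = new ArrayList<>(data.subList(0, 100));
        Collections.sort(sample);
        int[] bounds = new int[numPartitions - 1];
        for (int i = 0; i < bounds.length; i++) {
            bounds[i] = sample.get((i + 1) * sample.size() / numPartitions);
        }

        // 数据分片:按边界把记录路由到对应"节点"
        List<List<Integer>> partitions = new ArrayList<>();
        for (int i = 0; i < numPartitions; i++) partitions.add(new ArrayList<>());
        for (int v : data) {
            int p = 0;
            while (p < bounds.length && v >= bounds[p]) p++;
            partitions.get(p).add(v);
        }

        // 局部排序:各分区独立排序(分布式环境中由各节点并行完成)
        partitions.forEach(Collections::sort);

        // 归并:分区之间本身按范围有序,顺序拼接即得全局有序结果
        List<Integer> globalSorted = partitions.stream()
                .flatMap(List::stream).collect(Collectors.toList());
        System.out.println("全局有序前10条: " + globalSorted.subList(0, 10));
    }
}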
2. 核心排序模式对比
java
// 分布式排序模式枚举
public enum DistributedSortPattern {
/**
* 模式1: 全排序 (Total Order Sort)
* 所有数据全局有序
*/
TOTAL_ORDER("全排序", "全局完全有序", "MapReduce排序",
Arrays.asList("全局样本", "范围分区", "全局归并")),
/**
* 模式2: 分区排序 (Partitioned Sort)
* 每个分区内部有序,分区间无序
*/
PARTITIONED("分区排序", "分区内有序", "Spark sortByKey",
Arrays.asList("哈希分区", "局部排序", "分区归并")),
/**
* 模式3: 二次排序 (Secondary Sort)
* 主键有序,次键分组内有序
*/
SECONDARY("二次排序", "主次键有序", "Hive Cluster By",
Arrays.asList("复合键", "自定义分区", "分组排序")),
/**
* 模式4: 拓扑排序 (Topological Sort)
* 基于依赖关系排序
*/
TOPOLOGICAL("拓扑排序", "依赖关系序", "DAG调度",
Arrays.asList("图遍历", "依赖检测", "并行调度"));
private final String name;
private final String description;
private final String typicalUse;
private final List<String> keyTechniques;
DistributedSortPattern(String name, String description,
String typicalUse, List<String> keyTechniques) {
this.name = name;
this.description = description;
this.typicalUse = typicalUse;
this.keyTechniques = keyTechniques;
}
/**
* 根据数据特征选择排序模式
*/
public static DistributedSortPattern selectPattern(DataCharacteristics characteristics) {
// 数据量级
long dataSize = characteristics.getDataSize();
// 数据分布
DistributionType distribution = characteristics.getDistribution();
// 排序要求
SortRequirement requirement = characteristics.getSortRequirement();
if (requirement.isTotalOrder()) {
    // 全局有序需求统一选择全排序;dataSize只影响实现层的采样规模与分区数
    return TOTAL_ORDER;
} else if (requirement.hasSecondaryKey()) {
return SECONDARY;
} else if (characteristics.hasDependencies()) {
return TOPOLOGICAL;
} else {
return PARTITIONED;
}
}
}
二、分布式排序算法实现
1. MapReduce全排序实现
java
// MapReduce全排序核心实现
public class MapReduceTotalSort {
/**
* Mapper: 分区采样与局部排序
*/
public static class SamplingMapper
extends Mapper<LongWritable, Text, Text, Text> {
private final List<Text> samples = new ArrayList<>();
private static final int SAMPLE_SIZE = 100000;
private long recordCount = 0; // 已读记录总数,蓄水池采样需要
@Override
protected void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
// 1. 采样(蓄水池采样:前SAMPLE_SIZE条直接收集,之后以SAMPLE_SIZE/recordCount的概率随机替换)
recordCount++;
if (samples.size() < SAMPLE_SIZE) {
    samples.add(new Text(value));
} else {
    int replaceIndex = (int) (Math.random() * recordCount);
    if (replaceIndex < SAMPLE_SIZE) {
        samples.set(replaceIndex, new Text(value));
    }
}
// 2. 输出(键值交换,以数据为键)
context.write(value, new Text(""));
}
@Override
protected void cleanup(Context context)
throws IOException, InterruptedException {
// 3. 发送采样数据到Reducer
for (Text sample : samples) {
context.write(new Text("__SAMPLE__"), sample);
}
}
}
/**
* Partitioner: 基于采样数据范围分区
*/
public static class RangePartitioner extends Partitioner<Text, Text>
        implements Configurable {
    private Text[] splitPoints; // 分区边界点
    private Configuration conf;

    @Override
    public void setConf(Configuration conf) {
        this.conf = conf;
        // 任务初始化时注入配置:读取采样数据并计算分区边界
        String[] samples = conf.get("mapreduce.totalorder.samples", "").split(",");
        // 排序采样数据
        Arrays.sort(samples);
        // 计算分区边界(按分位点均匀切分)
        int numPartitions = conf.getInt("mapreduce.job.reduces", 1);
        splitPoints = new Text[numPartitions - 1];
        for (int i = 0; i < splitPoints.length; i++) {
            int index = (i + 1) * samples.length / numPartitions;
            splitPoints[i] = new Text(samples[index]);
        }
    }

    @Override
    public Configuration getConf() {
        return conf;
    }
@Override
public int getPartition(Text key, Text value, int numPartitions) {
// 特殊键处理
if (key.toString().equals("__SAMPLE__")) {
return 0; // 采样数据发送到第一个分区
}
// 二分查找确定分区
return findPartition(key);
}
private int findPartition(Text key) {
int left = 0;
int right = splitPoints.length - 1;
while (left <= right) {
int mid = left + (right - left) / 2;
int cmp = key.compareTo(splitPoints[mid]);
if (cmp == 0) {
return mid + 1;
} else if (cmp < 0) {
right = mid - 1;
} else {
left = mid + 1;
}
}
return left;
}
}
/**
* Reducer: 全局归并排序
*/
public static class TotalSortReducer
extends Reducer<Text, Text, Text, Text> {
private boolean isSampleReducer = false;
@Override
protected void setup(Context context) {
// 判断是否为采样数据Reducer
isSampleReducer = (context.getTaskAttemptID().getTaskID().getId() == 0);
}
@Override
protected void reduce(Text key, Iterable<Text> values, Context context)
throws IOException, InterruptedException {
if (isSampleReducer && key.toString().equals("__SAMPLE__")) {
// 处理采样数据,计算分区边界
List<String> samples = new ArrayList<>();
for (Text value : values) {
samples.add(value.toString());
}
// 计算并保存分区边界
calculateSplitPoints(samples, context);
} else {
// 正常排序输出(每个分区内已有序)
for (Text value : values) {
context.write(key, value);
}
}
}
}
/**
* 完整的MapReduce排序作业配置
*/
public static Job configureTotalSortJob(Path inputPath, Path outputPath)
throws IOException {
Job job = Job.getInstance();
job.setJobName("Distributed-Total-Sort");
// 输入输出配置
FileInputFormat.addInputPath(job, inputPath);
FileOutputFormat.setOutputPath(job, outputPath);
// Mapper配置
job.setMapperClass(SamplingMapper.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
// Partitioner配置
job.setPartitionerClass(RangePartitioner.class);
// Reducer配置
job.setReducerClass(TotalSortReducer.class);
job.setNumReduceTasks(100); // 根据数据量调整
// 排序相关配置
job.setSortComparatorClass(Text.Comparator.class); // 键排序器
job.setGroupingComparatorClass(Text.Comparator.class); // 分组比较器
// 性能优化配置:全排序没有可局部聚合的逻辑,这里不设置Combiner
// 压缩配置:通过Configuration开启Map输出压缩
job.getConfiguration().setBoolean("mapreduce.map.output.compress", true);
job.getConfiguration().setClass("mapreduce.map.output.compress.codec",
        GzipCodec.class, CompressionCodec.class);
return job;
}
}
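实际工程中通常不必手写采样逻辑与范围分区器:Hadoop自带的InputSampler与TotalOrderPartitioner实现了同样的"采样-划界-范围分区"思路。下面是一个作业配置的示意片段,其中的输入格式、采样参数、类名与分区文件路径均为演示用的假设值。
java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.partition.InputSampler;
import org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner;

public class BuiltinTotalSortJob {
    public static Job configure(Path input, Path output, Path partitionFile) throws Exception {
        Job job = Job.getInstance(new Configuration(), "builtin-total-sort");
        job.setInputFormatClass(KeyValueTextInputFormat.class);
        FileInputFormat.addInputPath(job, input);
        FileOutputFormat.setOutputPath(job, output);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setNumReduceTasks(100);

        // 随机采样:采样率、样本上限、最多采样的分片数均为演示值
        InputSampler.Sampler<Text, Text> sampler =
                new InputSampler.RandomSampler<>(0.01, 100000, 10);

        // 采样得到的分区边界写入分区文件,供TotalOrderPartitioner加载
        TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), partitionFile);
        InputSampler.writePartitionFile(job, sampler);

        // 使用内置范围分区器,各Reduce输出按分区号拼接即为全局有序
        job.setPartitionerClass(TotalOrderPartitioner.class);
        return job;
    }
}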
2. Spark分布式排序优化
scala
// Spark分布式排序优化实现
object SparkDistributedSort {
// 案例:电商订单数据排序(100亿条记录)
case class Order(orderId: Long, userId: Long, amount: Double, timestamp: Long)
def main(args: Array[String]): Unit = {
val spark = SparkSession.builder()
.appName("Large-Scale Order Sorting")
.config("spark.sql.adaptive.enabled", "true")
.config("spark.sql.adaptive.coalescePartitions.enabled", "true")
.config("spark.sql.adaptive.skewJoin.enabled", "true")
.config("spark.sql.shuffle.partitions", "1000") // 初始分区数
.master("spark://master:7077")
.getOrCreate()
// 1. 读取海量数据
val ordersDF = spark.read
.parquet("hdfs://orders/*.parquet")
.repartition(1000) // 数据重分区
// 2. 智能采样预估数据分布
val sampleDF = ordersDF.sample(0.001) // 0.1%采样
val distribution = analyzeDistribution(sampleDF)
// 3. 基于采样的动态分区策略
val partitionBounds = calculateOptimalPartitions(distribution)
// 4. 分区排序:先按userId做范围重分区,再做分区内排序(避免一次全局排序的大shuffle)
val sortedDF = ordersDF
  .repartitionByRange(1000, col("userId"))      // 基于userId范围分区
  .sortWithinPartitions(col("timestamp").desc)  // 分区内按时间倒序
// 5. 派生日期列并按日期重分区,便于后续分区写入(AQE会自动合并小分区)
val optimizedDF = sortedDF
  .withColumn("date", to_date(from_unixtime(col("timestamp")))) // timestamp为秒级时间戳
  .repartition(col("date"))                     // 按日期重新分区
// 6. 写入排序结果
optimizedDF.write
.partitionBy("date")
.mode(SaveMode.Overwrite)
.parquet("hdfs://sorted-orders/")
// 7. 性能监控
monitorSortingPerformance(spark, optimizedDF)
spark.stop()
}
/**
* 分析数据分布特征
*/
def analyzeDistribution(df: DataFrame): DistributionAnalysis = {
val stats = df.select(
count("*").as("total_count"),
approx_count_distinct(col("userId")).as("distinct_users"),
min(col("timestamp")).as("min_time"),
max(col("timestamp")).as("max_time"),
percentile_approx(col("amount"), 0.5).as("median_amount"),
percentile_approx(col("amount"), 0.95).as("p95_amount")
).collect()
DistributionAnalysis(
totalCount = stats(0).getAs[Long]("total_count"),
distinctUsers = stats(0).getAs[Long]("distinct_users"),
timeRange = stats(0).getAs[Long]("max_time") - stats(0).getAs[Long]("min_time"),
skewFactor = stats(0).getAs[Double]("p95_amount") / stats(0).getAs[Double]("median_amount")
)
}
/**
* 计算最优分区策略
*/
def calculateOptimalPartitions(analysis: DistributionAnalysis): Array[Long] = {
val targetPartitionSize = 128 * 1024 * 1024L // 目标分区大小128MB
val estimatedDataSize = analysis.totalCount * 100 // 估算每条记录100字节
val numPartitions = Math.ceil(estimatedDataSize.toDouble / targetPartitionSize).toInt
// 处理数据倾斜:对热门用户单独分区
val hotUserThreshold = analysis.totalCount / analysis.distinctUsers * 10
val hotUsers = identifyHotUsers(analysis, hotUserThreshold)
// 生成分区边界
generatePartitionBounds(numPartitions, hotUsers)
}
/**
* 基于RDD的二次排序实现
*/
def secondarySort(ordersRDD: RDD[Order]): RDD[(Long, List[Order])] = {
// 自定义分区器:基于userId分区
val partitioner = new SecondarySortPartitioner(100)
// 创建复合键:(userId, timestamp)
val keyedRDD = ordersRDD.map(order =>
((order.userId, order.timestamp), order)
)
// 使用自定义分区器
val partitionedRDD = keyedRDD.partitionBy(partitioner)
// 分区内按timestamp排序
val sortedRDD = partitionedRDD.mapPartitions { iter =>
// 收集分区内所有数据
val allData = iter.toList
// 分组并排序
allData.groupBy(_._1._1) // 按userId分组
.mapValues(_.map(_._2).sortBy(_.timestamp).reverse) // 按时间倒序
.iterator
}
sortedRDD
}
/**
* 自定义二次排序分区器
*/
class SecondarySortPartitioner(numParts: Int) extends Partitioner {
override def numPartitions: Int = numParts
override def getPartition(key: Any): Int = {
val k = key.asInstanceOf[(Long, Long)]
// 只根据userId分区(按位与保证结果非负),timestamp仅用于排序
(k._1.hashCode & Int.MaxValue) % numPartitions
}
}
}
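上面的secondarySort在mapPartitions中把整个分区收集为List再分组排序,分区较大时容易内存溢出;更稳妥的做法是用repartitionAndSortWithinPartitions,让排序在shuffle阶段完成。下面给出一个Java版的示意(Order即上文的case class,类名与字段访问方式属演示假设)。
java
import java.io.Serializable;
import java.util.Comparator;
import org.apache.spark.Partitioner;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import scala.Tuple2;

public class SecondarySortJava {

    // 复合键:(userId, timestamp),只按userId分区,timestamp参与排序
    public static JavaPairRDD<Tuple2<Long, Long>, Order> secondarySort(
            JavaRDD<Order> orders, int numPartitions) {

        JavaPairRDD<Tuple2<Long, Long>, Order> keyed = orders.mapToPair(
                o -> new Tuple2<>(new Tuple2<>(o.userId(), o.timestamp()), o));

        // 自定义分区器:仅按复合键的第一个分量(userId)决定分区
        Partitioner byUser = new Partitioner() {
            @Override public int numPartitions() { return numPartitions; }
            @Override public int getPartition(Object key) {
                long userId = ((Tuple2<Long, Long>) key)._1();
                return (int) ((userId % numPartitions + numPartitions) % numPartitions);
            }
        };

        // 比较器:userId升序、timestamp降序;排序发生在shuffle阶段,无需把整个分区载入内存
        Comparator<Tuple2<Long, Long>> cmp =
                (Comparator<Tuple2<Long, Long>> & Serializable) (a, b) -> {
                    int c = Long.compare(a._1(), b._1());
                    return c != 0 ? c : Long.compare(b._2(), a._2());
                };

        return keyed.repartitionAndSortWithinPartitions(byUser, cmp);
    }
}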
三、分布式聚合算法
1. MapReduce聚合优化
java
// MapReduce聚合高级优化
public class MapReduceAdvancedAggregation {
/**
* Combiner优化:局部聚合减少数据传输
*/
public static class OptimizedCombiner
extends Reducer<Text, LongWritable, Text, LongWritable> {
private final Map<String, Long> localAggregation = new HashMap<>();
@Override
protected void reduce(Text key, Iterable<LongWritable> values,
                      Context context) throws IOException, InterruptedException {
    long sum = 0;
    for (LongWritable value : values) {
        sum += value.get();
    }
    // 累加到本地聚合结果
    localAggregation.merge(key.toString(), sum, Long::sum);
    // 内存控制:本地聚合的key数超过阈值时flush输出
    if (localAggregation.size() > 10000) {
        flushToContext(context);
    }
}
@Override
protected void cleanup(Context context)
throws IOException, InterruptedException {
// 输出所有本地聚合结果
flushToContext(context);
}
private void flushToContext(Context context)
throws IOException, InterruptedException {
for (Map.Entry<String, Long> entry : localAggregation.entrySet()) {
context.write(new Text(entry.getKey()),
new LongWritable(entry.getValue()));
}
localAggregation.clear();
}
}
/**
* Reducer端聚合优化:内存+磁盘混合存储
*/
public static class HybridAggregationReducer
extends Reducer<Text, LongWritable, Text, LongWritable> {
// 内存聚合(LRU缓存)
private final Map<String, Long> memoryCache = new LinkedHashMap<String, Long>(1000, 0.75f, true) {
@Override
protected boolean removeEldestEntry(Map.Entry<String, Long> eldest) {
// 超过阈值时,将最旧的条目刷到磁盘
if (size() > 5000) {
flushToDisk(eldest.getKey(), eldest.getValue());
return true;
}
return false;
}
};
// 磁盘存储(用于大数据量)
private Path diskStoragePath;
private SequenceFile.Writer diskWriter;
@Override
protected void setup(Context context) throws IOException {
// 初始化磁盘存储
diskStoragePath = new Path("/tmp/aggregation/" +
context.getTaskAttemptID().toString());
diskWriter = SequenceFile.createWriter(context.getConfiguration(),
SequenceFile.Writer.file(diskStoragePath),
SequenceFile.Writer.keyClass(Text.class),
SequenceFile.Writer.valueClass(LongWritable.class));
}
@Override
protected void reduce(Text key, Iterable<LongWritable> values,
Context context) {
long sum = 0;
for (LongWritable value : values) {
sum += value.get();
}
String keyStr = key.toString();
// 尝试内存聚合
Long current = memoryCache.get(keyStr);
if (current != null) {
memoryCache.put(keyStr, current + sum);
} else {
memoryCache.put(keyStr, sum);
}
}
@Override
protected void cleanup(Context context)
        throws IOException, InterruptedException {
    // 1. 读取已溢写到磁盘的部分结果,与内存结果合并,避免同一key输出两份部分值
    Map<String, Long> finalAggregation = readDiskResults(context);
    for (Map.Entry<String, Long> entry : memoryCache.entrySet()) {
        finalAggregation.merge(entry.getKey(), entry.getValue(), Long::sum);
    }
    // 2. 输出合并后的最终结果
    for (Map.Entry<String, Long> entry : finalAggregation.entrySet()) {
        context.write(new Text(entry.getKey()),
                new LongWritable(entry.getValue()));
    }
    // 3. 清理临时文件
    cleanupTempFiles();
}
private void flushToDisk(String key, Long value) {
try {
diskWriter.append(new Text(key), new LongWritable(value));
} catch (IOException e) {
// 处理IO异常
}
}
private Map<String, Long> readDiskResults(Context context) throws IOException {
    Map<String, Long> diskAggregation = new HashMap<>();
    if (diskWriter != null) {
        diskWriter.close();
        // 读取磁盘文件并按key聚合
        try (SequenceFile.Reader reader = new SequenceFile.Reader(
                context.getConfiguration(),
                SequenceFile.Reader.file(diskStoragePath))) {
            Text key = new Text();
            LongWritable value = new LongWritable();
            while (reader.next(key, value)) {
                diskAggregation.merge(key.toString(), value.get(), Long::sum);
            }
        }
    }
    return diskAggregation;
}
}
/**
* 两阶段聚合:处理数据倾斜
*/
public static class TwoPhaseAggregation {
/**
* 第一阶段:打散热点数据
*/
public static class PhaseOneMapper
extends Mapper<LongWritable, Text, Text, LongWritable> {
@Override
protected void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
String[] parts = value.toString().split(",");
if (parts.length >= 2) {
String originalKey = parts[0];
long count = Long.parseLong(parts[1]);
// 检测热点key
if (isHotKey(originalKey)) {
// 热点key打散:追加后缀拆成10个分片;余数补到前面的分片,避免count/10丢失精度
long base = count / 10;
long remainder = count % 10;
for (int i = 0; i < 10; i++) {
    String shuffledKey = originalKey + "_" + i;
    context.write(new Text(shuffledKey),
            new LongWritable(base + (i < remainder ? 1 : 0)));
}
} else {
// 普通key直接输出
context.write(new Text(originalKey),
new LongWritable(count));
}
}
}
private boolean isHotKey(String key) {
// 基于历史统计或采样判断是否为热点
return HOT_KEYS.contains(key);
}
}
/**
* 第二阶段:合并打散的数据
*/
public static class PhaseTwoReducer
extends Reducer<Text, LongWritable, Text, LongWritable> {
@Override
protected void reduce(Text key, Iterable<LongWritable> values,
                      Context context) throws IOException, InterruptedException {
long sum = 0;
for (LongWritable value : values) {
sum += value.get();
}
String keyStr = key.toString();
// 如果是打散的key,需要还原原始key后再合并
// 前提:自定义分区器按原始key路由,保证同一原始key的所有分片进入同一个Reducer;
// 否则应再链一个作业,在其Mapper中去掉后缀后按原始key二次求和(示意见本小节末尾)
if (keyStr.contains("_")) {
    String originalKey = keyStr.split("_")[0];
    // 缓存部分聚合结果,等待该key的所有分片到达
cachePartialResult(originalKey, sum);
// 检查是否所有部分都到达
if (allPartsArrived(originalKey)) {
long total = mergeAllParts(originalKey);
context.write(new Text(originalKey),
new LongWritable(total));
clearCache(originalKey);
}
} else {
// 普通key直接输出
context.write(key, new LongWritable(sum));
}
}
}
}
}
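如上所述,PhaseTwoReducer的合并逻辑依赖"按原始key路由"的分区器;更常见、也更简单的做法是链两个作业:第一个作业按打散后的key聚合,第二个作业在Mapper里去掉随机后缀,再按原始key做普通求和。下面是该Mapper的一个最小示意(假设上一阶段的输出为"saltedKey\t部分和"的文本行,类名为演示用)。
java
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// 两阶段聚合的第二个作业:Mapper去掉随机后缀,还原原始key
public class DesaltMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

    @Override
    protected void map(LongWritable offset, Text line, Context context)
            throws IOException, InterruptedException {
        // 假设上一阶段输出格式为 "saltedKey\t部分聚合值"
        String[] parts = line.toString().split("\t");
        if (parts.length < 2) {
            return; // 跳过格式异常的行
        }
        String saltedKey = parts[0];
        long partialSum = Long.parseLong(parts[1]);

        // 去掉"_后缀"还原原始key;没有后缀的key原样输出
        int idx = saltedKey.lastIndexOf('_');
        String originalKey = (idx > 0) ? saltedKey.substring(0, idx) : saltedKey;

        context.write(new Text(originalKey), new LongWritable(partialSum));
    }
}
// 第二个作业的Reducer做普通求和即可得到最终聚合结果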
2. Spark分布式聚合
scala
// Spark高级聚合优化
object SparkAdvancedAggregation {
// 案例:用户行为分析聚合
case class UserBehavior(userId: Long, action: String, timestamp: Long, value: Double)
def main(args: Array[String]): Unit = {
val spark = SparkSession.builder()
.appName("User Behavior Aggregation")
.config("spark.sql.autoBroadcastJoinThreshold", "-1") // 禁用广播join
.config("spark.sql.shuffle.partitions", "2000")
.getOrCreate()
import spark.implicits._
// 1. 读取用户行为数据(100亿条)
val behaviorDS = spark.read
.parquet("hdfs://user-behavior/*.parquet")
.as[UserBehavior]
.repartition($"userId") // 按userId预分区
// 2. 多维度聚合(使用Spark SQL优化)
behaviorDS.createOrReplaceTempView("user_behavior")
val aggregatedDF = spark.sql("""
-- 多级聚合:小时 -> 天 -> 月
WITH hourly_agg AS (
SELECT
userId,
date_trunc('hour', from_unixtime(timestamp)) as hour,
action,
COUNT(*) as action_count,
SUM(value) as total_value,
AVG(value) as avg_value
FROM user_behavior
WHERE timestamp >= UNIX_TIMESTAMP('2024-01-01')
GROUP BY userId, date_trunc('hour', from_unixtime(timestamp)), action
),
daily_agg AS (
SELECT
userId,
date_trunc('day', hour) as day,
action,
SUM(action_count) as daily_count,
SUM(total_value) as daily_value,
SUM(total_value) / SUM(action_count) as daily_avg
FROM hourly_agg
GROUP BY userId, date_trunc('day', hour), action
),
monthly_agg AS (
SELECT
userId,
date_trunc('month', day) as month,
action,
SUM(daily_count) as monthly_count,
SUM(daily_value) as monthly_value,
SUM(daily_value) / SUM(daily_count) as monthly_avg,
-- 计算百分位数(需要聚合UDAF)
percentile_approx(daily_value, 0.5) as median_value
FROM daily_agg
GROUP BY userId, date_trunc('month', day), action
)
SELECT * FROM monthly_agg
ORDER BY monthly_count DESC
LIMIT 1000000
""")
// 3. 处理数据倾斜:Salting技术
val skewedDF = handleDataSkew(behaviorDS)
// 4. 增量聚合:维护聚合状态
val incrementalAggDF = incrementalAggregation(behaviorDS)
// 5. 写入聚合结果
aggregatedDF.write
.mode(SaveMode.Overwrite)
.parquet("hdfs://aggregated-results/")
spark.stop()
}
/**
* 处理数据倾斜:Salting + 两阶段聚合
*/
def handleDataSkew(behaviorDS: Dataset[UserBehavior]): DataFrame = {
import behaviorDS.sparkSession.implicits._
// 第一阶段:添加随机后缀打散热点数据
val saltedDS = behaviorDS
.map { behavior =>
val salt = if (isHotUser(behavior.userId)) {
// 热点用户添加随机后缀
s"${behavior.userId}_${Random.nextInt(10)}"
} else {
behavior.userId.toString
}
(salt, behavior.action, behavior.timestamp, behavior.value)
}
.toDF("salted_user_id", "action", "timestamp", "value")
// 第一阶段聚合(在打散的key上)
val stage1Agg = saltedDS
.groupBy("salted_user_id", "action")
.agg(
count("*").as("partial_count"),
sum("value").as("partial_sum"),
avg("value").as("partial_avg")
)
// 第二阶段:还原原始key并聚合
val stage2Agg = stage1Agg
.withColumn("original_user_id",
split($"salted_user_id", "_").getItem(0).cast("long"))
.groupBy("original_user_id", "action")
.agg(
sum("partial_count").as("total_count"),
sum("partial_sum").as("total_sum"),
avg("partial_avg").as("total_avg")
)
stage2Agg
}
/**
* 增量聚合:维护滑动窗口聚合状态
*/
def incrementalAggregation(behaviorDS: Dataset[UserBehavior]): DataFrame = {
import behaviorDS.sparkSession.implicits._
import org.apache.spark.sql.expressions.Window
// 定义滚动统计窗口:先按小时聚合,再按窗口结束时间做最近24小时的滚动统计
val rollingWindow = Window
  .partitionBy("userId", "action")
  .orderBy(col("window_end_ts"))
  .rangeBetween(-24 * 3600, 0) // 最近24小时

behaviorDS
  .withColumn("event_time", to_timestamp(from_unixtime($"timestamp")))
  .groupBy(
    window($"event_time", "1 hour"), // 1小时窗口(流式场景可配合withWatermark处理迟到数据)
    $"userId",
    $"action"
  )
  .agg(
    count("*").as("hourly_count"),
    sum("value").as("hourly_sum"),
    avg("value").as("hourly_avg")
  )
  .withColumn("window_end_ts", unix_timestamp($"window.end")) // 供rangeBetween使用的秒级时间
  .withColumn("rolling_24h_count",
    sum("hourly_count").over(rollingWindow))
  .withColumn("rolling_24h_avg",
    sum("hourly_sum").over(rollingWindow) / sum("hourly_count").over(rollingWindow))
}
/**
* 自定义聚合函数:HyperLogLog基数估算
*/
class HyperLogLogAgg extends UserDefinedAggregateFunction {
override def inputSchema: StructType =
StructType(StructField("value", StringType) :: Nil)
override def bufferSchema: StructType =
StructType(StructField("registers", ArrayType(IntegerType)) :: Nil)
override def dataType: DataType = LongType
override def deterministic: Boolean = true
override def initialize(buffer: MutableAggregationBuffer): Unit = {
// 初始化HLL寄存器数组(m=2^14=16384个寄存器)
buffer(0) = Array.fill(16384)(0)
}
override def update(buffer: MutableAggregationBuffer, input: Row): Unit = {
val value = input.getString(0)
// 用两个带种子的32位MurmurHash拼成64位哈希
val h1 = scala.util.hashing.MurmurHash3.stringHash(value, 41)
val h2 = scala.util.hashing.MurmurHash3.stringHash(value, 97)
val hash = (h1.toLong << 32) | (h2.toLong & 0xFFFFFFFFL)
// 低14位作为寄存器索引,其余50位中最左侧1的位置作为rho
val index = (hash & 0x3FFF).toInt
val w = hash >>> 14
val rho = if (w == 0) 51 else java.lang.Long.numberOfLeadingZeros(w) - 13
// 更新寄存器
val registers = buffer.getAs[Seq[Int]](0)
if (rho > registers(index)) {
val updated = registers.updated(index, rho)
buffer.update(0, updated)
}
}
override def merge(buffer1: MutableAggregationBuffer, buffer2: Row): Unit = {
val registers1 = buffer1.getAs[Seq[Int]](0)
val registers2 = buffer2.getAs[Seq[Int]](0)
// 合并两个HLL寄存器(取最大值)
val merged = (registers1 zip registers2).map { case (r1, r2) =>
Math.max(r1, r2)
}
buffer1.update(0, merged)
}
override def evaluate(buffer: Row): Any = {
val registers = buffer.getAs[Seq[Int]](0)
// 计算基数估计
val m = registers.length.toDouble
val alpha = 0.7213 / (1 + 1.079 / m) // 修正系数
val sum = registers.map(r => math.pow(2.0, -r)).sum // r可能超过31,不能用Int位移
val estimate = alpha * m * m / sum
// 小范围修正
val corrected = if (estimate <= 2.5 * m) {
// 线性计数
val zeros = registers.count(_ == 0)
if (zeros > 0) m * math.log(m / zeros) else estimate
} else {
estimate
}
corrected.toLong
}
}
}
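手写HLL UDAF主要用于说明原理;Spark内置的approx_count_distinct底层就是HyperLogLog++,生产中直接调用即可。下面是一个Java版的最小用法示意(数据路径沿用上文,列名与误差参数为演示假设)。
java
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import static org.apache.spark.sql.functions.approx_count_distinct;
import static org.apache.spark.sql.functions.col;

public class ApproxDistinctExample {
    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder().appName("approx-uv").getOrCreate();
        // 按action估算去重用户数,0.01是允许的相对标准误差
        Dataset<Row> uv = spark.read().parquet("hdfs://user-behavior/*.parquet")
                .groupBy(col("action"))
                .agg(approx_count_distinct("userId", 0.01).alias("approx_uv"));
        uv.show();
        spark.stop();
    }
}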
四、存储优化与索引
1. 列式存储优化
java
// 列式存储聚合优化
public class ColumnarStorageOptimization {
/**
* Parquet列式存储优化
*/
public class ParquetAggregationOptimizer {
/**
* 基于列统计的谓词下推
*/
public Dataset<Row> optimizeWithPredicatePushdown(
SparkSession spark, String parquetPath) {
// Parquet文件自动收集列统计信息
Dataset<Row> df = spark.read.parquet(parquetPath);
// 启用向量化读取(批量处理)
spark.conf().set("spark.sql.parquet.enableVectorizedReader", "true");
// 启用谓词下推
spark.conf().set("spark.sql.parquet.filterPushdown", "true");
// 启用字典过滤
spark.conf().set("spark.sql.parquet.dictionary.enabled", "true");
// 基于统计信息的过滤
return df.filter("amount > 100 AND amount < 1000") // 范围过滤
.filter("category IN ('electronics', 'books')") // IN过滤
.filter("timestamp >= '2024-01-01'"); // 时间过滤
}
/**
* 列裁剪优化:只读取需要的列
*/
public Dataset<Row> optimizeColumnPruning(
        SparkSession spark, Dataset<Row> df, List<String> requiredColumns) {
    // 只选择需要的列,触发列裁剪
    Dataset<Row> prunedDF = df.selectExpr(requiredColumns.toArray(new String[0]));
    // 调整向量化读取的批大小
    spark.conf().set("spark.sql.parquet.columnarReaderBatchSize", "4096");
    return prunedDF;
}
/**
* 分区裁剪:基于分区键过滤
*/
public Dataset<Row> optimizePartitionPruning(
        SparkSession spark, Dataset<Row> df, String partitionColumn, String partitionValue) {
// 创建分区视图
df.createOrReplaceTempView("data");
// 使用分区键过滤,Spark会自动进行分区裁剪
return spark.sql(
String.format("SELECT * FROM data WHERE %s = '%s'",
partitionColumn, partitionValue)
);
}
}
/**
* ORC存储格式优化
*/
public class ORCOptimization {
public Dataset<Row> optimizeORCReading(SparkSession spark, String orcPath) {
// 启用ORC优化
spark.conf().set("spark.sql.orc.enabled", "true");
spark.conf().set("spark.sql.orc.filterPushdown", "true");
spark.conf().set("spark.sql.orc.splits.include.file.footer", "true");
// 使用索引加速
spark.conf().set("spark.sql.orc.impl", "native");
spark.conf().set("spark.sql.orc.skipCorruptRecords", "true");
// Bloom Filter索引
spark.conf().set("spark.sql.orc.bloom.filter.columns", "userId,category");
spark.conf().set("spark.sql.orc.bloom.filter.fpp", "0.05");
return spark.read().orc(orcPath);
}
}
/**
* 数据湖格式优化(Delta Lake/Iceberg)
*/
public class DataLakeOptimization {
/**
* Delta Lake Z-Order优化
*/
public void optimizeWithZOrder(SparkSession spark, Dataset<Row> df, String[] zOrderColumns) {
// 将数据写入Delta Lake
df.write()
.format("delta")
.mode("overwrite")
.save("/data/delta-table");
// 执行Z-Order优化(多维聚类)
spark.sql("OPTIMIZE delta.`/data/delta-table` " +
"ZORDER BY (" + String.join(",", zOrderColumns) + ")");
// 收集统计信息
spark.sql("ANALYZE TABLE delta.`/data/delta-table` COMPUTE STATISTICS");
}
/**
* Iceberg隐藏分区
*/
public void optimizeWithHiddenPartitions(SparkSession spark, Dataset<Row> df) throws Exception {
// 创建Iceberg表,定义隐藏分区
spark.sql("""
CREATE TABLE iceberg_db.user_behavior (
userId BIGINT,
action STRING,
timestamp TIMESTAMP,
value DOUBLE
)
USING iceberg
PARTITIONED BY (days(timestamp), bucket(10, userId))
TBLPROPERTIES (
'write.format.default'='parquet',
'write.parquet.compression-codec'='zstd'
)
""");
// 写入数据,Iceberg会自动管理分区
df.writeTo("iceberg_db.user_behavior").append();
// 过期快照清理
spark.sql("""
CALL iceberg.system.expire_snapshots(
'iceberg_db.user_behavior',
TIMESTAMP '2024-01-01 00:00:00'
)
""");
}
}
}
2. 索引加速策略
java
/**
* 跳跃表索引:支持范围查询
*/
public class SkipListIndex {
private static class SkipListNode {
String key;
List<DataLocation> locations;
SkipListNode[] forward;
public SkipListNode(String key, int level) {
this.key = key;
this.locations = new ArrayList<>();
this.forward = new SkipListNode[level + 1];
}
}
private SkipListNode header;
private int maxLevel;
private int size;
private Random random;
public SkipListIndex(int maxLevel) {
this.maxLevel = maxLevel;
this.header = new SkipListNode(null, maxLevel);
this.random = new Random();
this.size = 0;
}
/**
* 插入索引
*/
public void insert(String key, DataLocation location) {
SkipListNode[] update = new SkipListNode[maxLevel + 1];
SkipListNode current = header;
// 查找插入位置
for (int i = maxLevel; i >= 0; i--) {
while (current.forward[i] != null &&
current.forward[i].key.compareTo(key) < 0) {
current = current.forward[i];
}
update[i] = current;
}
current = current.forward[0];
if (current != null && current.key.equals(key)) {
// 键已存在,添加位置
current.locations.add(location);
} else {
// 创建新节点
int level = randomLevel();
SkipListNode newNode = new SkipListNode(key, level);
newNode.locations.add(location);
// 更新指针
for (int i = 0; i <= level; i++) {
newNode.forward[i] = update[i].forward[i];
update[i].forward[i] = newNode;
}
size++;
}
}
/**
* 范围查询
*/
public List<DataLocation> rangeQuery(String startKey, String endKey) {
List<DataLocation> results = new ArrayList<>();
SkipListNode current = findNode(startKey);
while (current != null && current.key.compareTo(endKey) <= 0) {
results.addAll(current.locations);
current = current.forward[0];
}
return results;
}
private SkipListNode findNode(String key) {
SkipListNode current = header;
for (int i = maxLevel; i >= 0; i--) {
while (current.forward[i] != null &&
current.forward[i].key.compareTo(key) < 0) {
current = current.forward[i];
}
}
return current.forward[0];
}
private int randomLevel() {
int level = 0;
while (random.nextDouble() < 0.5 && level < maxLevel) {
level++;
}
return level;
}
}
/**
* 布隆过滤器索引:快速过滤不存在的数据
*/
public class BloomFilterIndex {
private final BitSet bitSet;
private final int size;
private final int[] hashSeeds;
private final int hashFunctions;
public BloomFilterIndex(int expectedSize, double falsePositiveRate) {
this.size = optimalBitSetSize(expectedSize, falsePositiveRate);
this.hashFunctions = optimalHashFunctions(expectedSize, size);
this.bitSet = new BitSet(size);
this.hashSeeds = generateHashSeeds(hashFunctions);
}
/**
* 添加元素到布隆过滤器
*/
public void add(String key) {
for (int i = 0; i < hashFunctions; i++) {
int hash = hash(key, hashSeeds[i]);
bitSet.set(Math.abs(hash % size));
}
}
/**
* 检查元素是否存在
*/
public boolean mightContain(String key) {
for (int i = 0; i < hashFunctions; i++) {
int hash = hash(key, hashSeeds[i]);
if (!bitSet.get(Math.abs(hash % size))) {
return false;
}
}
return true;
}
/**
* 批量添加元素
*/
public void addAll(Collection<String> keys) {
for (String key : keys) {
add(key);
}
}
/**
* 合并两个布隆过滤器
*/
public void merge(BloomFilterIndex other) {
if (this.size != other.size || this.hashFunctions != other.hashFunctions) {
throw new IllegalArgumentException("Bloom filters must have same configuration");
}
this.bitSet.or(other.bitSet);
}
private int optimalBitSetSize(int n, double p) {
return (int) (-n * Math.log(p) / (Math.log(2) * Math.log(2)));
}
private int optimalHashFunctions(int n, int m) {
return Math.max(1, (int) Math.round((double) m / n * Math.log(2)));
}
private int[] generateHashSeeds(int numHashes) {
int[] seeds = new int[numHashes];
Random random = new Random(42L); // 固定种子:相同配置的过滤器使用相同的哈希种子,merge才有意义
for (int i = 0; i < numHashes; i++) {
seeds[i] = random.nextInt();
}
return seeds;
}
private int hash(String key, int seed) {
    byte[] bytes = key.getBytes(StandardCharsets.UTF_8);
    return MurmurHash3.hash32x86(bytes, 0, bytes.length, seed);
}
}
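下面给出上述BloomFilterIndex的一个最小用法示例,演示按预期容量与误判率构造、批量添加以及分片过滤器的合并(key与数值均为演示假设)。
java
import java.util.Arrays;

public class BloomFilterIndexDemo {
    public static void main(String[] args) {
        // 预期100万个key,目标误判率1%
        BloomFilterIndex index = new BloomFilterIndex(1_000_000, 0.01);
        index.addAll(Arrays.asList("user_1001", "user_1002", "user_1003"));

        // 已加入的key一定返回true;未加入的key约有1%的概率被误判为true
        System.out.println(index.mightContain("user_1001")); // true
        System.out.println(index.mightContain("user_9999")); // 大概率false

        // 相同配置的过滤器可按位或合并,用于汇总多个分片的索引
        BloomFilterIndex shard2 = new BloomFilterIndex(1_000_000, 0.01);
        shard2.add("user_2001");
        index.merge(shard2);
        System.out.println(index.mightContain("user_2001")); // true
    }
}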
/**
* 分布式倒排索引
*/
public class DistributedInvertedIndex {
/**
* 构建倒排索引
*/
public JavaPairRDD<String, List<Long>> buildInvertedIndex(
        JavaRDD<String> documents, int numPartitions) {
// 第一步:文档分词
JavaPairRDD<String, Long> termDocPairs = documents.zipWithIndex()
.flatMapToPair(document -> {
List<Tuple2<String, Long>> pairs = new ArrayList<>();
long docId = document._2;
String text = document._1;
// 分词处理
String[] terms = text.toLowerCase()
.replaceAll("[^a-z0-9\\s]", "")
.split("\\s+");
for (String term : terms) {
if (!term.isEmpty()) {
pairs.add(new Tuple2<>(term, docId));
}
}
return pairs.iterator();
});
// 第二步:聚合相同term的文档列表
JavaPairRDD<String, List<Long>> invertedIndex = termDocPairs
.groupByKey(numPartitions)
.mapValues(docIds -> {
    // 去重并排序:TreeSet一次完成,避免List.contains带来的O(n^2)开销
    Set<Long> uniqueIds = new TreeSet<>();
    for (Long docId : docIds) {
        uniqueIds.add(docId);
    }
    return new ArrayList<>(uniqueIds);
});
// 第三步:压缩存储(增量编码)——压缩结果可另行持久化,查询仍使用未压缩的倒排表
JavaPairRDD<String, byte[]> compressedIndex = invertedIndex
        .mapValues(docIds -> compressDocIds(docIds));
return invertedIndex;
}
/**
* 增量编码压缩
*/
private byte[] compressDocIds(List<Long> docIds) {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
DataOutputStream dos = new DataOutputStream(baos);
try {
long prevId = 0;
for (Long docId : docIds) {
long delta = docId - prevId;
writeVariableByte(delta, dos);
prevId = docId;
}
} catch (IOException e) {
// 处理异常
}
return baos.toByteArray();
}
/**
* 变长字节编码
*/
private void writeVariableByte(long value, DataOutputStream dos)
throws IOException {
while (value > 127) {
dos.writeByte((int) (value & 0x7F));
value >>>= 7;
}
dos.writeByte((int) (value | 0x80));
}
/**
* 查询倒排索引
*/
public JavaRDD<Long> queryInvertedIndex(
JavaPairRDD<String, List<Long>> index,
List<String> queryTerms,
QueryType queryType) {
// 获取每个term的文档列表
List<JavaRDD<Long>> termDocRDDs = new ArrayList<>();
for (String term : queryTerms) {
JavaRDD<Long> docRDD = index
.filter(tuple -> tuple._1.equals(term))
.flatMap(tuple -> tuple._2.iterator());
termDocRDDs.add(docRDD);
}
// 根据查询类型合并结果
JavaRDD<Long> resultRDD;
switch (queryType) {
case AND:
// 求交集
resultRDD = termDocRDDs.get(0);
for (int i = 1; i < termDocRDDs.size(); i++) {
resultRDD = resultRDD.intersection(termDocRDDs.get(i));
}
break;
case OR:
// 求并集
resultRDD = termDocRDDs.get(0);
for (int i = 1; i < termDocRDDs.size(); i++) {
resultRDD = resultRDD.union(termDocRDDs.get(i))
.distinct();
}
break;
case NOT:
// 求差集
resultRDD = termDocRDDs.get(0)
.subtract(termDocRDDs.get(1));
break;
default:
throw new IllegalArgumentException("Unsupported query type");
}
return resultRDD;
}
}
/**
* 实时索引更新
*/
public class RealTimeIndexing {
/**
* LSM树(Log-Structured Merge-Tree)索引
*/
public class LSMTreeIndex {
// MemTable(内存表)
private final ConcurrentSkipListMap<String, String> memTable;
private final int memTableThreshold;
// SSTable文件列表
private final List<SSTable> ssTables;
// 布隆过滤器加速查询
private final BloomFilterIndex bloomFilter;
public LSMTreeIndex(int memTableThreshold) {
this.memTable = new ConcurrentSkipListMap<>();
this.memTableThreshold = memTableThreshold;
this.ssTables = new ArrayList<>();
this.bloomFilter = new BloomFilterIndex(1000000, 0.01);
}
/**
* 写入数据
*/
public void put(String key, String value) {
synchronized (memTable) {
memTable.put(key, value);
bloomFilter.add(key);
// 检查MemTable大小
if (memTable.size() >= memTableThreshold) {
flushMemTableToDisk();
}
}
}
/**
* 读取数据
*/
public String get(String key) {
// 先查布隆过滤器
if (!bloomFilter.mightContain(key)) {
return null;
}
// 先查MemTable
String value = memTable.get(key);
if (value != null) {
return value;
}
// 再查SSTable(从新到旧)
for (int i = ssTables.size() - 1; i >= 0; i--) {
value = ssTables.get(i).get(key);
if (value != null) {
return value;
}
}
return null;
}
/**
* 将MemTable刷到磁盘
*/
private void flushMemTableToDisk() {
synchronized (memTable) {
if (memTable.isEmpty()) {
return;
}
// 创建SSTable文件
SSTable ssTable = new SSTable(memTable);
ssTables.add(ssTable);
// 清空MemTable
memTable.clear();
// 定期合并SSTable
if (ssTables.size() > 10) {
compactSSTables();
}
}
}
/**
* 合并SSTable(减少读取时的I/O)
*/
private void compactSSTables() {
// 选择要合并的SSTable(通常是较旧的)
List<SSTable> toCompact = selectSSTablesForCompaction();
// 多路归并
MergeIterator mergeIterator = new MergeIterator(toCompact);
// 创建新的SSTable
SSTable newSsTable = new SSTable(mergeIterator);
// 替换旧的SSTable
ssTables.removeAll(toCompact);
ssTables.add(newSsTable);
}
}
/**
* SSTable(Sorted String Table)
*/
class SSTable {
private final String filePath;
private final NavigableMap<String, Long> sparseIndex; // 稀疏索引
private final BloomFilterIndex bloomFilter;
public SSTable(NavigableMap<String, String> data) {
this.filePath = createSSTableFile(data);
this.sparseIndex = buildSparseIndex(data);
this.bloomFilter = buildBloomFilter(data);
}
public String get(String key) {
// 先查布隆过滤器
if (!bloomFilter.mightContain(key)) {
return null;
}
// 查找数据块
Map.Entry<String, Long> floorEntry = sparseIndex.floorEntry(key);
if (floorEntry == null) {
return null;
}
// 读取数据块并查找
return readAndSearchBlock(floorEntry.getValue(), key);
}
private String readAndSearchBlock(long offset, String key) {
// 读取数据块并在内存中查找
try (RandomAccessFile file = new RandomAccessFile(filePath, "r")) {
file.seek(offset);
// 读取数据块大小
int blockSize = file.readInt();
byte[] blockData = new byte[blockSize];
file.readFully(blockData);
// 解析数据块
return binarySearchInBlock(blockData, key);
} catch (IOException e) {
return null;
}
}
}
}
五、容错与一致性保障
1. 容错机制设计
java
/**
* 分布式排序容错机制
*/
public class FaultTolerantSorting {
/**
* Checkpoint机制:定期保存排序状态
*/
public static class CheckpointManager {
private final String checkpointDir;
private final int checkpointInterval;
public CheckpointManager(String checkpointDir, int interval) {
this.checkpointDir = checkpointDir;
this.checkpointInterval = interval;
}
/**
* 创建检查点
*/
public void createCheckpoint(
JavaRDD<Row> rdd,
int stageId,
String checkpointName) {
// 保存RDD到可靠的存储系统
rdd.saveAsObjectFile(
checkpointDir + "/" + stageId + "/" + checkpointName
);
// 保存元数据
saveMetadata(stageId, checkpointName, rdd.getNumPartitions());
}
/**
* 从检查点恢复
*/
public JavaRDD<Row> recoverFromCheckpoint(
SparkContext sc,
int stageId,
String checkpointName) {
// 加载元数据
CheckpointMetadata metadata = loadMetadata(stageId, checkpointName);
// 恢复RDD
JavaRDD<Row> recoveredRDD = sc.objectFile(
checkpointDir + "/" + stageId + "/" + checkpointName,
metadata.getNumPartitions()
);
return recoveredRDD;
}
}
/**
* 推测执行(Speculative Execution)
*/
public static class SpeculativeExecution {
/**
* 检测慢任务并启动推测执行
*/
public void monitorAndSpeculate(List<TaskInfo> tasks) {
// 计算任务平均执行时间
double avgDuration = calculateAverageDuration(tasks);
// 检测慢任务(超过平均时间2倍)
List<TaskInfo> slowTasks = tasks.stream()
.filter(task -> task.getDuration() > avgDuration * 2)
.collect(Collectors.toList());
// 启动推测执行
for (TaskInfo slowTask : slowTasks) {
if (shouldSpeculate(slowTask)) {
launchSpeculativeTask(slowTask);
}
}
}
private boolean shouldSpeculate(TaskInfo task) {
// 检查资源使用情况
double cpuUsage = task.getCpuUsage();
double memUsage = task.getMemUsage();
// 判断是否为资源竞争导致的慢
if (cpuUsage < 0.3 && memUsage < 0.3) {
return true; // 资源充足,可能是节点故障
}
// 检查网络情况
double networkDelay = task.getNetworkDelay();
return networkDelay < 100; // 网络延迟小于100ms
}
}
/**
* 数据备份与恢复
*/
public static class DataReplication {
private final int replicationFactor;
private final List<StorageNode> storageNodes;
public DataReplication(int replicationFactor) {
this.replicationFactor = replicationFactor;
this.storageNodes = new ArrayList<>();
}
/**
* 多副本写入
*/
public void writeWithReplication(String dataId, byte[] data) {
List<StorageNode> selectedNodes = selectNodesForReplication(dataId);
// 并发写入多个副本
List<CompletableFuture<Void>> futures = selectedNodes.stream()
.map(node -> CompletableFuture.runAsync(() -> {
node.writeData(dataId, data);
}))
.collect(Collectors.toList());
// 等待全部副本写入完成(若只要求多数成功,可改为按成功计数等待)
CompletableFuture<Void> allFutures = CompletableFuture.allOf(
futures.toArray(new CompletableFuture[0])
);
try {
allFutures.get();
} catch (Exception e) {
handleWriteFailure(dataId, selectedNodes);
}
}
/**
* 读取数据(优先读取最快副本)
*/
public byte[] readWithReplication(String dataId) {
List<StorageNode> replicaNodes = findReplicaNodes(dataId);
// 并发读取多个副本,取最先返回的结果
return replicaNodes.parallelStream()
.map(node -> {
try {
return node.readData(dataId);
} catch (Exception e) {
return null;
}
})
.filter(Objects::nonNull)
.findAny() // 并行流下取任一已成功读取的副本
.orElseThrow(() -> new DataNotFoundException(dataId));
}
/**
* 数据修复(当副本数不足时)
*/
public void repairReplication(String dataId) {
List<StorageNode> existingReplicas = findReplicaNodes(dataId);
if (existingReplicas.size() < replicationFactor) {
// 读取数据
byte[] data = existingReplicas.get(0).readData(dataId);
// 选择新的存储节点
List<StorageNode> newNodes = selectNewNodesForReplication(dataId, existingReplicas);
// 复制数据到新节点
for (StorageNode node : newNodes) {
node.writeData(dataId, data);
}
}
}
}
}
/**
* 一致性保障机制
*/
public class ConsistencyGuarantee {
/**
* 分布式事务(两阶段提交)
*/
public static class TwoPhaseCommit {
private final List<ResourceManager> participants;
private final TransactionCoordinator coordinator;
public TwoPhaseCommit(List<ResourceManager> participants) {
this.participants = participants;
this.coordinator = new TransactionCoordinator();
}
/**
* 执行两阶段提交
*/
public boolean executeTransaction(Transaction transaction) {
// 第一阶段:准备阶段
List<PrepareResult> prepareResults = participants.parallelStream()
.map(participant -> participant.prepare(transaction))
.collect(Collectors.toList());
boolean allPrepared = prepareResults.stream()
.allMatch(PrepareResult::isPrepared);
if (!allPrepared) {
// 有参与者准备失败,回滚事务
abortTransaction(transaction);
return false;
}
// 第二阶段:提交阶段
List<CommitResult> commitResults = participants.parallelStream()
.map(participant -> participant.commit(transaction))
.collect(Collectors.toList());
boolean allCommitted = commitResults.stream()
.allMatch(CommitResult::isCommitted);
if (!allCommitted) {
// 提交失败,需要人工干预
handleCommitFailure(transaction);
return false;
}
return true;
}
private void abortTransaction(Transaction transaction) {
participants.forEach(participant ->
participant.rollback(transaction)
);
}
}
/**
* 最终一致性实现
*/
public static class EventualConsistency {
private final ConflictResolver conflictResolver;
private final VectorClock vectorClock;
public EventualConsistency() {
this.conflictResolver = new LastWriteWinsResolver();
this.vectorClock = new VectorClock();
}
/**
* 基于向量时钟的版本管理
*/
public void writeWithVersion(String key, String value, String nodeId) {
// 更新向量时钟
vectorClock.increment(nodeId);
// 创建带版本的数据
VersionedData versionedData = new VersionedData(
key, value, vectorClock.getClock()
);
// 写入数据(多副本)
writeToReplicas(versionedData);
}
/**
* 读取数据并解决冲突
*/
public String readWithConflictResolution(String key) {
// 从多个副本读取数据
List<VersionedData> replicas = readFromReplicas(key);
if (replicas.isEmpty()) {
return null;
}
if (replicas.size() == 1) {
return replicas.get(0).getValue();
}
// 检测冲突
if (hasConflict(replicas)) {
// 解决冲突
VersionedData resolved = conflictResolver.resolve(replicas);
// 修复副本(读时修复)
repairReplicas(key, resolved);
return resolved.getValue();
}
// 没有冲突,返回最新版本
return getLatestVersion(replicas).getValue();
}
/**
* 读时修复(Read Repair)
*/
private void repairReplicas(String key, VersionedData correctData) {
List<StorageNode> replicaNodes = findReplicaNodes(key);
replicaNodes.parallelStream().forEach(node -> {
VersionedData nodeData = node.readVersionedData(key);
if (!correctData.getVersion().equals(nodeData.getVersion())) {
node.writeVersionedData(correctData);
}
});
}
}
/**
* 向量时钟实现
*/
static class VectorClock {
private final Map<String, Long> clock = new ConcurrentHashMap<>();
public void increment(String nodeId) {
clock.merge(nodeId, 1L, Long::sum);
}
public Map<String, Long> getClock() {
return new HashMap<>(clock);
}
public boolean happenedBefore(Map<String, Long> vc1, Map<String, Long> vc2) {
for (Map.Entry<String, Long> entry : vc1.entrySet()) {
Long v2 = vc2.get(entry.getKey());
if (v2 == null || entry.getValue() > v2) {
return false;
}
}
// 所有分量都不大于对方且两个时钟不相等,才构成严格的happened-before
return !vc1.equals(vc2);
}
}
}
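下面用一个最小示例演示VectorClock的比较语义:只有每个分量都不大于对方、且两个时钟不相等时,才构成happened-before关系(类名为演示用)。
java
public class VectorClockDemo {
    public static void main(String[] args) {
        ConsistencyGuarantee.VectorClock clockA = new ConsistencyGuarantee.VectorClock();
        ConsistencyGuarantee.VectorClock clockB = new ConsistencyGuarantee.VectorClock();

        clockA.increment("node-1");   // A = {node-1:1}
        clockB.increment("node-1");
        clockB.increment("node-2");   // B = {node-1:1, node-2:1}

        // A的每个分量都不大于B,且A != B,因此A happened-before B
        System.out.println(clockA.happenedBefore(clockA.getClock(), clockB.getClock())); // true
        // 反向不成立:B的node-2分量在A中不存在,B不可能先于A发生
        System.out.println(clockB.happenedBefore(clockB.getClock(), clockA.getClock())); // false
    }
}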
2. 性能监控与调优
java
/**
* 分布式排序性能监控
*/
public class PerformanceMonitor {
private final MetricsCollector metricsCollector;
private final AlertManager alertManager;
public PerformanceMonitor() {
this.metricsCollector = new MetricsCollector();
this.alertManager = new AlertManager();
}
/**
* 监控关键指标
*/
public void monitorSortingJob(SortingJob job) {
// 收集Shuffle指标
ShuffleMetrics shuffleMetrics = metricsCollector.collectShuffleMetrics();
monitorShufflePerformance(shuffleMetrics);
// 收集内存使用情况
MemoryMetrics memoryMetrics = metricsCollector.collectMemoryMetrics();
monitorMemoryUsage(memoryMetrics);
// 收集网络I/O
NetworkMetrics networkMetrics = metricsCollector.collectNetworkMetrics();
monitorNetworkTraffic(networkMetrics);
// 收集CPU使用率
CpuMetrics cpuMetrics = metricsCollector.collectCpuMetrics();
monitorCpuUsage(cpuMetrics);
// 检测数据倾斜
detectDataSkew(job);
}
/**
* 检测和预警数据倾斜
*/
private void detectDataSkew(SortingJob job) {
List<PartitionMetrics> partitionMetrics = job.getPartitionMetrics();
// 计算分区间数据量差异
long[] partitionSizes = partitionMetrics.stream()
.mapToLong(PartitionMetrics::getSize)
.toArray();
double avgSize = Arrays.stream(partitionSizes).average().orElse(0);
double maxSize = Arrays.stream(partitionSizes).max().orElse(0);
// 计算倾斜度
double skewRatio = maxSize / avgSize;
if (skewRatio > 3.0) { // 阈值可配置
alertManager.sendAlert(
AlertLevel.WARNING,
"Data skew detected",
String.format("Skew ratio: %.2f, max partition: %d, avg: %.2f",
skewRatio, maxSize, avgSize)
);
// 自动触发再平衡
rebalancePartitions(job);
}
}
/**
* 自动调优参数
*/
public void autoTuneParameters(SparkSession spark, Dataset<?> dataset) {
// 动态调整Shuffle分区数
long dataSize = estimateDataSize(dataset);
int optimalPartitions = calculateOptimalPartitions(dataSize);
spark.conf().set("spark.sql.shuffle.partitions",
String.valueOf(optimalPartitions));
// 动态调整Executor内存
MemoryMetrics memoryMetrics = metricsCollector.collectMemoryMetrics();
if (memoryMetrics.getSpillCount() > 100) {
// 频繁spill,增加内存
increaseExecutorMemory(spark, memoryMetrics);
}
// 启用压缩
if (dataSize > 100 * 1024 * 1024) { // 大于100MB
spark.conf().set("spark.shuffle.compress", "true");
spark.conf().set("spark.shuffle.spill.compress", "true");
}
}
/**
* 生成性能报告
*/
public PerformanceReport generateReport(SortingJob job) {
PerformanceReport report = new PerformanceReport();
// 收集时间指标
long totalTime = job.getTotalTime();
long shuffleTime = job.getShuffleTime();
long sortTime = job.getSortTime();
long ioTime = job.getIoTime();
report.setTotalTime(totalTime);
report.setShuffleTime(shuffleTime);
report.setSortTime(sortTime);
report.setIoTime(ioTime);
// 计算效率指标
double shuffleRatio = (double) shuffleTime / totalTime;
double sortRatio = (double) sortTime / totalTime;
double ioRatio = (double) ioTime / totalTime;
report.setShuffleRatio(shuffleRatio);
report.setSortRatio(sortRatio);
report.setIoRatio(ioRatio);
// 收集资源使用情况
report.setCpuUsage(job.getAvgCpuUsage());
report.setMemoryUsage(job.getAvgMemoryUsage());
report.setNetworkUsage(job.getNetworkTraffic());
// 生成优化建议
List<OptimizationSuggestion> suggestions = generateSuggestions(report);
report.setSuggestions(suggestions);
return report;
}
private List<OptimizationSuggestion> generateSuggestions(PerformanceReport report) {
List<OptimizationSuggestion> suggestions = new ArrayList<>();
// 分析瓶颈并提出建议
if (report.getShuffleRatio() > 0.5) {
suggestions.add(new OptimizationSuggestion(
"Shuffle优化",
"Shuffle时间占比过高,建议:\n" +
"1. 增加shuffle分区数\n" +
"2. 启用shuffle压缩\n" +
"3. 使用更好的序列化器",
Priority.HIGH
));
}
if (report.getIoRatio() > 0.3) {
suggestions.add(new OptimizationSuggestion(
"I/O优化",
"I/O时间占比过高,建议:\n" +
"1. 使用列式存储格式(Parquet/ORC)\n" +
"2. 启用谓词下推\n" +
"3. 增加内存缓存",
Priority.MEDIUM
));
}
if (report.getMemoryUsage() > 0.8) {
suggestions.add(new OptimizationSuggestion(
"内存优化",
"内存使用率过高,建议:\n" +
"1. 增加Executor内存\n" +
"2. 减少并行度\n" +
"3. 优化数据分区",
Priority.HIGH
));
}
return suggestions;
}
}
六、实战案例与最佳实践
1. 电商平台交易数据排序案例
java
/**
* 电商交易数据排序实战
*/
public class ECommerceTransactionSorting {
/**
* 案例需求:
* 1. 按交易时间排序,获取最新交易
* 2. 按用户分组,获取每个用户的交易历史
* 3. 按金额排序,获取高价值交易
* 4. 多维度联合排序
*/
public static void main(String[] args) {
SparkSession spark = SparkSession.builder()
.appName("ECommerce Transaction Sorting")
.config("spark.executor.memory", "8g")
.config("spark.executor.cores", "4")
.config("spark.dynamicAllocation.enabled", "true")
.getOrCreate();
// 读取交易数据(Parquet格式)
Dataset<Row> transactions = spark.read()
.parquet("hdfs://transactions/*.parquet")
.cache(); // 缓存频繁访问的数据
// 场景1:按时间排序获取最新交易(全排序)
Dataset<Row> latestTransactions = transactions
.orderBy(col("transaction_time").desc())
.limit(1000000);
// 场景2:按用户分组排序(二次排序)
WindowSpec userWindow = Window
.partitionBy("user_id")
.orderBy(col("transaction_time").desc());
Dataset<Row> userTransactionHistory = transactions
.withColumn("row_num", row_number().over(userWindow))
.filter(col("row_num") <= 100) // 每个用户最近100笔交易
.drop("row_num");
// 场景3:高价值交易排序(Top-N模式)
// 中间再做sortWithinPartitions/repartition会被随后的全局排序覆盖,直接全局排序即可;只取前N条时可再加limit(N)
Dataset<Row> highValueTransactions = transactions
        .select("transaction_id", "user_id", "amount", "transaction_time")
        .filter(col("amount").gt(1000))
        .sort(col("amount").desc());
// 场景4:多维度联合排序(时间+金额+用户)
// 先按(transaction_date, user_segment)范围分区,分区之间即按这两列有序;
// 再在分区内部按时间、金额、用户排序,若最后追加一次全局sort会把分区内排序全部推翻
Dataset<Row> multiDimSorted = transactions
        .repartitionByRange(1000,
                col("transaction_date"),
                col("user_segment"))
        .sortWithinPartitions(
                col("transaction_time").desc(),
                col("amount").desc(),
                col("user_id"));
// 写入排序结果
String outputPath = "hdfs://sorted-transactions/";
latestTransactions.write()
.mode(SaveMode.Overwrite)
.parquet(outputPath + "/latest");
userTransactionHistory.write()
.mode(SaveMode.Overwrite)
.parquet(outputPath + "/by_user");
highValueTransactions.write()
.mode(SaveMode.Overwrite)
.parquet(outputPath + "/high_value");
// 创建索引加速查询
createIndexes(spark, outputPath);
spark.stop();
}
/**
* 创建查询索引
*/
private static void createIndexes(SparkSession spark, String dataPath) {
// 1. 为时间字段创建范围索引
Dataset<Row> timeIndex = spark.read()
.parquet(dataPath + "/latest")
.select("transaction_id", "transaction_time")
.sort("transaction_time");
timeIndex.write()
.mode(SaveMode.Overwrite)
.parquet(dataPath + "/index/time_index");
// 2. 为用户ID创建哈希索引
Dataset<Row> userIdIndex = spark.read()
.parquet(dataPath + "/by_user")
.select("user_id", "transaction_id", "transaction_time")
.sortWithinPartitions("user_id");
userIdIndex.write()
.mode(SaveMode.Overwrite)
.parquet(dataPath + "/index/user_id_index");
// 3. 创建布隆过滤器索引
Dataset<Row> bloomFilterData = spark.read()
.parquet(dataPath + "/high_value")
.select("user_id", "transaction_id");
// 生成布隆过滤器
BloomFilterIndex bloomFilter = createBloomFilter(bloomFilterData);
// 保存布隆过滤器
saveBloomFilter(bloomFilter, dataPath + "/index/bloom_filter");
}
}
2. 最佳实践总结
架构设计最佳实践
java
/**
* 分布式排序最佳实践总结
*/
public class SortingBestPractices {
/**
* 实践1:根据数据特征选择排序模式
*/
public SortStrategy selectOptimalStrategy(DataCharacteristics characteristics) {
if (characteristics.isSortedInput()) {
return SortStrategy.MERGE_ONLY; // 输入已部分有序
} else if (characteristics.getDataSize() < 1_000_000) {
return SortStrategy.SINGLE_NODE; // 小数据量单机排序
} else if (characteristics.hasSkewness()) {
return SortStrategy.SKEW_AWARE; // 有数据倾斜
} else {
return SortStrategy.DISTRIBUTED; // 大数据量分布式排序
}
}
/**
* 实践2:合理的分区策略
*/
public void configureOptimalPartitioning(SparkSession spark,
Dataset<?> dataset) {
long dataSize = estimateDataSize(dataset);
int numCores = getTotalCores(spark);
int memoryPerCore = getMemoryPerCore(spark);
// 计算最优分区数
int optimalPartitions = (int) Math.ceil(
(double) dataSize / (memoryPerCore * numCores * 0.7)
);
// 确保分区数在合理范围内
optimalPartitions = Math.max(100, Math.min(optimalPartitions, 10000));
spark.conf().set("spark.sql.shuffle.partitions",
String.valueOf(optimalPartitions));
}
/**
* 实践3:内存优化配置
*/
public Map<String, String> getMemoryOptimizationConfigs() {
Map<String, String> configs = new HashMap<>();
// 堆外内存配置
configs.put("spark.memory.offHeap.enabled", "true");
configs.put("spark.memory.offHeap.size", "2g");
// 序列化配置
configs.put("spark.serializer",
"org.apache.spark.serializer.KryoSerializer");
// 内存管理
configs.put("spark.memory.fraction", "0.8");
configs.put("spark.memory.storageFraction", "0.3");
// Shuffle优化
configs.put("spark.shuffle.spill.compress", "true");
configs.put("spark.shuffle.compress", "true");
configs.put("spark.io.compression.codec", "lz4");
return configs;
}
/**
* 实践4:容错配置
*/
public Map<String, String> getFaultToleranceConfigs() {
Map<String, String> configs = new HashMap<>();
// Checkpoint配置
configs.put("spark.checkpoint.dir", "hdfs://checkpoints/");
configs.put("spark.sql.streaming.checkpointLocation",
"hdfs://streaming-checkpoints/");
// 推测执行
configs.put("spark.speculation", "true");
configs.put("spark.speculation.interval", "1000");
configs.put("spark.speculation.multiplier", "1.5");
// 重试配置
configs.put("spark.task.maxFailures", "4");
configs.put("spark.stage.maxConsecutiveAttempts", "4");
return configs;
}
/**
* 实践5:监控告警配置
*/
public MonitoringConfig getMonitoringConfig() {
MonitoringConfig config = new MonitoringConfig();
// 性能指标阈值
config.setShuffleSpillThreshold(0.3); // Shuffle spill超过30%时告警
config.setMemoryUsageThreshold(0.85); // 内存使用超过85%时告警
config.setCpuUsageThreshold(0.9); // CPU使用超过90%时告警
config.setDataSkewThreshold(3.0); // 数据倾斜超过3倍时告警
// 监控频率
config.setMetricsCollectionInterval(5000); // 5秒收集一次指标
config.setAlertCheckInterval(60000); // 1分钟检查一次告警
return config;
}
}
总结
分布式排序与聚合是现代大数据处理的核心技术,其设计需要综合考虑数据特征、计算资源、性能要求和成本约束。以下是关键要点:
核心原则(对应的Spark参数示意见下方代码):
- 数据本地性优先:尽量减少数据移动
- 增量处理:避免全量重排序
- 资源感知:动态调整计算资源
- 容错设计:确保作业可靠性
- 监控调优:持续优化性能
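以下是把上述原则落到Spark参数上的一份示意配置,帮助把抽象原则和具体参数对应起来;取值需结合集群规模与作业特征调整,这里均为演示假设。
java
import java.util.HashMap;
import java.util.Map;

public class CorePrincipleConfigSketch {
    // 核心原则到Spark参数的示意映射(取值为演示假设)
    public static Map<String, String> sketch() {
        Map<String, String> conf = new HashMap<>();
        // 数据本地性优先:适当延长本地性等待时间,减少跨节点读取
        conf.put("spark.locality.wait", "3s");
        // 资源感知:动态资源分配 + AQE按运行时统计调整分区
        conf.put("spark.dynamicAllocation.enabled", "true");
        conf.put("spark.sql.adaptive.enabled", "true");
        // 容错设计:推测执行与任务重试
        conf.put("spark.speculation", "true");
        conf.put("spark.task.maxFailures", "4");
        // 监控调优:记录事件日志,便于事后分析作业指标
        conf.put("spark.eventLog.enabled", "true");
        return conf;
    }
}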
技术选型建议:
| 场景 | 推荐技术 | 原因 |
|---|---|---|
| 批处理全排序 | MapReduce全排序 | 成熟稳定,容错性好 |
| 交互式查询 | Spark SQL + 索引 | 响应快,支持复杂查询 |
| 实时流排序 | Flink + 时间窗口 | 低延迟,精确一次语义 |
| 海量数据排序 | 分治策略 + 外部排序 | 内存友好,可扩展 |
| 多维排序 | Z-Order + 索引 | 支持复杂查询模式 |
未来趋势:
- 硬件加速:GPU/TPU加速排序算法
- 智能化:AI驱动的自动优化
- 存算分离:云原生架构
- 联邦学习:隐私保护下的分布式排序
- 量子计算:量子排序算法探索
分布式排序与聚合技术仍在快速发展,随着数据量的持续增长和计算需求的多样化,新的优化技术和架构模式将不断涌现。关键在于保持技术敏感性,根据实际业务需求选择最合适的解决方案。