一、分布式排序架构概览
1. 分层排序架构
graph TB
A[原始数据] --> B[数据分片]
B --> C[局部排序]
B --> D[局部排序]
B --> E[局部排序]
C --> F[归并节点]
D --> F
E --> F
F --> G[全局排序结果]
subgraph "排序优化层"
H[采样预估]
I[动态分区]
J[负载均衡]
end
H --> B
I --> B
J --> C
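上面的分层架构可以用一个单机小例子直观地串起来:先抽样估计分区边界,再把数据按范围分片、各分片独立局部排序,最后按分区顺序拼接即为全局有序结果。下面是一个演示用的最小示意(类名、采样方式、分区数均为简化假设,不是生产实现)。
java
import java.util.*;
import java.util.stream.Collectors;

public class LayeredSortSketch {
    public static void main(String[] args) {
        // 原始数据:演示用随机整数
        List<Integer> data = new Random(42).ints(1000, 0, 10_000)
                .boxed().collect(Collectors.toList());

        // 采样预估:简化为取前100条样本,按分位点得到分区边界
        int numPartitions = 4;
        List<Integer> sample = new ArrayList<>(data.subList(0, 100));
        Collections.sort(sample);
        int[] bounds = new int[numPartitions - 1];
        for (int i = 0; i < bounds.length; i++) {
            bounds[i] = sample.get((i + 1) * sample.size() / numPartitions);
        }

        // 数据分片:按边界把记录路由到对应"节点"
        List<List<Integer>> partitions = new ArrayList<>();
        for (int i = 0; i < numPartitions; i++) partitions.add(new ArrayList<>());
        for (int v : data) {
            int p = 0;
            while (p < bounds.length && v >= bounds[p]) p++;
            partitions.get(p).add(v);
        }

        // 局部排序:各分区独立排序(分布式环境中由各节点并行完成)
        partitions.forEach(Collections::sort);

        // 归并:分区之间本身按范围有序,顺序拼接即得全局有序结果
        List<Integer> globalSorted = partitions.stream()
                .flatMap(List::stream).collect(Collectors.toList());
        System.out.println("全局有序前10条: " + globalSorted.subList(0, 10));
    }
}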
2. 核心排序模式对比
java
// 分布式排序模式枚举
public enum DistributedSortPattern {
/**
* 模式1: 全排序 (Total Order Sort)
* 所有数据全局有序
*/
TOTAL_ORDER("全排序", "全局完全有序", "MapReduce排序",
Arrays.asList("全局样本", "范围分区", "全局归并")),
/**
* 模式2: 分区排序 (Partitioned Sort)
* 每个分区内部有序,分区间无序
*/
PARTITIONED("分区排序", "分区内有序", "Spark sortByKey",
Arrays.asList("哈希分区", "局部排序", "分区归并")),
/**
* 模式3: 二次排序 (Secondary Sort)
* 主键有序,次键分组内有序
*/
SECONDARY("二次排序", "主次键有序", "Hive Cluster By",
Arrays.asList("复合键", "自定义分区", "分组排序")),
/**
* 模式4: 拓扑排序 (Topological Sort)
* 基于依赖关系排序
*/
TOPOLOGICAL("拓扑排序", "依赖关系序", "DAG调度",
Arrays.asList("图遍历", "依赖检测", "并行调度"));
private final String name;
private final String description;
private final String typicalUse;
private final List<String> keyTechniques;
DistributedSortPattern(String name, String description,
String typicalUse, List<String> keyTechniques) {
this.name = name;
this.description = description;
this.typicalUse = typicalUse;
this.keyTechniques = keyTechniques;
}
/**
* 根据数据特征选择排序模式
*/
public static DistributedSortPattern selectPattern(DataCharacteristics characteristics) {
// 数据量级
long dataSize = characteristics.getDataSize();
// 数据分布
DistributionType distribution = characteristics.getDistribution();
// 排序要求
SortRequirement requirement = characteristics.getSortRequirement();
if (requirement.isTotalOrder()) {
    // 全局有序需求统一选择全排序;dataSize只影响实现层的采样规模与分区数
    return TOTAL_ORDER;
} else if (requirement.hasSecondaryKey()) {
return SECONDARY;
} else if (characteristics.hasDependencies()) {
return TOPOLOGICAL;
} else {
return PARTITIONED;
}
}
}
二、分布式排序算法实现
1. MapReduce全排序实现
java
// MapReduce全排序核心实现
public class MapReduceTotalSort {
/**
* Mapper: 分区采样与局部排序
*/
public static class SamplingMapper
extends Mapper<LongWritable, Text, Text, Text> {
private final List<Text> samples = new ArrayList<>();
private static final int SAMPLE_SIZE = 100000;
private long recordCount = 0; // 已读记录总数,蓄水池采样需要
@Override
protected void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
// 1. 采样(蓄水池采样:前SAMPLE_SIZE条直接收集,之后以SAMPLE_SIZE/recordCount的概率随机替换)
recordCount++;
if (samples.size() < SAMPLE_SIZE) {
    samples.add(new Text(value));
} else {
    int replaceIndex = (int) (Math.random() * recordCount);
    if (replaceIndex < SAMPLE_SIZE) {
        samples.set(replaceIndex, new Text(value));
    }
}
// 2. 输出(键值交换,以数据为键)
context.write(value, new Text(""));
}
@Override
protected void cleanup(Context context)
throws IOException, InterruptedException {
// 3. 发送采样数据到Reducer
for (Text sample : samples) {
context.write(new Text("__SAMPLE__"), sample);
}
}
}
/**
* Partitioner: 基于采样数据范围分区
*/
public static class RangePartitioner extends Partitioner<Text, Text>
        implements Configurable {
    private Text[] splitPoints; // 分区边界点
    private Configuration conf;

    @Override
    public void setConf(Configuration conf) {
        this.conf = conf;
        // 任务初始化时注入配置:读取采样数据并计算分区边界
        String[] samples = conf.get("mapreduce.totalorder.samples", "").split(",");
        // 排序采样数据
        Arrays.sort(samples);
        // 计算分区边界(按分位点均匀切分)
        int numPartitions = conf.getInt("mapreduce.job.reduces", 1);
        splitPoints = new Text[numPartitions - 1];
        for (int i = 0; i < splitPoints.length; i++) {
            int index = (i + 1) * samples.length / numPartitions;
            splitPoints[i] = new Text(samples[index]);
        }
    }

    @Override
    public Configuration getConf() {
        return conf;
    }
@Override
public int getPartition(Text key, Text value, int numPartitions) {
// 特殊键处理
if (key.toString().equals("__SAMPLE__")) {
return 0; // 采样数据发送到第一个分区
}
// 二分查找确定分区
return findPartition(key);
}
private int findPartition(Text key) {
int left = 0;
int right = splitPoints.length - 1;
while (left <= right) {
int mid = left + (right - left) / 2;
int cmp = key.compareTo(splitPoints[mid]);
if (cmp == 0) {
return mid + 1;
} else if (cmp < 0) {
right = mid - 1;
} else {
left = mid + 1;
}
}
return left;
}
}
/**
* Reducer: 全局归并排序
*/
public static class TotalSortReducer
extends Reducer<Text, Text, Text, Text> {
private boolean isSampleReducer = false;
@Override
protected void setup(Context context) {
// 判断是否为采样数据Reducer
isSampleReducer = (context.getTaskAttemptID().getTaskID().getId() == 0);
}
@Override
protected void reduce(Text key, Iterable<Text> values, Context context)
throws IOException, InterruptedException {
if (isSampleReducer && key.toString().equals("__SAMPLE__")) {
// 处理采样数据,计算分区边界
List<String> samples = new ArrayList<>();
for (Text value : values) {
samples.add(value.toString());
}
// 计算并保存分区边界
calculateSplitPoints(samples, context);
} else {
// 正常排序输出(每个分区内已有序)
for (Text value : values) {
context.write(key, value);
}
}
}
}
/**
* 完整的MapReduce排序作业配置
*/
public static Job configureTotalSortJob(Path inputPath, Path outputPath)
throws IOException {
Job job = Job.getInstance();
job.setJobName("Distributed-Total-Sort");
// 输入输出配置
FileInputFormat.addInputPath(job, inputPath);
FileOutputFormat.setOutputPath(job, outputPath);
// Mapper配置
job.setMapperClass(SamplingMapper.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
// Partitioner配置
job.setPartitionerClass(RangePartitioner.class);
// Reducer配置
job.setReducerClass(TotalSortReducer.class);
job.setNumReduceTasks(100); // 根据数据量调整
// 排序相关配置
job.setSortComparatorClass(Text.Comparator.class); // 键排序器
job.setGroupingComparatorClass(Text.Comparator.class); // 分组比较器
// 性能优化配置:全排序没有可局部聚合的逻辑,这里不设置Combiner
// 压缩配置:通过Configuration开启Map输出压缩
job.getConfiguration().setBoolean("mapreduce.map.output.compress", true);
job.getConfiguration().setClass("mapreduce.map.output.compress.codec",
        GzipCodec.class, CompressionCodec.class);
return job;
}
}
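实际工程中通常不必手写采样逻辑与范围分区器:Hadoop自带的InputSampler与TotalOrderPartitioner实现了同样的"采样-划界-范围分区"思路。下面是一个作业配置的示意片段,其中的输入格式、采样参数、类名与分区文件路径均为演示用的假设值。
java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.partition.InputSampler;
import org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner;

public class BuiltinTotalSortJob {
    public static Job configure(Path input, Path output, Path partitionFile) throws Exception {
        Job job = Job.getInstance(new Configuration(), "builtin-total-sort");
        job.setInputFormatClass(KeyValueTextInputFormat.class);
        FileInputFormat.addInputPath(job, input);
        FileOutputFormat.setOutputPath(job, output);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setNumReduceTasks(100);

        // 随机采样:采样率、样本上限、最多采样的分片数均为演示值
        InputSampler.Sampler<Text, Text> sampler =
                new InputSampler.RandomSampler<>(0.01, 100000, 10);

        // 采样得到的分区边界写入分区文件,供TotalOrderPartitioner加载
        TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), partitionFile);
        InputSampler.writePartitionFile(job, sampler);

        // 使用内置范围分区器,各Reduce输出按分区号拼接即为全局有序
        job.setPartitionerClass(TotalOrderPartitioner.class);
        return job;
    }
}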
2. Spark分布式排序优化
scala
// Spark分布式排序优化实现
object SparkDistributedSort {
// 案例:电商订单数据排序(100亿条记录)
case class Order(orderId: Long, userId: Long, amount: Double, timestamp: Long)
def main(args: Array[String]): Unit = {
val spark = SparkSession.builder()
.appName("Large-Scale Order Sorting")
.config("spark.sql.adaptive.enabled", "true")
.config("spark.sql.adaptive.coalescePartitions.enabled", "true")
.config("spark.sql.adaptive.skewJoin.enabled", "true")
.config("spark.sql.shuffle.partitions", "1000") // 初始分区数
.master("spark://master:7077")
.getOrCreate()
// 1. 读取海量数据
val ordersDF = spark.read
.parquet("hdfs://orders/*.parquet")
.repartition(1000) // 数据重分区
// 2. 智能采样预估数据分布
val sampleDF = ordersDF.sample(0.001) // 0.1%采样
val distribution = analyzeDistribution(sampleDF)
// 3. 基于采样的动态分区策略
val partitionBounds = calculateOptimalPartitions(distribution)
// 4. 分区排序:先按userId做范围重分区,再做分区内排序(避免一次全局排序的大shuffle)
val sortedDF = ordersDF
  .repartitionByRange(1000, col("userId"))      // 基于userId范围分区
  .sortWithinPartitions(col("timestamp").desc)  // 分区内按时间倒序
// 5. 派生日期列并按日期重分区,便于后续分区写入(AQE会自动合并小分区)
val optimizedDF = sortedDF
  .withColumn("date", to_date(from_unixtime(col("timestamp")))) // timestamp为秒级时间戳
  .repartition(col("date"))                     // 按日期重新分区
// 6. 写入排序结果
optimizedDF.write
.partitionBy("date")
.mode(SaveMode.Overwrite)
.parquet("hdfs://sorted-orders/")
// 7. 性能监控
monitorSortingPerformance(spark, optimizedDF)
spark.stop()
}
/**
* 分析数据分布特征
*/
def analyzeDistribution(df: DataFrame): DistributionAnalysis = {
val stats = df.select(
count("*").as("total_count"),
approx_count_distinct(col("userId")).as("distinct_users"),
min(col("timestamp")).as("min_time"),
max(col("timestamp")).as("max_time"),
percentile_approx(col("amount"), 0.5).as("median_amount"),
percentile_approx(col("amount"), 0.95).as("p95_amount")
).collect()
DistributionAnalysis(
totalCount = stats(0).getAs[Long]("total_count"),
distinctUsers = stats(0).getAs[Long]("distinct_users"),
timeRange = stats(0).getAs[Long]("max_time") - stats(0).getAs[Long]("min_time"),
skewFactor = stats(0).getAs[Double]("p95_amount") / stats(0).getAs[Double]("median_amount")
)
}
/**
* 计算最优分区策略
*/
def calculateOptimalPartitions(analysis: DistributionAnalysis): Array[Long] = {
val targetPartitionSize = 128 * 1024 * 1024L // 目标分区大小128MB
val estimatedDataSize = analysis.totalCount * 100 // 估算每条记录100字节
val numPartitions = Math.ceil(estimatedDataSize.toDouble / targetPartitionSize).toInt
// 处理数据倾斜:对热门用户单独分区
val hotUserThreshold = analysis.totalCount / analysis.distinctUsers * 10
val hotUsers = identifyHotUsers(analysis, hotUserThreshold)
// 生成分区边界
generatePartitionBounds(numPartitions, hotUsers)
}
/**
* 基于RDD的二次排序实现
*/
def secondarySort(ordersRDD: RDD[Order]): RDD[(Long, List[Order])] = {
// 自定义分区器:基于userId分区
val partitioner = new SecondarySortPartitioner(100)
// 创建复合键:(userId, timestamp)
val keyedRDD = ordersRDD.map(order =>
((order.userId, order.timestamp), order)
)
// 使用自定义分区器
val partitionedRDD = keyedRDD.partitionBy(partitioner)
// 分区内按timestamp排序
val sortedRDD = partitionedRDD.mapPartitions { iter =>
// 收集分区内所有数据
val allData = iter.toList
// 分组并排序
allData.groupBy(_._1._1) // 按userId分组
.mapValues(_.map(_._2).sortBy(_.timestamp).reverse) // 按时间倒序
.iterator
}
sortedRDD
}
/**
* 自定义二次排序分区器
*/
class SecondarySortPartitioner(numParts: Int) extends Partitioner {
override def numPartitions: Int = numParts
override def getPartition(key: Any): Int = {
val k = key.asInstanceOf[(Long, Long)]
// 只根据userId分区(按位与保证结果非负),timestamp仅用于排序
(k._1.hashCode & Int.MaxValue) % numPartitions
}
}
}
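上面的secondarySort在mapPartitions中把整个分区收集为List再分组排序,分区较大时容易内存溢出;更稳妥的做法是用repartitionAndSortWithinPartitions,让排序在shuffle阶段完成。下面给出一个Java版的示意(Order即上文的case class,类名与字段访问方式属演示假设)。
java
import java.io.Serializable;
import java.util.Comparator;
import org.apache.spark.Partitioner;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import scala.Tuple2;

public class SecondarySortJava {

    // 复合键:(userId, timestamp),只按userId分区,timestamp参与排序
    public static JavaPairRDD<Tuple2<Long, Long>, Order> secondarySort(
            JavaRDD<Order> orders, int numPartitions) {

        JavaPairRDD<Tuple2<Long, Long>, Order> keyed = orders.mapToPair(
                o -> new Tuple2<>(new Tuple2<>(o.userId(), o.timestamp()), o));

        // 自定义分区器:仅按复合键的第一个分量(userId)决定分区
        Partitioner byUser = new Partitioner() {
            @Override public int numPartitions() { return numPartitions; }
            @Override public int getPartition(Object key) {
                long userId = ((Tuple2<Long, Long>) key)._1();
                return (int) ((userId % numPartitions + numPartitions) % numPartitions);
            }
        };

        // 比较器:userId升序、timestamp降序;排序发生在shuffle阶段,无需把整个分区载入内存
        Comparator<Tuple2<Long, Long>> cmp =
                (Comparator<Tuple2<Long, Long>> & Serializable) (a, b) -> {
                    int c = Long.compare(a._1(), b._1());
                    return c != 0 ? c : Long.compare(b._2(), a._2());
                };

        return keyed.repartitionAndSortWithinPartitions(byUser, cmp);
    }
}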
三、分布式聚合算法
1. MapReduce聚合优化
java
// MapReduce聚合高级优化
public class MapReduceAdvancedAggregation {
/**
* Combiner优化:局部聚合减少数据传输
*/
public static class OptimizedCombiner
extends Reducer<Text, LongWritable, Text, LongWritable> {
private final Map<String, Long> localAggregation = new HashMap<>();
@Override
protected void reduce(Text key, Iterable<LongWritable> values,
                      Context context) throws IOException, InterruptedException {
    long sum = 0;
    for (LongWritable value : values) {
        sum += value.get();
    }
    // 累加到本地聚合结果
    localAggregation.merge(key.toString(), sum, Long::sum);
    // 内存控制:本地聚合的key数超过阈值时flush输出
    if (localAggregation.size() > 10000) {
        flushToContext(context);
    }
}
@Override
protected void cleanup(Context context)
throws IOException, InterruptedException {
// 输出所有本地聚合结果
flushToContext(context);
}
private void flushToContext(Context context)
throws IOException, InterruptedException {
for (Map.Entry<String, Long> entry : localAggregation.entrySet()) {
context.write(new Text(entry.getKey()),
new LongWritable(entry.getValue()));
}
localAggregation.clear();
}
}
/**
* Reducer端聚合优化:内存+磁盘混合存储
*/
public static class HybridAggregationReducer
extends Reducer<Text, LongWritable, Text, LongWritable> {
// 内存聚合(LRU缓存)
private final Map<String, Long> memoryCache = new LinkedHashMap<String, Long>(1000, 0.75f, true) {
@Override
protected boolean removeEldestEntry(Map.Entry<String, Long> eldest) {
// 超过阈值时,将最旧的条目刷到磁盘
if (size() > 5000) {
flushToDisk(eldest.getKey(), eldest.getValue());
return true;
}
return false;
}
};
// 磁盘存储(用于大数据量)
private Path diskStoragePath;
private SequenceFile.Writer diskWriter;
@Override
protected void setup(Context context) throws IOException {
// 初始化磁盘存储
diskStoragePath = new Path("/tmp/aggregation/" +
context.getTaskAttemptID().toString());
diskWriter = SequenceFile.createWriter(context.getConfiguration(),
SequenceFile.Writer.file(diskStoragePath),
SequenceFile.Writer.keyClass(Text.class),
SequenceFile.Writer.valueClass(LongWritable.class));
}
@Override
protected void reduce(Text key, Iterable<LongWritable> values,
Context context) {
long sum = 0;
for (LongWritable value : values) {
sum += value.get();
}
String keyStr = key.toString();
// 尝试内存聚合
Long current = memoryCache.get(keyStr);
if (current != null) {
memoryCache.put(keyStr, current + sum);
} else {
memoryCache.put(keyStr, sum);
}
}
@Override
protected void cleanup(Context context)
        throws IOException, InterruptedException {
    // 1. 读取已溢写到磁盘的部分结果,与内存结果合并,避免同一key输出两份部分值
    Map<String, Long> finalAggregation = readDiskResults(context);
    for (Map.Entry<String, Long> entry : memoryCache.entrySet()) {
        finalAggregation.merge(entry.getKey(), entry.getValue(), Long::sum);
    }
    // 2. 输出合并后的最终结果
    for (Map.Entry<String, Long> entry : finalAggregation.entrySet()) {
        context.write(new Text(entry.getKey()),
                new LongWritable(entry.getValue()));
    }
    // 3. 清理临时文件
    cleanupTempFiles();
}
private void flushToDisk(String key, Long value) {
try {
diskWriter.append(new Text(key), new LongWritable(value));
} catch (IOException e) {
// 处理IO异常
}
}
private Map<String, Long> readDiskResults(Context context) throws IOException {
    Map<String, Long> diskAggregation = new HashMap<>();
    if (diskWriter != null) {
        diskWriter.close();
        // 读取磁盘文件并按key聚合
        try (SequenceFile.Reader reader = new SequenceFile.Reader(
                context.getConfiguration(),
                SequenceFile.Reader.file(diskStoragePath))) {
            Text key = new Text();
            LongWritable value = new LongWritable();
            while (reader.next(key, value)) {
                diskAggregation.merge(key.toString(), value.get(), Long::sum);
            }
        }
    }
    return diskAggregation;
}
}
/**
* 两阶段聚合:处理数据倾斜
*/
public static class TwoPhaseAggregation {
/**
* 第一阶段:打散热点数据
*/
public static class PhaseOneMapper
extends Mapper<LongWritable, Text, Text, LongWritable> {
@Override
protected void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
String[] parts = value.toString().split(",");
if (parts.length >= 2) {
String originalKey = parts[0];
long count = Long.parseLong(parts[1]);
// 检测热点key
if (isHotKey(originalKey)) {
// 热点key打散:追加后缀拆成10个分片;余数补到前面的分片,避免count/10丢失精度
long base = count / 10;
long remainder = count % 10;
for (int i = 0; i < 10; i++) {
    String shuffledKey = originalKey + "_" + i;
    context.write(new Text(shuffledKey),
            new LongWritable(base + (i < remainder ? 1 : 0)));
}
} else {
// 普通key直接输出
context.write(new Text(originalKey),
new LongWritable(count));
}
}
}
private boolean isHotKey(String key) {
// 基于历史统计或采样判断是否为热点
return HOT_KEYS.contains(key);
}
}
/**
* 第二阶段:合并打散的数据
*/
public static class PhaseTwoReducer
extends Reducer<Text, LongWritable, Text, LongWritable> {
@Override
protected void reduce(Text key, Iterable<LongWritable> values,
                      Context context) throws IOException, InterruptedException {
long sum = 0;
for (LongWritable value : values) {
sum += value.get();
}
String keyStr = key.toString();
// 如果是打散的key,需要还原原始key后再合并
// 前提:自定义分区器按原始key路由,保证同一原始key的所有分片进入同一个Reducer;
// 否则应再链一个作业,在其Mapper中去掉后缀后按原始key二次求和(示意见本小节末尾)
if (keyStr.contains("_")) {
    String originalKey = keyStr.split("_")[0];
    // 缓存部分聚合结果,等待该key的所有分片到达
cachePartialResult(originalKey, sum);
// 检查是否所有部分都到达
if (allPartsArrived(originalKey)) {
long total = mergeAllParts(originalKey);
context.write(new Text(originalKey),
new LongWritable(total));
clearCache(originalKey);
}
} else {
// 普通key直接输出
context.write(key, new LongWritable(sum));
}
}
}
}
}
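如上所述,PhaseTwoReducer的合并逻辑依赖"按原始key路由"的分区器;更常见、也更简单的做法是链两个作业:第一个作业按打散后的key聚合,第二个作业在Mapper里去掉随机后缀,再按原始key做普通求和。下面是该Mapper的一个最小示意(假设上一阶段的输出为"saltedKey\t部分和"的文本行,类名为演示用)。
java
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// 两阶段聚合的第二个作业:Mapper去掉随机后缀,还原原始key
public class DesaltMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

    @Override
    protected void map(LongWritable offset, Text line, Context context)
            throws IOException, InterruptedException {
        // 假设上一阶段输出格式为 "saltedKey\t部分聚合值"
        String[] parts = line.toString().split("\t");
        if (parts.length < 2) {
            return; // 跳过格式异常的行
        }
        String saltedKey = parts[0];
        long partialSum = Long.parseLong(parts[1]);

        // 去掉"_后缀"还原原始key;没有后缀的key原样输出
        int idx = saltedKey.lastIndexOf('_');
        String originalKey = (idx > 0) ? saltedKey.substring(0, idx) : saltedKey;

        context.write(new Text(originalKey), new LongWritable(partialSum));
    }
}
// 第二个作业的Reducer做普通求和即可得到最终聚合结果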
2. Spark分布式聚合
scala
// Spark高级聚合优化
object SparkAdvancedAggregation {
// 案例:用户行为分析聚合
case class UserBehavior(userId: Long, action: String, timestamp: Long, value: Double)
def main(args: Array[String]): Unit = {
val spark = SparkSession.builder()
.appName("User Behavior Aggregation")
.config("spark.sql.autoBroadcastJoinThreshold", "-1") // 禁用广播join
.config("spark.sql.shuffle.partitions", "2000")
.getOrCreate()
import spark.implicits._
// 1. 读取用户行为数据(100亿条)
val behaviorDS = spark.read
.parquet("hdfs://user-behavior/*.parquet")
.as[UserBehavior]
.repartition($"userId") // 按userId预分区
// 2. 多维度聚合(使用Spark SQL优化)
behaviorDS.createOrReplaceTempView("user_behavior")
val aggregatedDF = spark.sql("""
-- 多级聚合:小时 -> 天 -> 月
WITH hourly_agg AS (
SELECT
userId,
date_trunc('hour', from_unixtime(timestamp)) as hour,
action,
COUNT(*) as action_count,
SUM(value) as total_value,
AVG(value) as avg_value
FROM user_behavior
WHERE timestamp >= UNIX_TIMESTAMP('2024-01-01')
GROUP BY userId, date_trunc('hour', from_unixtime(timestamp)), action
),
daily_agg AS (
SELECT
userId,
date_trunc('day', hour) as day,
action,
SUM(action_count) as daily_count,
SUM(total_value) as daily_value,
SUM(total_value) / SUM(action_count) as daily_avg
FROM hourly_agg
GROUP BY userId, date_trunc('day', hour), action
),
monthly_agg AS (
SELECT
userId,
date_trunc('month', day) as month,
action,
SUM(daily_count) as monthly_count,
SUM(daily_value) as monthly_value,
SUM(daily_value) / SUM(daily_count) as monthly_avg,
-- 计算百分位数(需要聚合UDAF)
percentile_approx(daily_value, 0.5) as median_value
FROM daily_agg
GROUP BY userId, date_trunc('month', day), action
)
SELECT * FROM monthly_agg
ORDER BY monthly_count DESC
LIMIT 1000000
""")
// 3. 处理数据倾斜:Salting技术
val skewedDF = handleDataSkew(behaviorDS)
// 4. 增量聚合:维护聚合状态
val incrementalAggDF = incrementalAggregation(behaviorDS)
// 5. 写入聚合结果
aggregatedDF.write
.mode(SaveMode.Overwrite)
.parquet("hdfs://aggregated-results/")
spark.stop()
}
/**
* 处理数据倾斜:Salting + 两阶段聚合
*/
def handleDataSkew(behaviorDS: Dataset[UserBehavior]): DataFrame = {
import behaviorDS.sparkSession.implicits._
// 第一阶段:添加随机后缀打散热点数据
val saltedDS = behaviorDS
.map { behavior =>
val salt = if (isHotUser(behavior.userId)) {
// 热点用户添加随机后缀
s"${behavior.userId}_${Random.nextInt(10)}"
} else {
behavior.userId.toString
}
(salt, behavior.action, behavior.timestamp, behavior.value)
}
.toDF("salted_user_id", "action", "timestamp", "value")
// 第一阶段聚合(在打散的key上)
val stage1Agg = saltedDS
.groupBy("salted_user_id", "action")
.agg(
count("*").as("partial_count"),
sum("value").as("partial_sum"),
avg("value").as("partial_avg")
)
// 第二阶段:还原原始key并聚合
val stage2Agg = stage1Agg
.withColumn("original_user_id",
split($"salted_user_id", "_").getItem(0).cast("long"))
.groupBy("original_user_id", "action")
.agg(
sum("partial_count").as("total_count"),
sum("partial_sum").as("total_sum"),
avg("partial_avg").as("total_avg")
)
stage2Agg
}
/**
* 增量聚合:维护滑动窗口聚合状态
*/
def incrementalAggregation(behaviorDS: Dataset[UserBehavior]): DataFrame = {
import behaviorDS.sparkSession.implicits._
import org.apache.spark.sql.expressions.Window
// 定义滚动统计窗口:先按小时聚合,再按窗口结束时间做最近24小时的滚动统计
val rollingWindow = Window
  .partitionBy("userId", "action")
  .orderBy(col("window_end_ts"))
  .rangeBetween(-24 * 3600, 0) // 最近24小时

behaviorDS
  .withColumn("event_time", to_timestamp(from_unixtime($"timestamp")))
  .groupBy(
    window($"event_time", "1 hour"), // 1小时窗口(流式场景可配合withWatermark处理迟到数据)
    $"userId",
    $"action"
  )
  .agg(
    count("*").as("hourly_count"),
    sum("value").as("hourly_sum"),
    avg("value").as("hourly_avg")
  )
  .withColumn("window_end_ts", unix_timestamp($"window.end")) // 供rangeBetween使用的秒级时间
  .withColumn("rolling_24h_count",
    sum("hourly_count").over(rollingWindow))
  .withColumn("rolling_24h_avg",
    sum("hourly_sum").over(rollingWindow) / sum("hourly_count").over(rollingWindow))
}
/**
* 自定义聚合函数:HyperLogLog基数估算
*/
class HyperLogLogAgg extends UserDefinedAggregateFunction {
override def inputSchema: StructType =
StructType(StructField("value", StringType) :: Nil)
override def bufferSchema: StructType =
StructType(StructField("registers", ArrayType(IntegerType)) :: Nil)
override def dataType: DataType = LongType
override def deterministic: Boolean = true
override def initialize(buffer: MutableAggregationBuffer): Unit = {
// 初始化HLL寄存器数组(m=2^14=16384个寄存器)
buffer(0) = Array.fill(16384)(0)
}
override def update(buffer: MutableAggregationBuffer, input: Row): Unit = {
val value = input.getString(0)
// 用两个带种子的32位MurmurHash拼成64位哈希
val h1 = scala.util.hashing.MurmurHash3.stringHash(value, 41)
val h2 = scala.util.hashing.MurmurHash3.stringHash(value, 97)
val hash = (h1.toLong << 32) | (h2.toLong & 0xFFFFFFFFL)
// 低14位作为寄存器索引,其余50位中最左侧1的位置作为rho
val index = (hash & 0x3FFF).toInt
val w = hash >>> 14
val rho = if (w == 0) 51 else java.lang.Long.numberOfLeadingZeros(w) - 13
// 更新寄存器
val registers = buffer.getAs[Seq[Int]](0)
if (rho > registers(index)) {
val updated = registers.updated(index, rho)
buffer.update(0, updated)
}
}
override def merge(buffer1: MutableAggregationBuffer, buffer2: Row): Unit = {
val registers1 = buffer1.getAs[Seq[Int]](0)
val registers2 = buffer2.getAs[Seq[Int]](0)
// 合并两个HLL寄存器(取最大值)
val merged = (registers1 zip registers2).map { case (r1, r2) =>
Math.max(r1, r2)
}
buffer1.update(0, merged)
}
override def evaluate(buffer: Row): Any = {
val registers = buffer.getAs[Seq[Int]](0)
// 计算基数估计
val m = registers.length.toDouble
val alpha = 0.7213 / (1 + 1.079 / m) // 修正系数
val sum = registers.map(r => math.pow(2.0, -r)).sum // r可能超过31,不能用Int位移
val estimate = alpha * m * m / sum
// 小范围修正
val corrected = if (estimate <= 2.5 * m) {
// 线性计数
val zeros = registers.count(_ == 0)
if (zeros > 0) m * math.log(m / zeros) else estimate
} else {
estimate
}
corrected.toLong
}
}
}
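手写HLL UDAF主要用于说明原理;Spark内置的approx_count_distinct底层就是HyperLogLog++,生产中直接调用即可。下面是一个Java版的最小用法示意(数据路径沿用上文,列名与误差参数为演示假设)。
java
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import static org.apache.spark.sql.functions.approx_count_distinct;
import static org.apache.spark.sql.functions.col;

public class ApproxDistinctExample {
    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder().appName("approx-uv").getOrCreate();
        // 按action估算去重用户数,0.01是允许的相对标准误差
        Dataset<Row> uv = spark.read().parquet("hdfs://user-behavior/*.parquet")
                .groupBy(col("action"))
                .agg(approx_count_distinct("userId", 0.01).alias("approx_uv"));
        uv.show();
        spark.stop();
    }
}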
四、存储优化与索引
1. 列式存储优化
java
// 列式存储聚合优化
public class ColumnarStorageOptimization {
/**
* Parquet列式存储优化
*/
public class ParquetAggregationOptimizer {
/**
* 基于列统计的谓词下推
*/
public Dataset<Row> optimizeWithPredicatePushdown(
SparkSession spark, String parquetPath) {
// Parquet文件自动收集列统计信息
Dataset<Row> df = spark.read.parquet(parquetPath);
// 启用向量化读取(批量处理)
spark.conf().set("spark.sql.parquet.enableVectorizedReader", "true");
// 启用谓词下推
spark.conf().set("spark.sql.parquet.filterPushdown", "true");
// 启用字典过滤
spark.conf().set("spark.sql.parquet.dictionary.enabled", "true");
// 基于统计信息的过滤
return df.filter("amount > 100 AND amount < 1000") // 范围过滤
.filter("category IN ('electronics', 'books')") // IN过滤
.filter("timestamp >= '2024-01-01'"); // 时间过滤
}
/**
* 列裁剪优化:只读取需要的列
*/
public Dataset<Row> optimizeColumnPruning(
        SparkSession spark, Dataset<Row> df, List<String> requiredColumns) {
    // 只选择需要的列,触发列裁剪
    Dataset<Row> prunedDF = df.selectExpr(requiredColumns.toArray(new String[0]));
    // 调整向量化读取的批大小
    spark.conf().set("spark.sql.parquet.columnarReaderBatchSize", "4096");
    return prunedDF;
}
/**
* 分区裁剪:基于分区键过滤
*/
public Dataset<Row> optimizePartitionPruning(
        SparkSession spark, Dataset<Row> df, String partitionColumn, String partitionValue) {
// 创建分区视图
df.createOrReplaceTempView("data");
// 使用分区键过滤,Spark会自动进行分区裁剪
return spark.sql(
String.format("SELECT * FROM data WHERE %s = '%s'",
partitionColumn, partitionValue)
);
}
}
/**
* ORC存储格式优化
*/
public class ORCOptimization {
public Dataset<Row> optimizeORCReading(SparkSession spark, String orcPath) {
// 启用ORC优化
spark.conf().set("spark.sql.orc.enabled", "true");
spark.conf().set("spark.sql.orc.filterPushdown", "true");
spark.conf().set("spark.sql.orc.splits.include.file.footer", "true");
// 使用索引加速
spark.conf().set("spark.sql.orc.impl", "native");
spark.conf().set("spark.sql.orc.skipCorruptRecords", "true");
// Bloom Filter索引
spark.conf().set("spark.sql.orc.bloom.filter.columns", "userId,category");
spark.conf().set("spark.sql.orc.bloom.filter.fpp", "0.05");
return spark.read().orc(orcPath);
}
}
/**
* 数据湖格式优化(Delta Lake/Iceberg)
*/
public class DataLakeOptimization {
/**
* Delta Lake Z-Order优化
*/
public void optimizeWithZOrder(SparkSession spark, Dataset<Row> df, String[] zOrderColumns) {
// 将数据写入Delta Lake
df.write()
.format("delta")
.mode("overwrite")
.save("/data/delta-table");
// 执行Z-Order优化(多维聚类)
spark.sql("OPTIMIZE delta.`/data/delta-table` " +
"ZORDER BY (" + String.join(",", zOrderColumns) + ")");
// 收集统计信息
spark.sql("ANALYZE TABLE delta.`/data/delta-table` COMPUTE STATISTICS");
}
/**
* Iceberg隐藏分区
*/
public void optimizeWithHiddenPartitions(SparkSession spark, Dataset<Row> df) throws Exception {
// 创建Iceberg表,定义隐藏分区
spark.sql("""
CREATE TABLE iceberg_db.user_behavior (
userId BIGINT,
action STRING,
timestamp TIMESTAMP,
value DOUBLE
)
USING iceberg
PARTITIONED BY (days(timestamp), bucket(10, userId))
TBLPROPERTIES (
'write.format.default'='parquet',
'write.parquet.compression-codec'='zstd'
)
""");
// 写入数据,Iceberg会自动管理分区
df.writeTo("iceberg_db.user_behavior").append();
// 过期快照清理
spark.sql("""
CALL iceberg.system.expire_snapshots(
'iceberg_db.user_behavior',
TIMESTAMP '2024-01-01 00:00:00'
)
""");
}
}
}
2. 索引加速策略
java
/**
* 跳跃表索引:支持范围查询
*/
public class SkipListIndex {
private static class SkipListNode {
String key;
List<DataLocation> locations;
SkipListNode[] forward;
public SkipListNode(String key, int level) {
this.key = key;
this.locations = new ArrayList<>();
this.forward = new SkipListNode[level + 1];
}
}
private SkipListNode header;
private int maxLevel;
private int size;
private Random random;
public SkipListIndex(int maxLevel) {
this.maxLevel = maxLevel;
this.header = new SkipListNode(null, maxLevel);
this.random = new Random();
this.size = 0;
}
/**
* 插入索引
*/
public void insert(String key, DataLocation location) {
SkipListNode[] update = new SkipListNode[maxLevel + 1];
SkipListNode current = header;
// 查找插入位置
for (int i = maxLevel; i >= 0; i--) {
while (current.forward[i] != null &&
current.forward[i].key.compareTo(key) < 0) {
current = current.forward[i];
}
update[i] = current;
}
current = current.forward[0];
if (current != null && current.key.equals(key)) {
// 键已存在,添加位置
current.locations.add(location);
} else {
// 创建新节点
int level = randomLevel();
SkipListNode newNode = new SkipListNode(key, level);
newNode.locations.add(location);
// 更新指针
for (int i = 0; i <= level; i++) {
newNode.forward[i] = update[i].forward[i];
update[i].forward[i] = newNode;
}
size++;
}
}
/**
* 范围查询
*/
public List<DataLocation> rangeQuery(String startKey, String endKey) {
List<DataLocation> results = new ArrayList<>();
SkipListNode current = findNode(startKey);
while (current != null && current.key.compareTo(endKey) <= 0) {
results.addAll(current.locations);
current = current.forward[0];
}
return results;
}
private SkipListNode findNode(String key) {
SkipListNode current = header;
for (int i = maxLevel; i >= 0; i--) {
while (current.forward[i] != null &&
current.forward[i].key.compareTo(key) < 0) {
current = current.forward[i];
}
}
return current.forward[0];
}
private int randomLevel() {
int level = 0;
while (random.nextDouble() < 0.5 && level < maxLevel) {
level++;
}
return level;
}
}
/**
* 布隆过滤器索引:快速过滤不存在的数据
*/
public class BloomFilterIndex {
private final BitSet bitSet;
private final int size;
private final int[] hashSeeds;
private final int hashFunctions;
public BloomFilterIndex(int expectedSize, double falsePositiveRate) {
this.size = optimalBitSetSize(expectedSize, falsePositiveRate);
this.hashFunctions = optimalHashFunctions(expectedSize, size);
this.bitSet = new BitSet(size);
this.hashSeeds = generateHashSeeds(hashFunctions);
}
/**
* 添加元素到布隆过滤器
*/
public void add(String key) {
for (int i = 0; i < hashFunctions; i++) {
int hash = hash(key, hashSeeds[i]);
bitSet.set(Math.abs(hash % size));
}
}
/**
* 检查元素是否存在
*/
public boolean mightContain(String key) {
for (int i = 0; i < hashFunctions; i++) {
int hash = hash(key, hashSeeds[i]);
if (!bitSet.get(Math.abs(hash % size))) {
return false;
}
}
return true;
}
/**
* 批量添加元素
*/
public void addAll(Collection<String> keys) {
for (String key : keys) {
add(key);
}
}
/**
* 合并两个布隆过滤器
*/
public void merge(BloomFilterIndex other) {
if (this.size != other.size || this.hashFunctions != other.hashFunctions) {
throw new IllegalArgumentException("Bloom filters must have same configuration");
}
this.bitSet.or(other.bitSet);
}
private int optimalBitSetSize(int n, double p) {
return (int) (-n * Math.log(p) / (Math.log(2) * Math.log(2)));
}
private int optimalHashFunctions(int n, int m) {
return Math.max(1, (int) Math.round((double) m / n * Math.log(2)));
}
private int[] generateHashSeeds(int numHashes) {
int[] seeds = new int[numHashes];
Random random = new Random(42L); // 固定种子:相同配置的过滤器使用相同的哈希种子,merge才有意义
for (int i = 0; i < numHashes; i++) {
seeds[i] = random.nextInt();
}
return seeds;
}
private int hash(String key, int seed) {
    byte[] bytes = key.getBytes(StandardCharsets.UTF_8);
    return MurmurHash3.hash32x86(bytes, 0, bytes.length, seed);
}
}
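下面给出上述BloomFilterIndex的一个最小用法示例,演示按预期容量与误判率构造、批量添加以及分片过滤器的合并(key与数值均为演示假设)。
java
import java.util.Arrays;

public class BloomFilterIndexDemo {
    public static void main(String[] args) {
        // 预期100万个key,目标误判率1%
        BloomFilterIndex index = new BloomFilterIndex(1_000_000, 0.01);
        index.addAll(Arrays.asList("user_1001", "user_1002", "user_1003"));

        // 已加入的key一定返回true;未加入的key约有1%的概率被误判为true
        System.out.println(index.mightContain("user_1001")); // true
        System.out.println(index.mightContain("user_9999")); // 大概率false

        // 相同配置的过滤器可按位或合并,用于汇总多个分片的索引
        BloomFilterIndex shard2 = new BloomFilterIndex(1_000_000, 0.01);
        shard2.add("user_2001");
        index.merge(shard2);
        System.out.println(index.mightContain("user_2001")); // true
    }
}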
/**
* 分布式倒排索引
*/
public class DistributedInvertedIndex {
/**
* 构建倒排索引
*/
public JavaPairRDD<String, List<Long>> buildInvertedIndex(
        JavaRDD<String> documents, int numPartitions) {
// 第一步:文档分词
JavaPairRDD<String, Long> termDocPairs = documents.zipWithIndex()
.flatMapToPair(document -> {
List<Tuple2<String, Long>> pairs = new ArrayList<>();
long docId = document._2;
String text = document._1;
// 分词处理
String[] terms = text.toLowerCase()
.replaceAll("[^a-z0-9\\s]", "")
.split("\\s+");
for (String term : terms) {
if (!term.isEmpty()) {
pairs.add(new Tuple2<>(term, docId));
}
}
return pairs.iterator();
});
// 第二步:聚合相同term的文档列表
JavaPairRDD<String, List<Long>> invertedIndex = termDocPairs
.groupByKey(numPartitions)
.mapValues(docIds -> {
    // 去重并排序:TreeSet一次完成,避免List.contains带来的O(n^2)开销
    Set<Long> uniqueIds = new TreeSet<>();
    for (Long docId : docIds) {
        uniqueIds.add(docId);
    }
    return new ArrayList<>(uniqueIds);
});
// 第三步:压缩存储(增量编码)——压缩结果可另行持久化,查询仍使用未压缩的倒排表
JavaPairRDD<String, byte[]> compressedIndex = invertedIndex
        .mapValues(docIds -> compressDocIds(docIds));
return invertedIndex;
}
/**
* 增量编码压缩
*/
private byte[] compressDocIds(List<Long> docIds) {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
DataOutputStream dos = new DataOutputStream(baos);
try {
long prevId = 0;
for (Long docId : docIds) {
long delta = docId - prevId;
writeVariableByte(delta, dos);
prevId = docId;
}
} catch (IOException e) {
// 处理异常
}
return baos.toByteArray();
}
/**
* 变长字节编码
*/
private void writeVariableByte(long value, DataOutputStream dos)
throws IOException {
while (value > 127) {
dos.writeByte((int) (value & 0x7F));
value >>>= 7;
}
dos.writeByte((int) (value | 0x80));
}
/**
* 查询倒排索引
*/
public JavaRDD<Long> queryInvertedIndex(
JavaPairRDD<String, List<Long>> index,
List<String> queryTerms,
QueryType queryType) {
// 获取每个term的文档列表
List<JavaRDD<Long>> termDocRDDs = new ArrayList<>();
for (String term : queryTerms) {
JavaRDD<Long> docRDD = index
.filter(tuple -> tuple._1.equals(term))
.flatMap(tuple -> tuple._2.iterator());
termDocRDDs.add(docRDD);
}
// 根据查询类型合并结果
JavaRDD<Long> resultRDD;
switch (queryType) {
case AND:
// 求交集
resultRDD = termDocRDDs.get(0);
for (int i = 1; i < termDocRDDs.size(); i++) {
resultRDD = resultRDD.intersection(termDocRDDs.get(i));
}
break;
case OR:
// 求并集
resultRDD = termDocRDDs.get(0);
for (int i = 1; i < termDocRDDs.size(); i++) {
resultRDD = resultRDD.union(termDocRDDs.get(i))
.distinct();
}
break;
case NOT:
// 求差集
resultRDD = termDocRDDs.get(0)
.subtract(termDocRDDs.get(1));
break;
default:
throw new IllegalArgumentException("Unsupported query type");
}
return resultRDD;
}
}
/**
* 实时索引更新
*/
public class RealTimeIndexing {
/**
* LSM树(Log-Structured Merge-Tree)索引
*/
public class LSMTreeIndex {
// MemTable(内存表)
private final ConcurrentSkipListMap<String, String> memTable;
private final int memTableThreshold;
// SSTable文件列表
private final List<SSTable> ssTables;
// 布隆过滤器加速查询
private final BloomFilterIndex bloomFilter;
public LSMTreeIndex(int memTableThreshold) {
this.memTable = new ConcurrentSkipListMap<>();
this.memTableThreshold = memTableThreshold;
this.ssTables = new ArrayList<>();
this.bloomFilter = new BloomFilterIndex(1000000, 0.01);
}
/**
* 写入数据
*/
public void put(String key, String value) {
synchronized (memTable) {
memTable.put(key, value);
bloomFilter.add(key);
// 检查MemTable大小
if (memTable.size() >= memTableThreshold) {
flushMemTableToDisk();
}
}
}
/**
* 读取数据
*/
public String get(String key) {
// 先查布隆过滤器
if (!bloomFilter.mightContain(key)) {
return null;
}
// 先查MemTable
String value = memTable.get(key);
if (value != null) {
return value;
}
// 再查SSTable(从新到旧)
for (int i = ssTables.size() - 1; i >= 0; i--) {
value = ssTables.get(i).get(key);
if (value != null) {
return value;
}
}
return null;
}
/**
* 将MemTable刷到磁盘
*/
private void flushMemTableToDisk() {
synchronized (memTable) {
if (memTable.isEmpty()) {
return;
}
// 创建SSTable文件
SSTable ssTable = new SSTable(memTable);
ssTables.add(ssTable);
// 清空MemTable
memTable.clear();
// 定期合并SSTable
if (ssTables.size() > 10) {
compactSSTables();
}
}
}
/**
* 合并SSTable(减少读取时的I/O)
*/
private void compactSSTables() {
// 选择要合并的SSTable(通常是较旧的)
List<SSTable> toCompact = selectSSTablesForCompaction();
// 多路归并
MergeIterator mergeIterator = new MergeIterator(toCompact);
// 创建新的SSTable
SSTable newSsTable = new SSTable(mergeIterator);
// 替换旧的SSTable
ssTables.removeAll(toCompact);
ssTables.add(newSsTable);
}
}
/**
* SSTable(Sorted String Table)
*/
class SSTable {
private final String filePath;
private final NavigableMap<String, Long> sparseIndex; // 稀疏索引
private final BloomFilterIndex bloomFilter;
public SSTable(NavigableMap<String, String> data) {
this.filePath = createSSTableFile(data);
this.sparseIndex = buildSparseIndex(data);
this.bloomFilter = buildBloomFilter(data);
}
public String get(String key) {
// 先查布隆过滤器
if (!bloomFilter.mightContain(key)) {
return null;
}
// 查找数据块
Map.Entry<String, Long> floorEntry = sparseIndex.floorEntry(key);
if (floorEntry == null) {
return null;
}
// 读取数据块并查找
return readAndSearchBlock(floorEntry.getValue(), key);
}
private String readAndSearchBlock(long offset, String key) {
// 读取数据块并在内存中查找
try (RandomAccessFile file = new RandomAccessFile(filePath, "r")) {
file.seek(offset);
// 读取数据块大小
int blockSize = file.readInt();
byte[] blockData = new byte[blockSize];
file.readFully(blockData);
// 解析数据块
return binarySearchInBlock(blockData, key);
} catch (IOException e) {
return null;
}
}
}
}
五、容错与一致性保障
1. 容错机制设计
java
/**
* 分布式排序容错机制
*/
public class FaultTolerantSorting {
/**
* Checkpoint机制:定期保存排序状态
*/
public static class CheckpointManager {
private final String checkpointDir;
private final int checkpointInterval;
public CheckpointManager(String checkpointDir, int interval) {
this.checkpointDir = checkpointDir;
this.checkpointInterval = interval;
}
/**
* 创建检查点
*/
public void createCheckpoint(
JavaRDD<Row> rdd,
int stageId,
String checkpointName) {
// 保存RDD到可靠的存储系统
rdd.saveAsObjectFile(
checkpointDir + "/" + stageId + "/" + checkpointName
);
// 保存元数据
saveMetadata(stageId, checkpointName, rdd.getNumPartitions());
}
/**
* 从检查点恢复
*/
public JavaRDD<Row> recoverFromCheckpoint(
SparkContext sc,
int stageId,
String checkpointName) {
// 加载元数据
CheckpointMetadata metadata = loadMetadata(stageId, checkpointName);
// 恢复RDD
JavaRDD<Row> recoveredRDD = sc.objectFile(
checkpointDir + "/" + stageId + "/" + checkpointName,
metadata.getNumPartitions()
);
return recoveredRDD;
}
}
/**
* 推测执行(Speculative Execution)
*/
public static class SpeculativeExecution {
/**
* 检测慢任务并启动推测执行
*/
public void monitorAndSpeculate(List<TaskInfo> tasks) {
// 计算任务平均执行时间
double avgDuration = calculateAverageDuration(tasks);
// 检测慢任务(超过平均时间2倍)
List<TaskInfo> slowTasks = tasks.stream()
.filter(task -> task.getDuration() > avgDuration * 2)
.collect(Collectors.toList());
// 启动推测执行
for (TaskInfo slowTask : slowTasks) {
if (shouldSpeculate(slowTask)) {
launchSpeculativeTask(slowTask);
}
}
}
private boolean shouldSpeculate(TaskInfo task) {
// 检查资源使用情况
double cpuUsage = task.getCpuUsage();
double memUsage = task.getMemUsage();
// 判断是否为资源竞争导致的慢
if (cpuUsage < 0.3 && memUsage < 0.3) {
return true; // 资源充足,可能是节点故障
}
// 检查网络情况
double networkDelay = task.getNetworkDelay();
return networkDelay < 100; // 网络延迟小于100ms
}
}
/**
* 数据备份与恢复
*/
public static class DataReplication {
private final int replicationFactor;
private final List<StorageNode> storageNodes;
public DataReplication(int replicationFactor) {
this.replicationFactor = replicationFactor;
this.storageNodes = new ArrayList<>();
}
/**
* 多副本写入
*/
public void writeWithReplication(String dataId, byte[] data) {
List<StorageNode> selectedNodes = selectNodesForReplication(dataId);
// 并发写入多个副本
List<CompletableFuture<Void>> futures = selectedNodes.stream()
.map(node -> CompletableFuture.runAsync(() -> {
node.writeData(dataId, data);
}))
.collect(Collectors.toList());
// 等待全部副本写入完成(若只要求多数成功,可改为按成功计数等待)
CompletableFuture<Void> allFutures = CompletableFuture.allOf(
futures.toArray(new CompletableFuture[0])
);
try {
allFutures.get();
} catch (Exception e) {
handleWriteFailure(dataId, selectedNodes);
}
}
/**
* 读取数据(优先读取最快副本)
*/
public byte[] readWithReplication(String dataId) {
List<StorageNode> replicaNodes = findReplicaNodes(dataId);
// 并发读取多个副本,取最先返回的结果
return replicaNodes.parallelStream()
.map(node -> {
try {
return node.readData(dataId);
} catch (Exception e) {
return null;
}
})
.filter(Objects::nonNull)
.findAny() // 并行流下取任一已成功读取的副本
.orElseThrow(() -> new DataNotFoundException(dataId));
}
/**
* 数据修复(当副本数不足时)
*/
public void repairReplication(String dataId) {
List<StorageNode> existingReplicas = findReplicaNodes(dataId);
if (existingReplicas.size() < replicationFactor) {
// 读取数据
byte[] data = existingReplicas.get(0).readData(dataId);
// 选择新的存储节点
List<StorageNode> newNodes = selectNewNodesForReplication(dataId, existingReplicas);
// 复制数据到新节点
for (StorageNode node : newNodes) {
node.writeData(dataId, data);
}
}
}
}
}
/**
* 一致性保障机制
*/
public class ConsistencyGuarantee {
/**
* 分布式事务(两阶段提交)
*/
public static class TwoPhaseCommit {
private final List<ResourceManager> participants;
private final TransactionCoordinator coordinator;
public TwoPhaseCommit(List<ResourceManager> participants) {
this.participants = participants;
this.coordinator = new TransactionCoordinator();
}
/**
* 执行两阶段提交
*/
public boolean executeTransaction(Transaction transaction) {
// 第一阶段:准备阶段
List<PrepareResult> prepareResults = participants.parallelStream()
.map(participant -> participant.prepare(transaction))
.collect(Collectors.toList());
boolean allPrepared = prepareResults.stream()
.allMatch(PrepareResult::isPrepared);
if (!allPrepared) {
// 有参与者准备失败,回滚事务
abortTransaction(transaction);
return false;
}
// 第二阶段:提交阶段
List<CommitResult> commitResults = participants.parallelStream()
.map(participant -> participant.commit(transaction))
.collect(Collectors.toList());
boolean allCommitted = commitResults.stream()
.allMatch(CommitResult::isCommitted);
if (!allCommitted) {
// 提交失败,需要人工干预
handleCommitFailure(transaction);
return false;
}
return true;
}
private void abortTransaction(Transaction transaction) {
participants.forEach(participant ->
participant.rollback(transaction)
);
}
}
/**
* 最终一致性实现
*/
public static class EventualConsistency {
private final ConflictResolver conflictResolver;
private final VectorClock vectorClock;
public EventualConsistency() {
this.conflictResolver = new LastWriteWinsResolver();
this.vectorClock = new VectorClock();
}
/**
* 基于向量时钟的版本管理
*/
public void writeWithVersion(String key, String value, String nodeId) {
// 更新向量时钟
vectorClock.increment(nodeId);
// 创建带版本的数据
VersionedData versionedData = new VersionedData(
key, value, vectorClock.getClock()
);
// 写入数据(多副本)
writeToReplicas(versionedData);
}
/**
* 读取数据并解决冲突
*/
public String readWithConflictResolution(String key) {
// 从多个副本读取数据
List<VersionedData> replicas = readFromReplicas(key);
if (replicas.isEmpty()) {
return null;
}
if (replicas.size() == 1) {
return replicas.get(0).getValue();
}
// 检测冲突
if (hasConflict(replicas)) {
// 解决冲突
VersionedData resolved = conflictResolver.resolve(replicas);
// 修复副本(读时修复)
repairReplicas(key, resolved);
return resolved.getValue();
}
// 没有冲突,返回最新版本
return getLatestVersion(replicas).getValue();
}
/**
* 读时修复(Read Repair)
*/
private void repairReplicas(String key, VersionedData correctData) {
List<StorageNode> replicaNodes = findReplicaNodes(key);
replicaNodes.parallelStream().forEach(node -> {
VersionedData nodeData = node.readVersionedData(key);
if (!correctData.getVersion().equals(nodeData.getVersion())) {
node.writeVersionedData(correctData);
}
});
}
}
/**
* 向量时钟实现
*/
static class VectorClock {
private final Map<String, Long> clock = new ConcurrentHashMap<>();
public void increment(String nodeId) {
clock.merge(nodeId, 1L, Long::sum);
}
public Map<String, Long> getClock() {
return new HashMap<>(clock);
}
public boolean happenedBefore(Map<String, Long> vc1, Map<String, Long> vc2) {
for (Map.Entry<String, Long> entry : vc1.entrySet()) {
Long v2 = vc2.get(entry.getKey());
if (v2 == null || entry.getValue() > v2) {
return false;
}
}
// 所有分量都不大于对方且两个时钟不相等,才构成严格的happened-before
return !vc1.equals(vc2);
}
}
}
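下面用一个最小示例演示VectorClock的比较语义:只有每个分量都不大于对方、且两个时钟不相等时,才构成happened-before关系(类名为演示用)。
java
public class VectorClockDemo {
    public static void main(String[] args) {
        ConsistencyGuarantee.VectorClock clockA = new ConsistencyGuarantee.VectorClock();
        ConsistencyGuarantee.VectorClock clockB = new ConsistencyGuarantee.VectorClock();

        clockA.increment("node-1");   // A = {node-1:1}
        clockB.increment("node-1");
        clockB.increment("node-2");   // B = {node-1:1, node-2:1}

        // A的每个分量都不大于B,且A != B,因此A happened-before B
        System.out.println(clockA.happenedBefore(clockA.getClock(), clockB.getClock())); // true
        // 反向不成立:B的node-2分量在A中不存在,B不可能先于A发生
        System.out.println(clockB.happenedBefore(clockB.getClock(), clockA.getClock())); // false
    }
}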
2. 性能监控与调优
java
/**
* 分布式排序性能监控
*/
public class PerformanceMonitor {
private final MetricsCollector metricsCollector;
private final AlertManager alertManager;
public PerformanceMonitor() {
this.metricsCollector = new MetricsCollector();
this.alertManager = new AlertManager();
}
/**
* 监控关键指标
*/
public void monitorSortingJob(SortingJob job) {
// 收集Shuffle指标
ShuffleMetrics shuffleMetrics = metricsCollector.collectShuffleMetrics();
monitorShufflePerformance(shuffleMetrics);
// 收集内存使用情况
MemoryMetrics memoryMetrics = metricsCollector.collectMemoryMetrics();
monitorMemoryUsage(memoryMetrics);
// 收集网络I/O
NetworkMetrics networkMetrics = metricsCollector.collectNetworkMetrics();
monitorNetworkTraffic(networkMetrics);
// 收集CPU使用率
CpuMetrics cpuMetrics = metricsCollector.collectCpuMetrics();
monitorCpuUsage(cpuMetrics);
// 检测数据倾斜
detectDataSkew(job);
}
/**
* 检测和预警数据倾斜
*/
private void detectDataSkew(SortingJob job) {
List<PartitionMetrics> partitionMetrics = job.getPartitionMetrics();
// 计算分区间数据量差异
long[] partitionSizes = partitionMetrics.stream()
.mapToLong(PartitionMetrics::getSize)
.toArray();
double avgSize = Arrays.stream(partitionSizes).average().orElse(0);
double maxSize = Arrays.stream(partitionSizes).max().orElse(0);
// 计算倾斜度
double skewRatio = maxSize / avgSize;
if (skewRatio > 3.0) { // 阈值可配置
alertManager.sendAlert(
AlertLevel.WARNING,
"Data skew detected",
String.format("Skew ratio: %.2f, max partition: %d, avg: %.2f",
skewRatio, maxSize, avgSize)
);
// 自动触发再平衡
rebalancePartitions(job);
}
}
/**
* 自动调优参数
*/
public void autoTuneParameters(SparkSession spark, Dataset<?> dataset) {
// 动态调整Shuffle分区数
long dataSize = estimateDataSize(dataset);
int optimalPartitions = calculateOptimalPartitions(dataSize);
spark.conf().set("spark.sql.shuffle.partitions",
String.valueOf(optimalPartitions));
// 动态调整Executor内存
MemoryMetrics memoryMetrics = metricsCollector.collectMemoryMetrics();
if (memoryMetrics.getSpillCount() > 100) {
// 频繁spill,增加内存
increaseExecutorMemory(spark, memoryMetrics);
}
// 启用压缩
if (dataSize > 100 * 1024 * 1024) { // 大于100MB
spark.conf().set("spark.shuffle.compress", "true");
spark.conf().set("spark.shuffle.spill.compress", "true");
}
}
/**
* 生成性能报告
*/
public PerformanceReport generateReport(SortingJob job) {
PerformanceReport report = new PerformanceReport();
// 收集时间指标
long totalTime = job.getTotalTime();
long shuffleTime = job.getShuffleTime();
long sortTime = job.getSortTime();
long ioTime = job.getIoTime();
report.setTotalTime(totalTime);
report.setShuffleTime(shuffleTime);
report.setSortTime(sortTime);
report.setIoTime(ioTime);
// 计算效率指标
double shuffleRatio = (double) shuffleTime / totalTime;
double sortRatio = (double) sortTime / totalTime;
double ioRatio = (double) ioTime / totalTime;
report.setShuffleRatio(shuffleRatio);
report.setSortRatio(sortRatio);
report.setIoRatio(ioRatio);
// 收集资源使用情况
report.setCpuUsage(job.getAvgCpuUsage());
report.setMemoryUsage(job.getAvgMemoryUsage());
report.setNetworkUsage(job.getNetworkTraffic());
// 生成优化建议
List<OptimizationSuggestion> suggestions = generateSuggestions(report);
report.setSuggestions(suggestions);
return report;
}
private List<OptimizationSuggestion> generateSuggestions(PerformanceReport report) {
List<OptimizationSuggestion> suggestions = new ArrayList<>();
// 分析瓶颈并提出建议
if (report.getShuffleRatio() > 0.5) {
suggestions.add(new OptimizationSuggestion(
"Shuffle优化",
"Shuffle时间占比过高,建议:\n" +
"1. 增加shuffle分区数\n" +
"2. 启用shuffle压缩\n" +
"3. 使用更好的序列化器",
Priority.HIGH
));
}
if (report.getIoRatio() > 0.3) {
suggestions.add(new OptimizationSuggestion(
"I/O优化",
"I/O时间占比过高,建议:\n" +
"1. 使用列式存储格式(Parquet/ORC)\n" +
"2. 启用谓词下推\n" +
"3. 增加内存缓存",
Priority.MEDIUM
));
}
if (report.getMemoryUsage() > 0.8) {
suggestions.add(new OptimizationSuggestion(
"内存优化",
"内存使用率过高,建议:\n" +
"1. 增加Executor内存\n" +
"2. 减少并行度\n" +
"3. 优化数据分区",
Priority.HIGH
));
}
return suggestions;
}
}
六、实战案例与最佳实践
1. 电商平台交易数据排序案例
java
/**
* 电商交易数据排序实战
*/
public class ECommerceTransactionSorting {
/**
* 案例需求:
* 1. 按交易时间排序,获取最新交易
* 2. 按用户分组,获取每个用户的交易历史
* 3. 按金额排序,获取高价值交易
* 4. 多维度联合排序
*/
public static void main(String[] args) {
SparkSession spark = SparkSession.builder()
.appName("ECommerce Transaction Sorting")
.config("spark.executor.memory", "8g")
.config("spark.executor.cores", "4")
.config("spark.dynamicAllocation.enabled", "true")
.getOrCreate();
// 读取交易数据(Parquet格式)
Dataset<Row> transactions = spark.read()
.parquet("hdfs://transactions/*.parquet")
.cache(); // 缓存频繁访问的数据
// 场景1:按时间排序获取最新交易(全排序)
Dataset<Row> latestTransactions = transactions
.orderBy(col("transaction_time").desc())
.limit(1000000);
// 场景2:按用户分组排序(二次排序)
WindowSpec userWindow = Window
.partitionBy("user_id")
.orderBy(col("transaction_time").desc());
Dataset<Row> userTransactionHistory = transactions
.withColumn("row_num", row_number().over(userWindow))
.filter(col("row_num") <= 100) // 每个用户最近100笔交易
.drop("row_num");
// 场景3:高价值交易排序(Top-N模式)
// 中间再做sortWithinPartitions/repartition会被随后的全局排序覆盖,直接全局排序即可;只取前N条时可再加limit(N)
Dataset<Row> highValueTransactions = transactions
        .select("transaction_id", "user_id", "amount", "transaction_time")
        .filter(col("amount").gt(1000))
        .sort(col("amount").desc());
// 场景4:多维度联合排序(时间+金额+用户)
// 先按(transaction_date, user_segment)范围分区,分区之间即按这两列有序;
// 再在分区内部按时间、金额、用户排序,若最后追加一次全局sort会把分区内排序全部推翻
Dataset<Row> multiDimSorted = transactions
        .repartitionByRange(1000,
                col("transaction_date"),
                col("user_segment"))
        .sortWithinPartitions(
                col("transaction_time").desc(),
                col("amount").desc(),
                col("user_id"));
// 写入排序结果
String outputPath = "hdfs://sorted-transactions/";
latestTransactions.write()
.mode(SaveMode.Overwrite)
.parquet(outputPath + "/latest");
userTransactionHistory.write()
.mode(SaveMode.Overwrite)
.parquet(outputPath + "/by_user");
highValueTransactions.write()
.mode(SaveMode.Overwrite)
.parquet(outputPath + "/high_value");
// 创建索引加速查询
createIndexes(spark, outputPath);
spark.stop();
}
/**
* 创建查询索引
*/
private static void createIndexes(SparkSession spark, String dataPath) {
// 1. 为时间字段创建范围索引
Dataset<Row> timeIndex = spark.read()
.parquet(dataPath + "/latest")
.select("transaction_id", "transaction_time")
.sort("transaction_time");
timeIndex.write()
.mode(SaveMode.Overwrite)
.parquet(dataPath + "/index/time_index");
// 2. 为用户ID创建哈希索引
Dataset<Row> userIdIndex = spark.read()
.parquet(dataPath + "/by_user")
.select("user_id", "transaction_id", "transaction_time")
.sortWithinPartitions("user_id");
userIdIndex.write()
.mode(SaveMode.Overwrite)
.parquet(dataPath + "/index/user_id_index");
// 3. 创建布隆过滤器索引
Dataset<Row> bloomFilterData = spark.read()
.parquet(dataPath + "/high_value")
.select("user_id", "transaction_id");
// 生成布隆过滤器
BloomFilterIndex bloomFilter = createBloomFilter(bloomFilterData);
// 保存布隆过滤器
saveBloomFilter(bloomFilter, dataPath + "/index/bloom_filter");
}
}
2. 最佳实践总结
架构设计最佳实践
java
/**
* 分布式排序最佳实践总结
*/
public class SortingBestPractices {
/**
* 实践1:根据数据特征选择排序模式
*/
public SortStrategy selectOptimalStrategy(DataCharacteristics characteristics) {
if (characteristics.isSortedInput()) {
return SortStrategy.MERGE_ONLY; // 输入已部分有序
} else if (characteristics.getDataSize() < 1_000_000) {
return SortStrategy.SINGLE_NODE; // 小数据量单机排序
} else if (characteristics.hasSkewness()) {
return SortStrategy.SKEW_AWARE; // 有数据倾斜
} else {
return SortStrategy.DISTRIBUTED; // 大数据量分布式排序
}
}
/**
* 实践2:合理的分区策略
*/
public void configureOptimalPartitioning(SparkSession spark,
Dataset<?> dataset) {
long dataSize = estimateDataSize(dataset);
int numCores = getTotalCores(spark);
int memoryPerCore = getMemoryPerCore(spark);
// 计算最优分区数
int optimalPartitions = (int) Math.ceil(
(double) dataSize / (memoryPerCore * numCores * 0.7)
);
// 确保分区数在合理范围内
optimalPartitions = Math.max(100, Math.min(optimalPartitions, 10000));
spark.conf().set("spark.sql.shuffle.partitions",
String.valueOf(optimalPartitions));
}
/**
* 实践3:内存优化配置
*/
public Map<String, String> getMemoryOptimizationConfigs() {
Map<String, String> configs = new HashMap<>();
// 堆外内存配置
configs.put("spark.memory.offHeap.enabled", "true");
configs.put("spark.memory.offHeap.size", "2g");
// 序列化配置
configs.put("spark.serializer",
"org.apache.spark.serializer.KryoSerializer");
// 内存管理
configs.put("spark.memory.fraction", "0.8");
configs.put("spark.memory.storageFraction", "0.3");
// Shuffle优化
configs.put("spark.shuffle.spill.compress", "true");
configs.put("spark.shuffle.compress", "true");
configs.put("spark.io.compression.codec", "lz4");
return configs;
}
/**
* 实践4:容错配置
*/
public Map<String, String> getFaultToleranceConfigs() {
Map<String, String> configs = new HashMap<>();
// Checkpoint配置
configs.put("spark.checkpoint.dir", "hdfs://checkpoints/");
configs.put("spark.sql.streaming.checkpointLocation",
"hdfs://streaming-checkpoints/");
// 推测执行
configs.put("spark.speculation", "true");
configs.put("spark.speculation.interval", "1000");
configs.put("spark.speculation.multiplier", "1.5");
// 重试配置
configs.put("spark.task.maxFailures", "4");
configs.put("spark.stage.maxConsecutiveAttempts", "4");
return configs;
}
/**
* 实践5:监控告警配置
*/
public MonitoringConfig getMonitoringConfig() {
MonitoringConfig config = new MonitoringConfig();
// 性能指标阈值
config.setShuffleSpillThreshold(0.3); // Shuffle spill超过30%时告警
config.setMemoryUsageThreshold(0.85); // 内存使用超过85%时告警
config.setCpuUsageThreshold(0.9); // CPU使用超过90%时告警
config.setDataSkewThreshold(3.0); // 数据倾斜超过3倍时告警
// 监控频率
config.setMetricsCollectionInterval(5000); // 5秒收集一次指标
config.setAlertCheckInterval(60000); // 1分钟检查一次告警
return config;
}
}
总结
分布式排序与聚合是现代大数据处理的核心技术,其设计需要综合考虑数据特征、计算资源、性能要求和成本约束。以下是关键要点:
核心原则(对应的Spark参数示意见下方代码):
- 数据本地性优先:尽量减少数据移动
- 增量处理:避免全量重排序
- 资源感知:动态调整计算资源
- 容错设计:确保作业可靠性
- 监控调优:持续优化性能
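以下是把上述原则落到Spark参数上的一份示意配置,帮助把抽象原则和具体参数对应起来;取值需结合集群规模与作业特征调整,这里均为演示假设。
java
import java.util.HashMap;
import java.util.Map;

public class CorePrincipleConfigSketch {
    // 核心原则到Spark参数的示意映射(取值为演示假设)
    public static Map<String, String> sketch() {
        Map<String, String> conf = new HashMap<>();
        // 数据本地性优先:适当延长本地性等待时间,减少跨节点读取
        conf.put("spark.locality.wait", "3s");
        // 资源感知:动态资源分配 + AQE按运行时统计调整分区
        conf.put("spark.dynamicAllocation.enabled", "true");
        conf.put("spark.sql.adaptive.enabled", "true");
        // 容错设计:推测执行与任务重试
        conf.put("spark.speculation", "true");
        conf.put("spark.task.maxFailures", "4");
        // 监控调优:记录事件日志,便于事后分析作业指标
        conf.put("spark.eventLog.enabled", "true");
        return conf;
    }
}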
技术选型建议:
| 场景 | 推荐技术 | 原因 |
|---|---|---|
| 批处理全排序 | MapReduce全排序 | 成熟稳定,容错性好 |
| 交互式查询 | Spark SQL + 索引 | 响应快,支持复杂查询 |
| 实时流排序 | Flink + 时间窗口 | 低延迟,精确一次语义 |
| 海量数据排序 | 分治策略 + 外部排序 | 内存友好,可扩展 |
| 多维排序 | Z-Order + 索引 | 支持复杂查询模式 |
未来趋势:
- 硬件加速:GPU/TPU加速排序算法
- 智能化:AI驱动的自动优化
- 存算分离:云原生架构
- 联邦学习:隐私保护下的分布式排序
- 量子计算:量子排序算法探索
分布式排序与聚合技术仍在快速发展,随着数据量的持续增长和计算需求的多样化,新的优化技术和架构模式将不断涌现。关键在于保持技术敏感性,根据实际业务需求选择最合适的解决方案。