1. mmap 基本原理
java
/**
 * Minimal demonstration of memory-mapped file I/O.
 *
 * <p>Maps {@code data.txt}, reads the leading bytes through the mapping, overwrites the
 * start of the file with a greeting, and flushes the change to disk.
 */
public class MmapExample {
    /** Upper bound on the number of bytes read from the start of the mapping. */
    private static final int READ_CHUNK_SIZE = 1024;

    /**
     * Runs the demonstration against {@code data.txt} in the working directory.
     *
     * @param args unused
     * @throws Exception if the file cannot be opened or mapped
     */
    public static void main(String[] args) throws Exception {
        byte[] greeting = "Hello mmap".getBytes(java.nio.charset.StandardCharsets.UTF_8);

        // try-with-resources closes the channel and the file even when mapping fails.
        try (RandomAccessFile file = new RandomAccessFile("data.txt", "rw");
             FileChannel channel = file.getChannel()) {
            // Map at least enough bytes to hold the greeting so the example also works on
            // a new or very short file (a READ_WRITE mapping past EOF grows the file).
            long mappingSize = Math.max(channel.size(), greeting.length);
            MappedByteBuffer mappedBuffer = channel.map(
                    FileChannel.MapMode.READ_WRITE, // mapping mode
                    0,                              // start position
                    mappingSize);                   // mapping size

            // Read through the mapping like an ordinary ByteBuffer, but never request
            // more bytes than the mapping holds (that would throw BufferUnderflowException).
            byte[] data = new byte[Math.min(READ_CHUNK_SIZE, mappedBuffer.remaining())];
            mappedBuffer.get(data);

            // Rewind and overwrite the beginning of the file.
            mappedBuffer.position(0);
            mappedBuffer.put(greeting);

            // Optional: force the dirty pages to disk right away.
            mappedBuffer.force();
        }
    }
}
2. mmap 与传统 I/O 对比
传统文件读取流程
```text
应用程序 → read() 系统调用 → 内核缓冲区 → 用户空间缓冲区
```
mmap 文件读取流程
```text
应用程序直接访问 → 内存映射区域 ← 文件页缓存
```
3. mmap 工作模式
java
/** Demonstrates the three mapping modes offered by {@link FileChannel#map}. */
public class MmapModes {
    /**
     * Maps the whole of {@code test.dat} once in each mode.
     *
     * <p>The three buffers are created only for illustration and are not used further.
     *
     * @throws Exception if the file cannot be opened or mapped
     */
    public void differentModes() throws Exception {
        // try-with-resources closes the channel and the file even if a map() call throws.
        try (RandomAccessFile file = new RandomAccessFile("test.dat", "rw");
             FileChannel channel = file.getChannel()) {
            long length = channel.size();

            // READ_ONLY: attempts to modify the buffer throw ReadOnlyBufferException.
            MappedByteBuffer readOnly = channel.map(
                    FileChannel.MapMode.READ_ONLY, 0, length);

            // READ_WRITE: changes made through the buffer are propagated to the file.
            MappedByteBuffer readWrite = channel.map(
                    FileChannel.MapMode.READ_WRITE, 0, length);

            // PRIVATE: copy-on-write; changes stay private and never reach the file.
            MappedByteBuffer privateCopy = channel.map(
                    FileChannel.MapMode.PRIVATE, 0, length);
        }
    }
}
4. mmap 在 Kafka 中的应用
java
/**
 * Illustrates how a Kafka-style log segment can use mmap for its index files.
 *
 * <p>The offset index is treated as a sorted array of fixed-size entries, each holding a
 * {@code long} key at byte 0 and a {@code long} physical position at byte 8.
 * NOTE(review): this 16-byte layout is the one this example uses; it is not claimed to
 * match Kafka's real on-disk index format.
 */
public class KafkaIndexMmap {
    /** Size in bytes of one index entry. */
    private static final int ENTRY_SIZE = 16;
    /** Byte offset of the physical-position field inside an entry. */
    private static final int POSITION_FIELD_OFFSET = 8;

    /** Mapping of the offset index file ({@code <basePath>.index}). */
    private MappedByteBuffer offsetIndexMmap;
    /** Mapping of the timestamp index file ({@code <basePath>.timeindex}). */
    private MappedByteBuffer timestampIndexMmap;

    /**
     * Maps both index files that belong to the given segment base path.
     *
     * @param basePath segment path without extension; {@code .index} and
     *                 {@code .timeindex} are appended
     * @throws Exception if either file cannot be opened or mapped
     */
    public void initIndex(String basePath) throws Exception {
        // Map into locals first so a failure on the second file does not leave this
        // object half-initialised.
        MappedByteBuffer offsets = mapWholeFile(new File(basePath + ".index"));
        MappedByteBuffer timestamps = mapWholeFile(new File(basePath + ".timeindex"));
        offsetIndexMmap = offsets;
        timestampIndexMmap = timestamps;
    }

    /**
     * Maps an entire file read-write and releases the file handle immediately.
     *
     * <p>A mapping stays valid after its channel is closed, so closing here avoids
     * leaking one file descriptor per index file.
     */
    private static MappedByteBuffer mapWholeFile(File indexFile) throws java.io.IOException {
        try (RandomAccessFile raf = new RandomAccessFile(indexFile, "rw");
             FileChannel channel = raf.getChannel()) {
            return channel.map(FileChannel.MapMode.READ_WRITE, 0, channel.size());
        }
    }

    /**
     * Looks up the physical position stored for an exact offset.
     *
     * @param targetOffset the offset to search for
     * @return the physical position stored in the matching entry, or {@code -1} when no
     *         entry has exactly this offset
     * @throws IllegalStateException if {@link #initIndex(String)} has not been called
     */
    public long findOffset(long targetOffset) {
        if (offsetIndexMmap == null) {
            throw new IllegalStateException("initIndex must be called before findOffset");
        }
        // The binary search runs directly against the mapped memory.
        int entryIndex = binarySearchInMmap(offsetIndexMmap, targetOffset);
        if (entryIndex < 0) {
            return -1;
        }
        return offsetIndexMmap.getLong(entryIndex * ENTRY_SIZE + POSITION_FIELD_OFFSET);
    }

    /**
     * Binary-searches the entry keys of a mapped index.
     *
     * @param buffer mapped index whose entries are sorted by key
     * @param target key to find
     * @return the entry index on a hit, otherwise {@code -(insertionPoint + 1)}
     */
    private int binarySearchInMmap(MappedByteBuffer buffer, long target) {
        int low = 0;
        int high = (buffer.limit() / ENTRY_SIZE) - 1;
        while (low <= high) {
            // Unsigned shift keeps the midpoint correct even if low + high overflows.
            int mid = (low + high) >>> 1;
            long midOffset = buffer.getLong(mid * ENTRY_SIZE);
            if (midOffset < target) {
                low = mid + 1;
            } else if (midOffset > target) {
                high = mid - 1;
            } else {
                return mid;
            }
        }
        return -(low + 1);
    }
}
5. mmap 与 sendfile 对比
| 特性 | mmap | sendfile |
|---|---|---|
| 数据流向 | 文件 ↔ 页缓存 ↔ 用户空间 | 文件 → 页缓存 → 网卡 |
| 用户空间访问 | 可直接读写 | 不可访问数据 |
| 适用场景 | 随机访问、频繁修改 | 一次性传输、只读 |
| 内存占用 | 占用整个映射区域的虚拟地址空间(物理内存按需分页加载) | 仅缓冲区大小 |
| 系统调用 | mmap/munmap | sendfile |
6. mmap 高级用法
大文件分块映射
java
/** Processes a file of arbitrary size by mapping it one fixed-size window at a time. */
public class LargeFileMmap {
    /** Window size: 1 GiB, safely below the {@code Integer.MAX_VALUE} limit of one mapping. */
    private static final long MAPPING_SIZE = 1L << 30;

    /**
     * Walks the file in {@link #MAPPING_SIZE} windows, processing and flushing each one.
     *
     * <p>NOTE(review): each window stays mapped until its buffer is garbage collected, so
     * very large files can hold a lot of address space for a while.
     *
     * @param filePath path of the file to process; opened read-write
     * @throws Exception if the file cannot be opened or mapped
     */
    public void processLargeFile(String filePath) throws Exception {
        // try-with-resources closes the channel and the file even if a chunk fails.
        try (RandomAccessFile file = new RandomAccessFile(filePath, "rw");
             FileChannel channel = file.getChannel()) {
            long fileSize = channel.size();
            for (long offset = 0; offset < fileSize; offset += MAPPING_SIZE) {
                // The final window may be shorter than MAPPING_SIZE.
                long size = Math.min(MAPPING_SIZE, fileSize - offset);
                MappedByteBuffer buffer = channel.map(
                        FileChannel.MapMode.READ_WRITE, offset, size);
                processChunk(buffer, offset, size);
                // Flush this window before moving on to the next one.
                buffer.force();
            }
        }
    }

    /**
     * Consumes every remaining byte of one mapped window.
     *
     * @param buffer mapped window to read
     * @param offset position of the window within the file (unused by this placeholder)
     * @param size   length of the window in bytes (unused by this placeholder)
     */
    private void processChunk(MappedByteBuffer buffer, long offset, long size) {
        while (buffer.hasRemaining()) {
            // Placeholder: real per-byte processing would use the value returned here.
            buffer.get();
        }
    }
}
共享内存通信
java
/**
 * Process A of the shared-memory example: writes a record into a memory-mapped file
 * that another process can map and read.
 */
public class MmapWriter {
    /**
     * Writes an int, a timestamp and a greeting to {@code /tmp/shared_memory}, then keeps
     * the process alive for 60 seconds.
     *
     * @param args unused
     * @throws Exception if the file cannot be opened or mapped, or the sleep is interrupted
     */
    public static void main(String[] args) throws Exception {
        // try-with-resources closes the channel and the file on every exit path.
        try (RandomAccessFile file = new RandomAccessFile("/tmp/shared_memory", "rw");
             FileChannel channel = file.getChannel()) {
            MappedByteBuffer buffer = channel.map(FileChannel.MapMode.READ_WRITE, 0, 4096);

            // Record layout: int value, long timestamp, then the message bytes.
            buffer.putInt(123);
            buffer.putLong(System.currentTimeMillis());
            // Explicit charset so reader and writer agree regardless of platform default.
            buffer.put("Hello from Process A".getBytes(java.nio.charset.StandardCharsets.UTF_8));
            buffer.force();

            Thread.sleep(60000); // keep the mapping alive
        }
    }
}
/**
 * Process B of the shared-memory example: maps the same file as the writer and decodes
 * the record stored at its start.
 */
public class MmapReader {
    /** Number of message bytes read after the int and long header fields. */
    private static final int MESSAGE_LENGTH = 20;

    /**
     * Reads an int, a long timestamp and a {@value #MESSAGE_LENGTH}-byte message from
     * {@code /tmp/shared_memory} and prints them to standard output.
     *
     * @param args unused
     * @throws Exception if the file cannot be opened or mapped
     */
    public static void main(String[] args) throws Exception {
        // try-with-resources closes the channel and the file on every exit path.
        try (RandomAccessFile file = new RandomAccessFile("/tmp/shared_memory", "rw");
             FileChannel channel = file.getChannel()) {
            MappedByteBuffer buffer = channel.map(FileChannel.MapMode.READ_WRITE, 0, 4096);

            // Fields are read in the order they appear in the record.
            int value = buffer.getInt();
            long timestamp = buffer.getLong();
            byte[] strBytes = new byte[MESSAGE_LENGTH];
            buffer.get(strBytes);
            // Explicit charset so decoding does not depend on the platform default.
            String message = new String(strBytes, java.nio.charset.StandardCharsets.UTF_8).trim();

            System.out.println("Read: " + value + ", " + timestamp + ", " + message);
        }
    }
}
7. mmap 性能优化技巧
java
/** Collection of small techniques for getting better performance out of mmap. */
public class MmapOptimization {
    /** Page size assumed for alignment; 4 KiB is typical but not universal. */
    private static final long PAGE_SIZE = 4096;
    /** Number of bytes moved per bulk transfer in {@link #batchOperations}. */
    private static final int BATCH_SIZE = 8192;

    /**
     * Sets the file to its final length up front so it does not grow during use.
     *
     * @param filePath file to create or resize
     * @param size     desired length in bytes
     * @throws Exception if the file cannot be opened or resized
     */
    public void preallocateFile(String filePath, long size) throws Exception {
        // try-with-resources closes the file even if setLength fails.
        try (RandomAccessFile file = new RandomAccessFile(filePath, "rw")) {
            file.setLength(size);
        }
    }

    /**
     * Maps {@code data.bin} with its size rounded up to a multiple of {@link #PAGE_SIZE}.
     *
     * <p>NOTE(review): with the JDK's FileChannel implementation, a READ_WRITE mapping
     * that reaches past EOF appears to grow the file to the mapped size; confirm that
     * this is intended.
     *
     * @throws Exception if the file cannot be opened or mapped
     */
    public void alignedMapping() throws Exception {
        // try-with-resources closes the channel and the file on every exit path.
        try (RandomAccessFile file = new RandomAccessFile("data.bin", "rw");
             FileChannel channel = file.getChannel()) {
            // Round up to the next page boundary; relies on PAGE_SIZE being a power of two.
            long alignedSize = (channel.size() + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
            MappedByteBuffer buffer = channel.map(
                    FileChannel.MapMode.READ_WRITE, 0, alignedSize);
        }
    }

    /**
     * Reads up to {@link #BATCH_SIZE} bytes in one bulk call, then writes them back at
     * the start of the buffer in one bulk call.
     *
     * <p>The transfer is capped at the bytes remaining, so a buffer shorter than one
     * batch is handled instead of throwing {@code BufferUnderflowException}.
     *
     * @param buffer mapped buffer to read from its current position and write at position 0
     */
    public void batchOperations(MappedByteBuffer buffer) {
        byte[] batchData = new byte[BATCH_SIZE];
        int length = Math.min(buffer.remaining(), batchData.length);

        // Bulk read: one call instead of one get() per byte.
        buffer.get(batchData, 0, length);

        // ... process batchData[0..length) here ...

        // Bulk write of the same bytes back to the start of the buffer.
        buffer.position(0);
        buffer.put(batchData, 0, length);
    }
}
8. mmap 注意事项
java
/**
 * Pitfalls to keep in mind when using mmap from Java: mappings are only released when
 * the buffer is garbage collected unless they are unmapped explicitly, and large files
 * have to be mapped in pieces.
 */
public class MmapCaveats {
    /** Chunk size used when walking a large file: 256 MiB. */
    private static final long CHUNK_SIZE = 256L * 1024 * 1024;

    /**
     * Caveat 1: releasing the mapping.
     *
     * <p>Maps the first 1024 bytes of {@code temp.dat} and unmaps them explicitly instead
     * of waiting for garbage collection.
     *
     * @throws Exception if the file cannot be opened or mapped
     */
    public void resourceManagement() throws Exception {
        // try-with-resources closes the channel and the file on every exit path.
        try (RandomAccessFile file = new RandomAccessFile("temp.dat", "rw");
             FileChannel channel = file.getChannel()) {
            MappedByteBuffer buffer = channel.map(FileChannel.MapMode.READ_WRITE, 0, 1024);
            // The buffer must not be touched after this call.
            unmapBuffer(buffer);
        }
    }

    /**
     * Caveat 2: memory limits with large files.
     *
     * <p>A single mapping cannot exceed {@code Integer.MAX_VALUE} bytes, and 32-bit
     * processes have even less contiguous address space, so {@code large.dat} is
     * processed in {@link #CHUNK_SIZE} pieces and each piece is unmapped right away.
     *
     * @throws Exception if the file cannot be opened or mapped
     */
    public void handleLargeFiles() throws Exception {
        try (RandomAccessFile file = new RandomAccessFile("large.dat", "rw");
             FileChannel channel = file.getChannel()) {
            long fileSize = channel.size();
            for (long offset = 0; offset < fileSize; offset += CHUNK_SIZE) {
                // The final chunk may be shorter than CHUNK_SIZE.
                long size = Math.min(CHUNK_SIZE, fileSize - offset);
                MappedByteBuffer buffer = channel.map(
                        FileChannel.MapMode.READ_WRITE, offset, size);
                try {
                    processBuffer(buffer);
                } finally {
                    // Release the chunk even when processing fails.
                    unmapBuffer(buffer);
                }
            }
        }
    }

    /** Placeholder for real work: consumes every remaining byte of the chunk. */
    private void processBuffer(MappedByteBuffer buffer) {
        while (buffer.hasRemaining()) {
            buffer.get();
        }
    }

    /**
     * Best-effort explicit unmap that works across JDK versions.
     *
     * <p>Everything goes through reflection so the class compiles on any JDK. If neither
     * strategy works the mapping is simply left for the garbage collector, which is the
     * default behaviour. The buffer must never be accessed after this call.
     *
     * @param buffer the mapping to release; {@code null} is ignored
     */
    private void unmapBuffer(MappedByteBuffer buffer) {
        if (buffer == null) {
            return;
        }
        if (!invokeUnsafeCleaner(buffer)) {
            invokeLegacyCleaner(buffer);
        }
    }

    /** Java 9+: calls {@code sun.misc.Unsafe.invokeCleaner(ByteBuffer)}; returns success. */
    private static boolean invokeUnsafeCleaner(java.nio.ByteBuffer buffer) {
        try {
            Class<?> unsafeClass = Class.forName("sun.misc.Unsafe");
            java.lang.reflect.Field singleton = unsafeClass.getDeclaredField("theUnsafe");
            singleton.setAccessible(true);
            Object unsafe = singleton.get(null);
            java.lang.reflect.Method invokeCleaner =
                    unsafeClass.getMethod("invokeCleaner", java.nio.ByteBuffer.class);
            invokeCleaner.invoke(unsafe, buffer);
            return true;
        } catch (ReflectiveOperationException | RuntimeException ignored) {
            // Method absent (Java 8) or access denied: let the caller try the fallback.
            return false;
        }
    }

    /** Java 8: calls the buffer's {@code cleaner().clean()}; returns success. */
    private static boolean invokeLegacyCleaner(java.nio.ByteBuffer buffer) {
        try {
            java.lang.reflect.Method cleanerMethod = buffer.getClass().getMethod("cleaner");
            cleanerMethod.setAccessible(true);
            Object cleaner = cleanerMethod.invoke(buffer);
            if (cleaner != null) {
                java.lang.reflect.Method cleanMethod = cleaner.getClass().getMethod("clean");
                cleanMethod.setAccessible(true);
                cleanMethod.invoke(cleaner);
            }
            return true;
        } catch (ReflectiveOperationException | RuntimeException ignored) {
            // Unmapping is an optimisation only; garbage collection will release it.
            return false;
        }
    }
}
总结
mmap 是一种强大的零拷贝(zero-copy)技术,特别适合:
- 随机访问:数据库索引、搜索索引
- 频繁修改:内存数据库、缓存系统
- 进程间通信:共享内存
- 大文件处理:日志分析、数据处理
在 Kafka 中,mmap 主要用于索引文件的快速访问,而消息数据的传输主要使用 sendfile。两者结合使用,实现了 Kafka 的高性能特性。