mmap内存映射文件

1. mmap 基本原理

示例代码（Java）：

/**
 * Minimal demonstration of reading and writing a file through a memory mapping.
 */
public class MmapExample {
    /**
     * Maps {@code data.txt} read-write, reads up to the first 1024 bytes, overwrites the
     * beginning of the file with {@code "Hello mmap"} and flushes the change to disk.
     *
     * <p>If the file is shorter than the greeting (for example, it was just created), the
     * mapping is sized to fit the greeting, which grows the file accordingly.
     *
     * @param args unused
     * @throws Exception if the file cannot be opened, mapped or flushed
     */
    public static void main(String[] args) throws Exception {
        byte[] greeting = "Hello mmap".getBytes(java.nio.charset.StandardCharsets.UTF_8);

        // try-with-resources closes the channel and the file even if mapping or I/O fails.
        try (RandomAccessFile file = new RandomAccessFile("data.txt", "rw");
             FileChannel channel = file.getChannel()) {

            // Map at least enough room for the greeting, and never more than
            // Integer.MAX_VALUE bytes, which is the largest size map() accepts.
            long mapSize = Math.min(
                Math.max(channel.size(), greeting.length), Integer.MAX_VALUE);

            MappedByteBuffer mappedBuffer = channel.map(
                FileChannel.MapMode.READ_WRITE,  // mapping mode
                0,                               // start position in the file
                mapSize                          // number of bytes to map
            );

            // Read through the mapping exactly like a regular ByteBuffer, but never
            // ask for more bytes than the mapping holds.
            byte[] data = new byte[Math.min(1024, mappedBuffer.remaining())];
            mappedBuffer.get(data);

            // Overwrite the start of the file.
            mappedBuffer.position(0);
            mappedBuffer.put(greeting);

            // Force the modified pages to disk (optional).
            mappedBuffer.force();
        }
    }
}

2. mmap 与传统 I/O 对比

传统文件读取流程

流程示意：

应用程序 → read() 系统调用 → 内核缓冲区 → 用户空间缓冲区

mmap 文件读取流程

流程示意：

应用程序直接访问 → 内存映射区域 ← 文件页缓存

3. mmap 工作模式

示例代码（Java）：

/**
 * Shows the three mapping modes offered by {@link FileChannel#map}.
 */
public class MmapModes {
    /**
     * Maps the whole of {@code test.dat} once in each of the three available modes.
     *
     * @throws Exception if the file cannot be opened or mapped
     */
    public void differentModes() throws Exception {
        // try-with-resources closes the channel and the file even if a map() call fails.
        try (RandomAccessFile file = new RandomAccessFile("test.dat", "rw");
             FileChannel channel = file.getChannel()) {

            long size = channel.size();

            // READ_ONLY: any attempt to modify the buffer throws ReadOnlyBufferException.
            MappedByteBuffer readOnly = channel.map(
                FileChannel.MapMode.READ_ONLY, 0, size);

            // READ_WRITE: changes made to the buffer are eventually propagated to the file.
            MappedByteBuffer readWrite = channel.map(
                FileChannel.MapMode.READ_WRITE, 0, size);

            // PRIVATE: copy-on-write; changes stay private and are not written to the file.
            MappedByteBuffer privateCopy = channel.map(
                FileChannel.MapMode.PRIVATE, 0, size);
        }
    }
}

4. mmap 在 Kafka 中的应用

示例代码（Java）：

/**
 * Simplified sketch of how Kafka-style index files can be searched through mmap.
 *
 * <p>In this example every index entry is {@value #ENTRY_SIZE} bytes: an 8-byte logical
 * offset followed by an 8-byte physical position. Entries must be sorted by offset.
 */
public class KafkaIndexMmap {

    /** Size in bytes of one index entry (8-byte offset + 8-byte position). */
    private static final int ENTRY_SIZE = 16;

    /** Byte offset of the physical-position field inside an entry. */
    private static final int POSITION_FIELD_OFFSET = 8;

    // Mapping of the offset index file.
    private MappedByteBuffer offsetIndexMmap;

    // Mapping of the timestamp index file.
    private MappedByteBuffer timestampIndexMmap;

    /**
     * Maps {@code basePath + ".index"} and {@code basePath + ".timeindex"} into memory.
     *
     * @param basePath path of the segment without extension
     * @throws Exception if either file cannot be opened or mapped
     */
    public void initIndex(String basePath) throws Exception {
        offsetIndexMmap = mapWholeFile(new File(basePath + ".index"));
        timestampIndexMmap = mapWholeFile(new File(basePath + ".timeindex"));
    }

    /**
     * Maps an entire file read-write and releases the file handle straight away.
     * A mapping stays valid after the channel that created it has been closed.
     */
    private static MappedByteBuffer mapWholeFile(File indexFile) throws java.io.IOException {
        try (RandomAccessFile raf = new RandomAccessFile(indexFile, "rw");
             FileChannel channel = raf.getChannel()) {
            return channel.map(FileChannel.MapMode.READ_WRITE, 0, channel.size());
        }
    }

    /**
     * Looks up the physical position recorded for an exact logical offset.
     *
     * @param targetOffset logical offset to search for
     * @return the physical position stored for {@code targetOffset}, or {@code -1} if the
     *         index holds no entry with exactly that offset
     */
    public long findOffset(long targetOffset) {
        // The binary search runs directly against the mapped region.
        int position = binarySearchInMmap(offsetIndexMmap, targetOffset);
        if (position >= 0) {
            return offsetIndexMmap.getLong(position * ENTRY_SIZE + POSITION_FIELD_OFFSET);
        }
        return -1;
    }

    /**
     * Binary-searches the entries of {@code buffer} for {@code target}.
     *
     * @param buffer mapped index whose entries are sorted by offset
     * @param target logical offset to find
     * @return the entry index if found, otherwise {@code -(insertionPoint + 1)}
     */
    private int binarySearchInMmap(MappedByteBuffer buffer, long target) {
        int low = 0;
        int high = (buffer.limit() / ENTRY_SIZE) - 1;

        while (low <= high) {
            int mid = (low + high) >>> 1; // unsigned shift avoids overflow of low + high
            long midOffset = buffer.getLong(mid * ENTRY_SIZE);

            if (midOffset < target) {
                low = mid + 1;
            } else if (midOffset > target) {
                high = mid - 1;
            } else {
                return mid;
            }
        }
        return -(low + 1);
    }
}

5. mmap 与 sendfile 对比

特性	mmap	sendfile
数据流向	文件 ↔ 页缓存 ↔ 用户空间	文件 → 页缓存 → 网卡
用户空间访问	可直接读写	不可访问数据
适用场景	随机访问、频繁修改	一次性传输、只读
内存占用	占用整个映射区域的虚拟地址空间（物理页按需调入）	仅缓冲区大小
系统调用	mmap/munmap	sendfile

6. mmap 高级用法

大文件分块映射

示例代码（Java）：

/**
 * Processes a file of arbitrary size by mapping it one fixed-size window at a time.
 */
public class LargeFileMmap {
    /**
     * Size of each mapping window: 1 GiB. Written as a long shift so that enlarging it
     * cannot silently overflow int arithmetic. A single map() call accepts at most
     * Integer.MAX_VALUE bytes, so this must stay below that.
     */
    private static final long MAPPING_SIZE = 1L << 30;

    /**
     * Walks the file window by window, processing and then flushing each window.
     *
     * @param filePath path of the file to process
     * @throws Exception if the file cannot be opened, mapped or flushed
     */
    public void processLargeFile(String filePath) throws Exception {
        // try-with-resources closes the channel and the file even if a window fails.
        try (RandomAccessFile file = new RandomAccessFile(filePath, "rw");
             FileChannel channel = file.getChannel()) {

            long fileSize = channel.size();

            for (long offset = 0; offset < fileSize; offset += MAPPING_SIZE) {
                // The last window may be shorter than MAPPING_SIZE.
                long size = Math.min(MAPPING_SIZE, fileSize - offset);

                MappedByteBuffer buffer = channel.map(
                    FileChannel.MapMode.READ_WRITE, offset, size);

                processChunk(buffer, offset, size);

                // Flush this window to disk before moving on.
                buffer.force();
            }
        }
    }

    /**
     * Consumes every byte of one mapped window.
     *
     * @param buffer the mapped window, positioned at its start
     * @param offset position of the window within the file
     * @param size   length of the window in bytes
     */
    private void processChunk(MappedByteBuffer buffer, long offset, long size) {
        while (buffer.hasRemaining()) {
            byte b = buffer.get();
            // Handle each byte here...
        }
    }
}

共享内存通信

示例代码（Java）：

/**
 * Process A: writes a small record into a file-backed shared memory region.
 *
 * <p>Record layout, starting at byte 0: a 4-byte int, an 8-byte timestamp in
 * milliseconds, then the message bytes.
 */
public class MmapWriter {
    /**
     * Maps the first 4096 bytes of {@code /tmp/shared_memory}, writes one record,
     * flushes it and keeps the mapping alive for 60 seconds.
     *
     * @param args unused
     * @throws Exception if the file cannot be opened or mapped, or the sleep is interrupted
     */
    public static void main(String[] args) throws Exception {
        // try-with-resources releases the file handle once the process is done.
        try (RandomAccessFile file = new RandomAccessFile("/tmp/shared_memory", "rw");
             FileChannel channel = file.getChannel()) {

            MappedByteBuffer buffer = channel.map(FileChannel.MapMode.READ_WRITE, 0, 4096);

            // Write the record into the shared region.
            buffer.putInt(123);
            buffer.putLong(System.currentTimeMillis());
            // Encode with an explicit charset so the bytes do not depend on the platform default.
            buffer.put("Hello from Process A".getBytes(java.nio.charset.StandardCharsets.UTF_8));

            buffer.force();
            Thread.sleep(60000); // keep the mapping alive
        }
    }
}

/**
 * Process B: reads the record stored in the file-backed shared memory region.
 *
 * <p>Expected layout, starting at byte 0: a 4-byte int, an 8-byte timestamp, then a
 * message of {@value #MESSAGE_LENGTH} bytes.
 */
public class MmapReader {
    /** Number of message bytes read after the int and the timestamp. */
    private static final int MESSAGE_LENGTH = 20;

    /**
     * Maps the first 4096 bytes of {@code /tmp/shared_memory}, decodes one record and
     * prints it to standard output.
     *
     * @param args unused
     * @throws Exception if the file cannot be opened or mapped
     */
    public static void main(String[] args) throws Exception {
        // try-with-resources releases the file handle even if mapping or decoding fails.
        try (RandomAccessFile file = new RandomAccessFile("/tmp/shared_memory", "rw");
             FileChannel channel = file.getChannel()) {

            MappedByteBuffer buffer = channel.map(FileChannel.MapMode.READ_WRITE, 0, 4096);

            // Read the fields in the order they are laid out.
            int value = buffer.getInt();
            long timestamp = buffer.getLong();

            byte[] strBytes = new byte[MESSAGE_LENGTH];
            buffer.get(strBytes);
            // Decode with an explicit charset rather than the platform default.
            String message = new String(strBytes, java.nio.charset.StandardCharsets.UTF_8).trim();

            System.out.println("Read: " + value + ", " + timestamp + ", " + message);
        }
    }
}

7. mmap 性能优化技巧

示例代码（Java）：

/**
 * Small collection of techniques for getting better performance out of mmap.
 */
public class MmapOptimization {

    /** Page size assumed for alignment; 4 KiB is typical but not guaranteed on every platform. */
    private static final long PAGE_SIZE = 4096;

    /** Largest number of bytes moved by one bulk transfer in {@link #batchOperations}. */
    private static final int BATCH_SIZE = 8192;

    /**
     * Sets the file to a fixed length up front so it does not have to grow during use.
     *
     * <p>{@code RandomAccessFile.setLength} also truncates: a file currently longer than
     * {@code size} is cut down to {@code size}.
     *
     * @param filePath file to create or resize
     * @param size     desired length in bytes
     * @throws Exception if the file cannot be opened or resized
     */
    public void preallocateFile(String filePath, long size) throws Exception {
        // try-with-resources closes the file even if setLength fails.
        try (RandomAccessFile file = new RandomAccessFile(filePath, "rw")) {
            file.setLength(size);
        }
    }

    /**
     * Maps {@code data.bin} with a size rounded up to a multiple of the page size.
     *
     * <p>Mapping past the end of the file in READ_WRITE mode grows the file to the
     * mapped size.
     *
     * @throws Exception if the file cannot be opened or mapped
     */
    public void alignedMapping() throws Exception {
        try (RandomAccessFile file = new RandomAccessFile("data.bin", "rw");
             FileChannel channel = file.getChannel()) {

            // Round up to the next multiple of PAGE_SIZE (PAGE_SIZE is a power of two).
            long alignedSize = (channel.size() + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);

            MappedByteBuffer buffer = channel.map(
                FileChannel.MapMode.READ_WRITE, 0, alignedSize);
        }
    }

    /**
     * Moves data in bulk rather than byte by byte: reads up to {@value #BATCH_SIZE} bytes
     * from the buffer's current position, then writes them back starting at position 0.
     *
     * <p>If fewer than {@value #BATCH_SIZE} bytes remain, only the remaining bytes are
     * transferred instead of failing with {@code BufferUnderflowException}.
     *
     * @param buffer mapped buffer to read from and write to
     */
    public void batchOperations(MappedByteBuffer buffer) {
        int length = Math.min(BATCH_SIZE, buffer.remaining());
        byte[] batchData = new byte[length];

        // Bulk read.
        buffer.get(batchData, 0, length);

        // Process the data here...

        // Bulk write, starting from the beginning of the buffer.
        buffer.position(0);
        buffer.put(batchData, 0, length);
    }
}

8. mmap 注意事项

示例代码（Java）：

/**
 * Pitfalls to keep in mind when using mmap from Java.
 */
public class MmapCaveats {

    /**
     * Caveat 1: a mapping is not released by closing the channel; it lives until the
     * buffer is garbage collected unless it is unmapped explicitly.
     *
     * @throws Exception if {@code temp.dat} cannot be opened or mapped
     */
    public void resourceManagement() throws Exception {
        // try-with-resources closes the channel and the file even if mapping fails.
        try (RandomAccessFile file = new RandomAccessFile("temp.dat", "rw");
             FileChannel channel = file.getChannel()) {
            MappedByteBuffer buffer = channel.map(FileChannel.MapMode.READ_WRITE, 0, 1024);

            // Release the mapping eagerly (works on Java 8 and on Java 9+).
            unmapBuffer(buffer);
        }
    }

    /**
     * Caveat 2: memory limits when handling large files.
     *
     * <p>On 32-bit systems a single mapping is limited to roughly 1.5-2 GB; on 64-bit
     * systems the limit is mainly address space and physical memory. The file is
     * therefore processed in chunks, and each chunk is unmapped as soon as it is done.
     *
     * @throws Exception if {@code large.dat} cannot be opened or mapped
     */
    public void handleLargeFiles() throws Exception {
        try (RandomAccessFile file = new RandomAccessFile("large.dat", "rw");
             FileChannel channel = file.getChannel()) {

            long chunkSize = 256L * 1024 * 1024; // 256 MB
            long fileSize = channel.size();

            for (long offset = 0; offset < fileSize; offset += chunkSize) {
                // The last chunk may be shorter than chunkSize.
                long size = Math.min(chunkSize, fileSize - offset);
                MappedByteBuffer buffer = channel.map(
                    FileChannel.MapMode.READ_WRITE, offset, size);

                // Process the current chunk.
                processBuffer(buffer);

                // Release the current mapping promptly.
                unmapBuffer(buffer);
            }
        }
    }

    /** Placeholder for per-chunk work; consumes every byte of the chunk. */
    private void processBuffer(MappedByteBuffer buffer) {
        while (buffer.hasRemaining()) {
            buffer.get();
            // Handle each byte here...
        }
    }

    /**
     * Unmaps a buffer eagerly, on a best-effort basis.
     *
     * <p>The buffer must not be touched after this call: accessing an unmapped region
     * can crash the JVM. If unmapping is not possible the failure is reported on
     * standard error and the mapping is left for the garbage collector to release.
     *
     * @param buffer the mapping to release; {@code null} is ignored
     */
    private void unmapBuffer(MappedByteBuffer buffer) {
        if (buffer == null) {
            return;
        }
        try {
            invokeCleaner(buffer);
        } catch (Exception e) {
            // Best effort only, but do not fail silently.
            System.err.println("Could not unmap buffer eagerly; leaving it to GC: " + e);
        }
    }

    /**
     * Runs the buffer's cleaner through reflection, so that this class compiles on every
     * JDK: {@code sun.misc.Unsafe.invokeCleaner} on Java 9+, the buffer's own
     * {@code cleaner().clean()} on Java 8.
     */
    private static void invokeCleaner(MappedByteBuffer buffer)
            throws ReflectiveOperationException {
        Class<?> unsafeClass = Class.forName("sun.misc.Unsafe");

        java.lang.reflect.Method invokeCleaner;
        try {
            invokeCleaner = unsafeClass.getMethod("invokeCleaner", java.nio.ByteBuffer.class);
        } catch (NoSuchMethodException ignored) {
            // Java 8: Unsafe has no invokeCleaner, so call cleaner().clean() on the buffer.
            java.lang.reflect.Method cleanerMethod = buffer.getClass().getMethod("cleaner");
            cleanerMethod.setAccessible(true);
            Object cleaner = cleanerMethod.invoke(buffer);
            if (cleaner != null) {
                cleaner.getClass().getMethod("clean").invoke(cleaner);
            }
            return;
        }

        java.lang.reflect.Field theUnsafe = unsafeClass.getDeclaredField("theUnsafe");
        theUnsafe.setAccessible(true);
        invokeCleaner.invoke(theUnsafe.get(null), buffer);
    }
}
总结

mmap 是一种强大的零拷贝技术，特别适合：

随机访问：数据库索引、搜索索引
频繁修改：内存数据库、缓存系统
进程间通信：共享内存
大文件处理：日志分析、数据处理

在 Kafka 中，mmap 主要用于索引文件的快速访问，而消息数据的传输主要使用 sendfile。两者结合使用，实现了 Kafka 的高性能特性。