Linux 系统编程 · 第 3 章：文件 I/O 基础

本章深入讲解 Linux 文件 I/O 的核心接口：open、read、write、close，以及文件描述符的底层机制，是所有 I/O 编程的基础。

[文件描述符（File Descriptor）](#文件描述符（File Descriptor）)
[open / openat --- 打开文件](#open / openat — 打开文件)
[read --- 读取数据](#read — 读取数据)
[write --- 写入数据](#write — 写入数据)
[close --- 关闭文件](#close — 关闭文件)
[lseek --- 文件偏移量](#lseek — 文件偏移量)
文件描述符的内核数据结构
[高级 I/O 操作](#高级 I/O 操作)
综合实践

1. 文件描述符（File Descriptor）

1.1 概念与本质

文件描述符（fd）是内核为每个打开文件维护的非负整数索引，是进程与内核之间 I/O 操作的"句柄"。

复制代码

进程视角（用户空间）          内核视角
─────────────────────────────────────────────────────────────
  文件描述符表（每进程一份）
  ┌────┬──────────────┐
  │ fd │  指针         │
  ├────┼──────────────┤
  │  0 │ ──────────────────► 打开文件表项（stdin）
  │  1 │ ──────────────────► 打开文件表项（stdout）
  │  2 │ ──────────────────► 打开文件表项（stderr）
  │  3 │ ──────────────────► 打开文件表项（用户打开的文件）
  │  4 │ ──────────────────► 打开文件表项（套接字等）
  │ .. │  ...          │
  └────┴──────────────┘
         （进程级）

  打开文件表（系统全局）
  ┌──────────────────────────────────────┐
  │ 文件偏移量 (offset)                   │
  │ 访问模式   (O_RDONLY/O_WRONLY/...)    │
  │ 状态标志   (O_NONBLOCK/O_APPEND/...)  │
  │ inode 指针 ──────────────────────────────► inode（磁盘文件元数据）
  └──────────────────────────────────────┘
         （系统级，可被多个 fd 共享）

1.2 标准文件描述符

复制代码

/* 文件名：fd_basics.c
 * 演示标准文件描述符及其基本属性
 */
#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <string.h>

/*
 * Print one line describing a descriptor: its access mode, whether it refers
 * to a terminal, and whether O_APPEND is set.  A descriptor whose status
 * flags cannot be fetched is reported as invalid.
 *
 *   fd    descriptor to inspect
 *   name  label printed in the first column
 */
void print_fd_info(int fd, const char *name) {
    int status = fcntl(fd, F_GETFL);   /* file status flags + access mode */
    if (status == -1) {
        printf("  %-10s fd=%-3d  [无效]\n", name, fd);
        return;
    }

    /* Decode the access-mode bits selected by O_ACCMODE. */
    int access = status & O_ACCMODE;
    const char *mode = "未知";
    if (access == O_RDONLY) {
        mode = "只读(O_RDONLY)";
    } else if (access == O_WRONLY) {
        mode = "只写(O_WRONLY)";
    } else if (access == O_RDWR) {
        mode = "读写(O_RDWR)";
    }

    const char *tty_text    = isatty(fd) ? "是" : "否";
    const char *append_text = (status & O_APPEND) ? "是" : "否";

    printf("  %-10s fd=%-3d  模式=%-20s  终端=%s  APPEND=%s\n",
           name, fd, mode, tty_text, append_text);
}

/*
 * fd_basics demo: describe the three standard descriptors, then open and
 * close a few files to show that a new descriptor always receives the lowest
 * unused number.
 *
 * NOTE(review): system() is declared in <stdlib.h>, which fd_basics.c does
 * not include; add it when building with a compiler that rejects implicit
 * function declarations.
 */
int main(void) {
    printf("=== 标准文件描述符信息 ===\n");
    print_fd_info(STDIN_FILENO,  "stdin");    /* 0 */
    print_fd_info(STDOUT_FILENO, "stdout");   /* 1 */
    print_fd_info(STDERR_FILENO, "stderr");   /* 2 */

    /* Open two regular files and observe which numbers they are given. */
    int fd3 = open("/etc/hostname", O_RDONLY);
    if (fd3 == -1) perror("open /etc/hostname");
    int fd4 = open("/etc/os-release", O_RDONLY);
    if (fd4 == -1) perror("open /etc/os-release");

    printf("\n打开两个文件后：\n");
    print_fd_info(fd3, "/etc/hostname");     /* reports an invalid fd if open failed */
    print_fd_info(fd4, "/etc/os-release");

    /* Free the first descriptor; the next open() should hand it out again. */
    if (fd3 != -1) close(fd3);
    int fd3_new = open("/etc/passwd", O_RDONLY);
    if (fd3_new == -1) perror("open /etc/passwd");
    printf("\n关闭 fd=%d 后重新打开文件，新 fd=%d（复用了 fd=3）\n",
           fd3, fd3_new);

    if (fd3_new != -1) close(fd3_new);
    if (fd4 != -1) close(fd4);

    /* List the descriptors of the process that runs the command. */
    printf("\n=== /proc/self/fd 中的文件描述符 ===\n");
    /* Flush first: the child writes straight to fd 1, so text still sitting
     * in our stdio buffer would otherwise appear after its output whenever
     * stdout is a pipe or a file. */
    fflush(stdout);
    system("ls -la /proc/self/fd 2>/dev/null");

    return 0;
}

复制代码

gcc -o fd_basics fd_basics.c
./fd_basics
# 输出示例：
# === 标准文件描述符信息 ===
#   stdin      fd=0    模式=只读(O_RDONLY)       终端=是  APPEND=否
#   stdout     fd=1    模式=只写(O_WRONLY)       终端=是  APPEND=否
#   stderr     fd=2    模式=只写(O_WRONLY)       终端=是  APPEND=否
#
# 打开两个文件后：
#   /etc/hostname  fd=3    模式=只读(O_RDONLY)   终端=否  APPEND=否
#   /etc/os-release fd=4   模式=只读(O_RDONLY)   终端=否  APPEND=否
#
# 关闭 fd=3 后重新打开文件，新 fd=3（复用了 fd=3）

1.3 文件描述符限制

复制代码

# 查看进程级 fd 限制
ulimit -n              # 软限制（当前生效）
ulimit -Hn             # 硬限制（上限）

# 查看系统级 fd 限制
cat /proc/sys/fs/file-max        # 系统最大打开文件数
cat /proc/sys/fs/file-nr         # 已分配/空闲/最大

# 临时修改软限制（不超过硬限制）
ulimit -n 65536

# 查看某进程的 fd 使用情况
ls /proc/$$/fd | wc -l

复制代码

/* 文件名：fd_limit.c
 * 演示文件描述符耗尽的情况与处理
 */
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/resource.h>

/*
 * fd_limit demo: lower RLIMIT_NOFILE to 10, open /dev/null until open()
 * fails, then close everything and restore the original limit.
 *
 * Returns 0 on success, 1 if the resource limit cannot be read or lowered.
 */
int main(void) {
    /* Fetch the current limits so they can be shown and restored later. */
    struct rlimit rl;
    if (getrlimit(RLIMIT_NOFILE, &rl) == -1) {
        perror("getrlimit");
        return 1;
    }
    printf("fd 软限制: %lu\n", (unsigned long)rl.rlim_cur);
    printf("fd 硬限制: %lu\n", (unsigned long)rl.rlim_max);

    /* Temporarily lower the soft limit.  If this fails the loop below would
     * not hit the limit where expected, so stop here. */
    struct rlimit new_rl = { .rlim_cur = 10, .rlim_max = rl.rlim_max };
    if (setrlimit(RLIMIT_NOFILE, &new_rl) == -1) {
        perror("setrlimit");
        return 1;
    }

    printf("\n将 fd 限制降低到 10，尝试打开文件直到耗尽：\n");
    int fds[32];
    const int capacity = (int)(sizeof(fds) / sizeof(fds[0]));
    int count = 0;

    /* The loop is also bounded by the array size so that fds[] can never be
     * overrun, whatever limit is actually in force. */
    while (count < capacity) {
        int fd = open("/dev/null", O_RDONLY);
        if (fd == -1) {
            /* EMFILE: the per-process descriptor limit was reached. */
            printf("第 %d 次 open 失败: errno=%d (%s)\n",
                   count + 1, errno, strerror(errno));
            break;
        }
        fds[count++] = fd;
        printf("  打开第 %d 个 fd = %d\n", count, fd);
    }

    /* Close every descriptor opened above. */
    for (int i = 0; i < count; i++) {
        close(fds[i]);
    }

    /* Put the original limit back. */
    if (setrlimit(RLIMIT_NOFILE, &rl) == -1) {
        perror("setrlimit");
        return 1;
    }
    printf("\nfd 限制已恢复为 %lu\n", (unsigned long)rl.rlim_cur);

    return 0;
}

复制代码

gcc -o fd_limit fd_limit.c
./fd_limit
# 输出示例：
# fd 软限制: 1024
# fd 硬限制: 1048576
#
# 将 fd 限制降低到 10，尝试打开文件直到耗尽：
#   打开第 1 个 fd = 3
#   打开第 2 个 fd = 4
#   ...
#   打开第 7 个 fd = 9
# 第 8 次 open 失败: errno=24 (Too many open files)

2. open / openat --- 打开文件

2.1 函数原型与参数

复制代码

#include <fcntl.h>

/* 打开或创建文件，返回文件描述符（失败返回 -1）*/
int open(const char *pathname, int flags);
int open(const char *pathname, int flags, mode_t mode);  /* 创建文件时需要 mode */

/* openat：相对于目录 fd 打开文件（防止 TOCTOU 竞争）*/
int openat(int dirfd, const char *pathname, int flags, mode_t mode);

2.2 flags 标志详解

复制代码

flags 由两部分组成（用 | 组合）：

【访问模式（必选其一）】
  O_RDONLY   = 0   只读
  O_WRONLY   = 1   只写
  O_RDWR     = 2   读写

【文件创建标志（可选）】
  O_CREAT        文件不存在则创建（需要 mode 参数）
  O_EXCL         与 O_CREAT 合用：文件已存在则报错 EEXIST
  O_TRUNC        打开时截断文件为 0 字节
  O_TMPFILE      创建匿名临时文件（Linux 3.11+）

【文件状态标志（可选）】
  O_APPEND       每次写入前自动定位到文件末尾（原子操作）
  O_NONBLOCK     非阻塞模式（对管道、设备有效）
  O_SYNC         每次 write 等待数据写入磁盘（数据+元数据）
  O_DSYNC        每次 write 等待数据写入磁盘（仅数据）
  O_DIRECT       绕过内核页缓存（直接 I/O）
  O_CLOEXEC      exec 时自动关闭（防止 fd 泄漏到子进程）
  O_NOFOLLOW     不跟随符号链接
  O_DIRECTORY    要求 pathname 是目录
  O_LARGEFILE    支持大文件（64位系统默认支持）

复制代码

/* 文件名：open_flags.c
 * 演示各种 open flags 的使用场景
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/stat.h>

#define TEST_DIR "/tmp/open_flags_demo"

int main(void) {
    /* 创建测试目录 */
    mkdir(TEST_DIR, 0755);

    /* ── 场景1：O_CREAT | O_EXCL 原子创建（防止覆盖）── */
    printf("=== 场景1：O_CREAT | O_EXCL 原子创建 ===\n");
    int fd = open(TEST_DIR "/new_file.txt",
                  O_WRONLY | O_CREAT | O_EXCL, 0644);
    if (fd == -1) {
        perror("第一次创建");
    } else {
        printf("第一次创建成功，fd=%d\n", fd);
        write(fd, "hello\n", 6);
        close(fd);
    }

    /* 再次创建同名文件，应该失败（EEXIST）*/
    fd = open(TEST_DIR "/new_file.txt",
              O_WRONLY | O_CREAT | O_EXCL, 0644);
    if (fd == -1) {
        printf("第二次创建失败（预期）: %s\n", strerror(errno));
    }

    /* ── 场景2：O_APPEND 追加写入（多进程安全）── */
    printf("\n=== 场景2：O_APPEND 追加写入 ===\n");
    fd = open(TEST_DIR "/append.log",
              O_WRONLY | O_CREAT | O_APPEND, 0644);
    write(fd, "第1行日志\n", 10);
    write(fd, "第2行日志\n", 10);
    close(fd);

    /* 再次打开追加，不会覆盖原内容 */
    fd = open(TEST_DIR "/append.log",
              O_WRONLY | O_CREAT | O_APPEND, 0644);
    write(fd, "第3行日志\n", 10);
    close(fd);

    printf("追加写入结果：\n");
    system("cat " TEST_DIR "/append.log");

    /* ── 场景3：O_TRUNC 截断文件 ── */
    printf("\n=== 场景3：O_TRUNC 截断文件 ===\n");
    fd = open(TEST_DIR "/append.log",
              O_WRONLY | O_TRUNC, 0644);
    write(fd, "截断后重写\n", 11);
    close(fd);
    printf("截断后内容：\n");
    system("cat " TEST_DIR "/append.log");

    /* ── 场景4：O_CLOEXEC 防止 fd 泄漏到子进程 ── */
    printf("\n=== 场景4：O_CLOEXEC 防止 fd 泄漏 ===\n");
    int fd_leak   = open("/etc/hostname", O_RDONLY);           /* 无 CLOEXEC */
    int fd_safe   = open("/etc/hostname", O_RDONLY | O_CLOEXEC); /* 有 CLOEXEC */
    printf("fd_leak=%d（exec后子进程可见）\n", fd_leak);
    printf("fd_safe=%d（exec后自动关闭）\n", fd_safe);

    /* 验证 FD_CLOEXEC 标志 */
    int flags_leak = fcntl(fd_leak, F_GETFD);
    int flags_safe = fcntl(fd_safe, F_GETFD);
    printf("fd_leak FD_CLOEXEC: %s\n",
           (flags_leak & FD_CLOEXEC) ? "已设置" : "未设置");
    printf("fd_safe FD_CLOEXEC: %s\n",
           (flags_safe & FD_CLOEXEC) ? "已设置" : "未设置");
    close(fd_leak);
    close(fd_safe);

    /* ── 场景5：O_SYNC 同步写入（数据安全）── */
    printf("\n=== 场景5：O_SYNC 同步写入 ===\n");
    fd = open(TEST_DIR "/sync.dat",
              O_WRONLY | O_CREAT | O_TRUNC | O_SYNC, 0644);
    /* 每次 write 都会等待数据真正写入磁盘，适合日志/数据库 */
    write(fd, "重要数据，必须持久化\n", 22);
    printf("O_SYNC write 完成（数据已落盘）\n");
    close(fd);

    /* 清理 */
    system("rm -rf " TEST_DIR);
    return 0;
}

复制代码

gcc -o open_flags open_flags.c
./open_flags
# 输出示例：
# === 场景1：O_CREAT | O_EXCL 原子创建 ===
# 第一次创建成功，fd=3
# 第二次创建失败（预期）: File exists
#
# === 场景2：O_APPEND 追加写入 ===
# 追加写入结果：
# 第1行日志
# 第2行日志
# 第3行日志
#
# === 场景3：O_TRUNC 截断文件 ===
# 截断后内容：
# 截断后重写
#
# === 场景4：O_CLOEXEC 防止 fd 泄漏 ===
# fd_leak=3（exec后子进程可见）
# fd_safe=4（exec后自动关闭）
# fd_leak FD_CLOEXEC: 未设置
# fd_safe FD_CLOEXEC: 已设置

2.3 mode 权限参数

复制代码

/* 文件名：open_mode.c
 * 演示 mode 参数与 umask 的交互
 */
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/stat.h>

/*
 * Render the nine permission bits of `mode` as an "rwxrwxrwx"-style string.
 *
 *   mode  permission bits; bits outside user/group/other rwx are ignored
 *   buf   output buffer with room for at least 10 bytes (9 chars + NUL)
 */
void mode_to_str(mode_t mode, char *buf) {
    /* Permission bits in display order: user, group, other. */
    static const mode_t perm_bits[9] = {
        S_IRUSR, S_IWUSR, S_IXUSR,
        S_IRGRP, S_IWGRP, S_IXGRP,
        S_IROTH, S_IWOTH, S_IXOTH
    };
    static const char letters[3] = { 'r', 'w', 'x' };

    for (int pos = 0; pos < 9; pos++) {
        buf[pos] = (mode & perm_bits[pos]) ? letters[pos % 3] : '-';
    }
    buf[9] = '\0';
}

/*
 * open_mode demo: shows how the mode argument of open() combines with the
 * process umask.  First the umask is cleared and files are created with
 * several modes; then the original umask is restored and one more file is
 * created with mode 0666 so the masking becomes visible.
 */
int main(void) {
    struct stat info;
    char rwx[10];

    /* Effective permissions are mode & ~umask.  Clearing the umask (and
     * remembering the old value) lets the first part show the raw modes. */
    mode_t saved_mask = umask(0);
    printf("当前 umask: %03o（已临时设为 0）\n\n", saved_mask);

    /* Modes to try, each with the label printed next to it. */
    static const struct { mode_t mode; const char *desc; } cases[] = {
        { 0777, "0777 (rwxrwxrwx)" },
        { 0755, "0755 (rwxr-xr-x)" },
        { 0644, "0644 (rw-r--r--)" },
        { 0600, "0600 (rw-------)" },
        { 0400, "0400 (r--------)" },
    };
    const int ncases = (int)(sizeof(cases) / sizeof(cases[0]));

    for (int k = 0; k < ncases; k++) {
        char path[64];
        snprintf(path, sizeof(path), "/tmp/mode_test_%03o.txt", cases[k].mode);

        int created = open(path, O_WRONLY | O_CREAT | O_TRUNC, cases[k].mode);
        if (created == -1) {
            perror("open");
            continue;
        }
        close(created);

        /* Read the permissions back from the file system. */
        stat(path, &info);
        mode_to_str(info.st_mode & 0777, rwx);
        printf("mode=%-25s → 实际权限: %s (%03o)\n",
               cases[k].desc, rwx, (unsigned)(info.st_mode & 0777));
        unlink(path);
    }

    /* Restore the original umask and show its effect on mode 0666. */
    umask(saved_mask);
    printf("\n恢复 umask=%03o 后：\n", saved_mask);

    const char *masked_path = "/tmp/umask_test.txt";
    int fd = open(masked_path, O_WRONLY | O_CREAT | O_TRUNC, 0666);
    stat(masked_path, &info);
    mode_to_str(info.st_mode & 0777, rwx);
    printf("mode=0666, umask=%03o → 实际权限: %s (%03o)\n",
           saved_mask, rwx, (unsigned)(info.st_mode & 0777));
    printf("计算：0666 & ~0%03o = 0%03o\n",
           saved_mask, (unsigned)(0666 & ~saved_mask));
    close(fd);
    unlink(masked_path);

    return 0;
}

复制代码

gcc -o open_mode open_mode.c
./open_mode
# 输出示例：
# 当前 umask: 022（已临时设为 0）
#
# mode=0777 (rwxrwxrwx)      → 实际权限: rwxrwxrwx (777)
# mode=0755 (rwxr-xr-x)      → 实际权限: rwxr-xr-x (755)
# mode=0644 (rw-r--r--)      → 实际权限: rw-r--r-- (644)
# mode=0600 (rw-------)      → 实际权限: rw------- (600)
# mode=0400 (r--------)      → 实际权限: r-------- (400)
#
# 恢复 umask=022 后：
# mode=0666, umask=022 → 实际权限: rw-r--r-- (644)
# 计算：0666 & ~0022 = 0644

3. read --- 读取数据

3.1 函数原型与行为

复制代码

#include <unistd.h>

/* 从 fd 读取最多 count 字节到 buf
 * 返回值：
 *   > 0  实际读取的字节数（可能 < count）
 *   = 0  到达文件末尾（EOF）
 *   = -1 出错（检查 errno）
 */
ssize_t read(int fd, void *buf, size_t count);

复制代码

read 的返回值情况：
─────────────────────────────────────────────────────────────
  返回 count    完整读取（最理想情况）
  返回 1~count-1  部分读取（短读：管道/套接字/信号中断等）
  返回 0        EOF（文件末尾，或对端关闭连接）
  返回 -1       出错：
                  EINTR    被信号中断（应重试）
                  EAGAIN   非阻塞模式下无数据可读
                  EIO      I/O 错误
                  EBADF    无效 fd
                  EFAULT   buf 地址无效
─────────────────────────────────────────────────────────────

3.2 read 的各种使用场景

复制代码

/* 文件名：read_demo.c
 * 演示 read 的各种使用场景和边界情况
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <fcntl.h>
#include <unistd.h>

/*
 * Scenario 1: read a regular file in a loop until end of file.
 * Reads /etc/os-release through a deliberately small buffer so that several
 * read() calls are needed, then reports the number of calls and total bytes.
 */
void read_file_demo(void) {
    printf("=== 场景1：循环读取文件 ===\n");

    int fd = open("/etc/os-release", O_RDONLY);
    if (fd == -1) {
        perror("open");
        return;
    }

    char chunk[64];        /* small on purpose: forces multiple reads */
    long bytes_seen = 0;
    int calls = 0;
    ssize_t got;

    for (;;) {
        got = read(fd, chunk, sizeof(chunk));
        if (got <= 0) {
            break;         /* 0 = EOF, -1 = error; told apart below */
        }
        bytes_seen += got;
        calls++;
        /* Only the first two rounds are printed. */
        if (calls <= 2) {
            printf("  第%d次 read：读取 %zd 字节\n", calls, got);
        }
    }

    if (got == -1) {
        perror("read");
    } else {
        printf("  读取完毕（EOF），共 %d 次 read，总计 %ld 字节\n",
               calls, bytes_seen);
    }

    close(fd);
}

/*
 * Scenario 2 helper: read exactly `count` bytes unless EOF comes first.
 *
 * Keeps calling read() until `count` bytes have been stored in `buf` or
 * read() reports end of file.  A read interrupted by a signal (EINTR) is
 * retried.
 *
 * Returns the number of bytes stored (less than `count` only at EOF), or -1
 * on any other read error (errno is left as set by read()).
 */
ssize_t read_exact(int fd, void *buf, size_t count) {
    char *dst = buf;
    size_t done = 0;

    while (done < count) {
        ssize_t got = read(fd, dst + done, count - done);
        if (got > 0) {
            done += (size_t)got;
            continue;
        }
        if (got == 0) {
            break;              /* EOF: return what we have */
        }
        if (errno != EINTR) {
            return -1;          /* real error */
        }
        /* EINTR: retry */
    }
    return (ssize_t)done;
}

/*
 * Demonstrates read_exact(): writes the 26 letters A-Z to a scratch file,
 * then reads it back in 10-byte requests.  The third request hits EOF after
 * 6 bytes.
 */
void read_exact_demo(void) {
    printf("\n=== 场景2：精确读取指定字节数 ===\n");

    /* Create the test file. */
    int fd = open("/tmp/read_exact_test.bin",
                  O_RDWR | O_CREAT | O_TRUNC, 0644);
    if (fd == -1) {
        perror("open");
        return;
    }
    const char data[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
    if (write(fd, data, strlen(data)) == -1) {
        perror("write");
    }
    lseek(fd, 0, SEEK_SET);

    /* One byte more than the largest request: the buffer is printed with %s,
     * so a terminating NUL must survive a full 10-byte read. */
    char buf[11] = {0};
    ssize_t n = read_exact(fd, buf, 10);
    printf("  精确读取 10 字节: [%s]（实际读取 %zd 字节）\n", buf, n);

    n = read_exact(fd, buf, 10);
    printf("  精确读取 10 字节: [%s]（实际读取 %zd 字节）\n", buf, n);

    /* Ask for more than is left; clear the buffer so stale bytes from the
     * previous read are not printed after the short result. */
    memset(buf, 0, sizeof(buf));
    n = read_exact(fd, buf, 10);
    printf("  尝试读取 10 字节（只剩6字节）: [%s]（实际读取 %zd 字节）\n",
           buf, n);

    close(fd);
    unlink("/tmp/read_exact_test.bin");
}

/*
 * Scenario 3 helper: read one line from `fd` without using stdio.
 *
 * Reads one byte at a time (slow; shown only to illustrate the principle)
 * until a newline has been stored, EOF is reached, or `maxlen - 1` bytes
 * have been stored.  The result is always NUL-terminated and keeps the
 * newline if one was read.
 *
 *   fd      descriptor to read from
 *   buf     output buffer of at least `maxlen` bytes
 *   maxlen  size of `buf`, including room for the terminating NUL
 *
 * Returns the number of bytes stored (0 at EOF), or -1 on error.  A NULL
 * `buf` or a zero `maxlen` fails with errno = EINVAL and leaves `buf`
 * untouched: with no room for even the terminator, `maxlen - 1` would wrap
 * around and the loop would write far past the buffer.
 */
ssize_t read_line(int fd, char *buf, size_t maxlen) {
    if (buf == NULL || maxlen == 0) {
        errno = EINVAL;
        return -1;
    }

    size_t len = 0;
    while (len < maxlen - 1) {
        char c;
        ssize_t n = read(fd, &c, 1);
        if (n == 0) {
            break;                      /* EOF */
        }
        if (n == -1) {
            if (errno == EINTR) {
                continue;               /* interrupted by a signal: retry */
            }
            return -1;
        }
        buf[len++] = c;
        if (c == '\n') {
            break;                      /* end of line */
        }
    }
    buf[len] = '\0';
    return (ssize_t)len;
}

/*
 * Demonstrates read_line(): prints the first four lines of /etc/os-release,
 * each with its line number and without the trailing newline.
 */
void read_line_demo(void) {
    printf("\n=== 场景3：逐行读取 ===\n");

    int fd = open("/etc/os-release", O_RDONLY);
    if (fd == -1) {
        perror("open");
        return;
    }

    char text[256];
    int shown = 0;

    for (;;) {
        ssize_t len = read_line(fd, text, sizeof(text));
        /* Stop at EOF/error, or once four lines have been printed. */
        if (len <= 0 || shown >= 4) {
            break;
        }
        /* Strip the trailing newline, if there is one. */
        if (text[len - 1] == '\n') {
            text[len - 1] = '\0';
        }
        shown++;
        printf("  第%d行: %s\n", shown, text);
    }

    close(fd);
}

/*
 * Scenario 4: read binary data.
 * Reads the first bytes of /bin/ls, prints the 4-byte ELF magic number and
 * then the EI_CLASS byte that follows it (1 = 32-bit, 2 = 64-bit).
 * Every read() is checked so that no uninitialized byte is ever printed.
 */
void read_binary_demo(void) {
    printf("\n=== 场景4：读取二进制数据（ELF 文件头）===\n");

    int fd = open("/bin/ls", O_RDONLY);
    if (fd == -1) {
        perror("open /bin/ls");
        return;
    }

    /* The first four bytes of an ELF file are the magic 0x7f 'E' 'L' 'F'. */
    unsigned char magic[4];
    ssize_t n = read(fd, magic, sizeof(magic));
    if (n != (ssize_t)sizeof(magic)) {
        if (n == -1) {
            perror("read");
        } else {
            fprintf(stderr, "read: unexpected end of file\n");
        }
        close(fd);
        return;
    }
    printf("  ELF 魔数: %02x %02x %02x %02x\n",
           magic[0], magic[1], magic[2], magic[3]);
    printf("  是 ELF 文件: %s\n",
           (magic[0] == 0x7f && magic[1] == 'E' &&
            magic[2] == 'L'  && magic[3] == 'F') ? "是 ✓" : "否");

    /* Fifth byte: EI_CLASS. */
    unsigned char ei_class = 0;
    n = read(fd, &ei_class, 1);
    if (n != 1) {
        if (n == -1) {
            perror("read");
        } else {
            fprintf(stderr, "read: unexpected end of file\n");
        }
        close(fd);
        return;
    }
    const char *bits = "未知";          /* anything other than 1 or 2 */
    if (ei_class == 2) {
        bits = "64位";
    } else if (ei_class == 1) {
        bits = "32位";
    }
    printf("  架构位数: %s\n", bits);

    close(fd);
}

int main(void) {
    read_file_demo();
    read_exact_demo();
    read_line_demo();
    read_binary_demo();
    return 0;
}

复制代码

gcc -o read_demo read_demo.c
./read_demo
# 输出示例：
# === 场景1：循环读取文件 ===
#   第1次 read：读取 64 字节
#   第2次 read：读取 64 字节
#   读取完毕（EOF），共 7 次 read，总计 420 字节
#
# === 场景2：精确读取指定字节数 ===
#   精确读取 10 字节: [ABCDEFGHIJ]（实际读取 10 字节）
#   精确读取 10 字节: [KLMNOPQRST]（实际读取 10 字节）
#   尝试读取 10 字节（只剩6字节）: [UVWXYZ]（实际读取 6 字节）
#
# === 场景3：逐行读取 ===
#   第1行: PRETTY_NAME="Ubuntu 22.04.3 LTS"
#   第2行: NAME="Ubuntu"
#   ...
#
# === 场景4：读取二进制数据（ELF 文件头）===
#   ELF 魔数: 7f 45 4c 46
#   是 ELF 文件: 是 ✓
#   架构位数: 64位

4. write --- 写入数据

4.1 函数原型与行为

复制代码

#include <unistd.h>

/* 将 buf 中的 count 字节写入 fd
 * 返回值：
 *   > 0  实际写入的字节数（可能 < count，称为"短写"）
 *   = -1 出错（检查 errno）
 *   注意：write 不会返回 0（除非 count=0）
 */
ssize_t write(int fd, const void *buf, size_t count);

4.2 write 的关键特性与陷阱

复制代码

/* 文件名：write_demo.c
 * 演示 write 的各种使用场景、短写处理和原子性
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/uio.h>   /* writev */

/*
 * Scenario 1: basic write() usage and detection of short writes.
 * Writes three text lines to a scratch file, printing the byte count of each
 * write and the total, then removes the file.
 */
void write_basic_demo(void) {
    printf("=== 场景1：基本写入 ===\n");

    int fd = open("/tmp/write_test.txt",
                  O_WRONLY | O_CREAT | O_TRUNC, 0644);
    if (fd == -1) {
        perror("open");
        return;
    }

    static const char *const text[] = {
        "第一行：Hello, Linux!\n",
        "第二行：文件 I/O 基础\n",
        "第三行：write 系统调用\n",
    };
    const int nlines = (int)(sizeof(text) / sizeof(text[0]));

    long written = 0;
    for (int idx = 0; idx < nlines; idx++) {
        size_t want = strlen(text[idx]);
        ssize_t got = write(fd, text[idx], want);
        if (got == -1) {
            perror("write");
            break;
        }
        if ((size_t)got < want) {
            /* Short write: fewer bytes were accepted than requested.  Rare
             * on regular files but common on pipes and sockets; production
             * code has to loop until everything is written. */
            printf("  短写！请求 %zu 字节，实际写入 %zd 字节\n", want, got);
        }
        written += got;
        printf("  写入第%d行：%zd 字节\n", idx + 1, got);
    }
    printf("  总计写入：%ld 字节\n", written);

    close(fd);
    unlink("/tmp/write_test.txt");
}

/*
 * Scenario 2: appending with and without O_APPEND.
 * First writes a line after a manual lseek(SEEK_END) (two separate steps),
 * then a second line through a descriptor opened with O_APPEND, and finally
 * prints the file.
 */
void write_append_demo(void) {
    printf("\n=== 场景2：O_APPEND 原子追加 ===\n");

    const char *path = "/tmp/append_atomic.log";
    /* Lengths come from strlen(): the messages contain multi-byte UTF-8
     * characters, so a hand-counted length would cut them short. */
    const char *msg_a = "进程A写入（非原子）\n";
    const char *msg_b = "进程B写入（O_APPEND 原子）\n";

    /* Without O_APPEND: seek to the end, then write.  Another writer can
     * slip in between the two calls, so concurrent writers may overwrite
     * each other. */
    int fd1 = open(path, O_WRONLY | O_CREAT | O_TRUNC, 0644);
    if (fd1 == -1) {
        perror("open");
        return;
    }
    lseek(fd1, 0, SEEK_END);
    if (write(fd1, msg_a, strlen(msg_a)) == -1) {
        perror("write");
    }
    close(fd1);

    /* With O_APPEND: positioning at the end and writing happen as one step. */
    int fd2 = open(path, O_WRONLY | O_APPEND);
    if (fd2 == -1) {
        perror("open");
        unlink(path);
        return;
    }
    if (write(fd2, msg_b, strlen(msg_b)) == -1) {
        perror("write");
    }
    close(fd2);

    printf("文件内容：\n");
    fflush(stdout);   /* keep our heading ahead of the child's output */
    system("cat /tmp/append_atomic.log");
    unlink(path);
}

/*
 * Scenario 3: gather output with writev().
 * Writes three separate buffers (header, body, footer) to a scratch file
 * with a single system call, prints the byte count and shows the file.
 */
void writev_demo(void) {
    printf("\n=== 场景3：writev 聚集写入 ===\n");

    int fd = open("/tmp/writev_test.txt",
                  O_WRONLY | O_CREAT | O_TRUNC, 0644);
    if (fd == -1) {
        perror("open");
        return;
    }

    /* Three buffers that are not contiguous in memory. */
    const char header[]  = "=== 报告头部 ===\n";
    const char body[]    = "报告正文内容...\n";
    const char footer[]  = "=== 报告尾部 ===\n";

    /* iov_base is not const-qualified, hence the casts; writev() only reads
     * from the buffers. */
    struct iovec iov[3];
    iov[0].iov_base = (void *)header;  iov[0].iov_len = strlen(header);
    iov[1].iov_base = (void *)body;    iov[1].iov_len = strlen(body);
    iov[2].iov_base = (void *)footer;  iov[2].iov_len = strlen(footer);

    /* One system call instead of three write() calls. */
    ssize_t total = writev(fd, iov, 3);
    if (total == -1) {
        perror("writev");
    } else {
        printf("writev 一次写入 %zd 字节（3个缓冲区）\n", total);
    }

    close(fd);
    fflush(stdout);   /* keep our output ahead of the child's */
    system("cat /tmp/writev_test.txt");
    unlink("/tmp/writev_test.txt");
}

/* Scenario 4: writing a struct to a file as raw binary data.
 * Student is the fixed-size record that write_struct_demo() writes with
 * write() and reads back with read(), sizeof(Student) bytes at a time. */
typedef struct {
    int   id;
    char  name[32];
    float score;
} Student;

void write_struct_demo(void) {
    printf("\n=== 场景4：写入/读取结构体 ===\n");

    const char *path = "/tmp/students.bin";
    Student students[] = {
        { 1, "张三", 95.5f },
        { 2, "李四", 87.0f },
        { 3, "王五", 92.3f },
    };
    int count = sizeof(students) / sizeof(students[0]);

    /* 写入 */
    int fd = open(path, O_WRONLY | O_CREAT | O_TRUNC, 0644);
    for (int i = 0; i < count; i++) {
        ssize_t n = write(fd, &students[i], sizeof(Student));
        printf("  写入学生 %s：%zd 字节\n", students[i].name, n);
    }
    close(fd);

    /* 读取验证 */
    fd = open(path, O_RDONLY);
    Student s;
    printf("  读取验证：\n");
    while (read(fd, &s, sizeof(Student)) == sizeof(Student)) {
        printf("    ID=%-3d  姓名=%-10s  分数=%.1f\n",
               s.id, s.name, s.score);
    }
    close(fd);
    unlink(path);
}

/*
 * Scenario 5: forcing data to disk with fsync() and fdatasync().
 * Writes a line, calls fsync(), writes another line, calls fdatasync(), and
 * reports the result of each sync call.
 */
void fsync_demo(void) {
    printf("\n=== 场景5：fsync 确保数据落盘 ===\n");

    int fd = open("/tmp/fsync_test.dat",
                  O_WRONLY | O_CREAT | O_TRUNC, 0644);
    if (fd == -1) {
        perror("open");
        return;
    }

    /* Lengths come from strlen(): the strings are multi-byte UTF-8, so a
     * hand-counted length would truncate them. */
    const char *first  = "重要数据\n";
    const char *second = "更多数据\n";

    if (write(fd, first, strlen(first)) == -1) {
        perror("  write");
    }

    /* write() only copies into the kernel page cache, which is lost on power
     * failure.  fsync() flushes data and metadata to disk; fdatasync()
     * flushes the data but may skip metadata that is not needed to read it
     * back, which makes it cheaper. */
    if (fsync(fd) == 0) {
        printf("  fsync 成功：数据已持久化到磁盘\n");
    } else {
        perror("  fsync");
    }

    /* fdatasync: data only, without inode metadata such as the mtime. */
    if (write(fd, second, strlen(second)) == -1) {
        perror("  write");
    }
    if (fdatasync(fd) == 0) {
        printf("  fdatasync 成功：数据已持久化（不含元数据）\n");
    } else {
        perror("  fdatasync");
    }

    close(fd);
    unlink("/tmp/fsync_test.dat");
}

int main(void) {
    write_basic_demo();
    write_append_demo();
    writev_demo();
    write_struct_demo();
    fsync_demo();
    return 0;
}

复制代码

gcc -o write_demo write_demo.c
./write_demo
# 输出示例：
# === 场景1：基本写入 ===
#   写入第1行：26 字节
#   写入第2行：30 字节
#   写入第3行：31 字节
#   总计写入：87 字节
#
# === 场景2：O_APPEND 原子追加 ===
# 文件内容：
# 进程A写入（非原子）
# 进程B写入（O_APPEND 原子）
#
# === 场景3：writev 聚集写入 ===
# writev 一次写入 64 字节（3个缓冲区）
# === 报告头部 ===
# 报告正文内容...
# === 报告尾部 ===

5. close --- 关闭文件

5.1 函数原型与注意事项

复制代码

#include <unistd.h>

/* 关闭文件描述符
 * 返回值：0 成功，-1 失败（检查 errno）
 */
int close(int fd);

复制代码

/* 文件名：close_demo.c
 * 演示 close 的正确使用方式和常见陷阱
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <fcntl.h>
#include <unistd.h>

/*
 * close_demo: three common close() pitfalls — closing a descriptor twice,
 * handling a failed close(), and leaking descriptors.
 */
int main(void) {
    /* Pitfall 1: closing the same descriptor twice. */
    printf("=== 陷阱1：双重关闭 ===\n");
    int fd = open("/etc/hostname", O_RDONLY);
    printf("打开 fd=%d\n", fd);

    close(fd);
    printf("第一次 close(fd=%d)：成功\n", fd);

    /* Dangerous: the number may already have been handed out again for a
     * different file, in which case this would close somebody else's fd. */
    int ret = close(fd);
    int saved_errno = errno;   /* capture before any other call can change it */
    printf("第二次 close(fd=%d)：ret=%d, errno=%d (%s)\n",
           fd, ret, saved_errno, strerror(saved_errno));

    /* Correct pattern: set the variable to -1 right after closing. */
    fd = open("/etc/hostname", O_RDONLY);
    close(fd);
    fd = -1;   /* marked invalid, so accidental reuse fails harmlessly */
    printf("关闭后设 fd=-1，再次 close(-1): %s\n",
           close(fd) == -1 ? "安全失败（预期）" : "意外成功");

    /* Pitfall 2: what to do when close() fails. */
    printf("\n=== 陷阱2：close 的返回值 ===\n");
    /* Typical close() errors:
     *   EBADF - invalid descriptor (already closed or never opened)
     *   EINTR - interrupted by a signal (whether to retry is debated)
     *   EIO   - I/O error (e.g. on NFS and other network file systems)
     *
     * On Linux the descriptor is released even when close() returns -1, so
     * do not retry: a retry would be a double close. */
    fd = open("/tmp/close_test.txt",
              O_WRONLY | O_CREAT | O_TRUNC, 0644);
    write(fd, "test\n", 5);

    if (close(fd) == -1) {
        /* Log the error but do not retry; with EIO the data may not have
         * reached the disk completely. */
        perror("close");
    } else {
        printf("close 成功\n");
    }
    fd = -1;   /* invalid from here on, whether close() succeeded or not */

    /* Pitfall 3: forgetting to close descriptors (fd leak). */
    printf("\n=== 陷阱3：fd 泄漏检测 ===\n");

    /* Shell command that counts this process's open descriptors. */
    char cmd[64];
    snprintf(cmd, sizeof(cmd), "ls /proc/%d/fd | wc -l", getpid());

    printf("打开大量文件前：");
    fflush(stdout);   /* flush so the label precedes the child's output */
    system(cmd);

    /* Simulate a leak: open files without closing them. */
    int leaked_fds[10];
    for (int i = 0; i < 10; i++) {
        leaked_fds[i] = open("/dev/null", O_RDONLY);
    }

    printf("泄漏 10 个 fd 后：");
    fflush(stdout);
    system(cmd);

    /* Fix the leak. */
    for (int i = 0; i < 10; i++) {
        close(leaked_fds[i]);
        leaked_fds[i] = -1;
    }

    printf("关闭后：");
    fflush(stdout);
    system(cmd);

    unlink("/tmp/close_test.txt");
    return 0;
}

复制代码

gcc -o close_demo close_demo.c
./close_demo
# 输出示例：
# === 陷阱1：双重关闭 ===
# 打开 fd=3
# 第一次 close(fd=3)：成功
# 第二次 close(fd=3)：ret=-1, errno=9 (Bad file descriptor)
# 关闭后设 fd=-1，再次 close(-1): 安全失败（预期）
#
# === 陷阱2：close 的返回值 ===
# close 成功
#
# === 陷阱3：fd 泄漏检测 ===
# 打开大量文件前：5
# 泄漏 10 个 fd 后：15
# 关闭后：5

6. lseek --- 文件偏移量

6.1 函数原型与 whence 参数

复制代码

#include <unistd.h>

/* 移动文件偏移量（读写位置）
 * whence：
 *   SEEK_SET  从文件开头偏移 offset 字节
 *   SEEK_CUR  从当前位置偏移 offset 字节（可为负）
 *   SEEK_END  从文件末尾偏移 offset 字节（可为负）
 * 返回值：新的文件偏移量（字节），失败返回 -1
 */
off_t lseek(int fd, off_t offset, int whence);

复制代码

/* 文件名：lseek_demo.c
 * 演示 lseek 的各种用法，包括文件空洞
 */
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/stat.h>

/*
 * lseek_demo: basic SEEK_SET / SEEK_CUR / SEEK_END positioning, finding a
 * file's size with lseek(), and creating a sparse file by seeking past the
 * end before writing.
 *
 * Returns 0 on success, 1 if a scratch file cannot be opened.
 */
int main(void) {
    const char *path = "/tmp/lseek_demo.bin";

    /* Create the test file with 10 bytes of content. */
    int fd = open(path, O_RDWR | O_CREAT | O_TRUNC, 0644);
    if (fd == -1) {
        perror("open");
        return 1;
    }
    write(fd, "ABCDEFGHIJ", 10);

    /* Basic lseek operations. */
    printf("=== 基本 lseek 操作 ===\n");

    off_t pos;

    /* SEEK_SET: offset from the start of the file. */
    pos = lseek(fd, 0, SEEK_SET);
    printf("SEEK_SET(0):  偏移量 = %ld（文件开头）\n", (long)pos);

    /* SEEK_CUR: offset from the current position. */
    pos = lseek(fd, 3, SEEK_CUR);
    printf("SEEK_CUR(+3): 偏移量 = %ld\n", (long)pos);

    /* Read the byte at the current position.  Initialized so that a failed
     * read() prints a placeholder rather than an indeterminate value. */
    char c = '?';
    read(fd, &c, 1);
    printf("  当前位置字符: '%c'（期望 'D'）\n", c);

    /* SEEK_END: offset from the end of the file. */
    pos = lseek(fd, 0, SEEK_END);
    printf("SEEK_END(0):  偏移量 = %ld（文件大小）\n", (long)pos);

    pos = lseek(fd, -3, SEEK_END);
    printf("SEEK_END(-3): 偏移量 = %ld（倒数第3字节）\n", (long)pos);
    c = '?';
    read(fd, &c, 1);
    printf("  倒数第3字节: '%c'（期望 'H'）\n", c);

    /* Trick: seeking to the end returns the file size. */
    printf("\n=== 获取文件大小 ===\n");
    off_t file_size = lseek(fd, 0, SEEK_END);
    printf("文件大小: %ld 字节\n", (long)file_size);
    lseek(fd, 0, SEEK_SET);   /* go back to the start afterwards */

    /* Sparse file. */
    printf("\n=== 文件空洞（Sparse File）===\n");
    /* Seeking beyond the end and then writing leaves a "hole" in between.
     * The hole takes no disk blocks; reading from it yields bytes whose
     * value is zero. */
    close(fd);
    fd = open("/tmp/sparse.bin", O_RDWR | O_CREAT | O_TRUNC, 0644);
    if (fd == -1) {
        perror("open");
        unlink(path);
        return 1;
    }

    write(fd, "HEAD", 4);                    /* offsets 0..3 */
    lseek(fd, 1024 * 1024, SEEK_SET);        /* skip ahead to 1 MiB */
    write(fd, "TAIL", 4);                    /* offsets 1 MiB .. 1 MiB + 3 */

    struct stat st;
    fstat(fd, &st);
    /* The demo treats st_blocks as a count of 512-byte units. */
    printf("文件逻辑大小:   %ld 字节（%.1f MB）\n",
           (long)st.st_size, st.st_size / 1048576.0);
    printf("实际占用块数:   %ld 块（%ld 字节）\n",
           (long)st.st_blocks, (long)st.st_blocks * 512);
    printf("空洞节省空间:   %.1f MB\n",
           (st.st_size - st.st_blocks * 512) / 1048576.0);

    /* Read from inside the hole; every byte should come back as zero.
     * unsigned char keeps the 0xFF fill value representable and makes %02x
     * print exactly two digits even if the read fails. */
    lseek(fd, 4, SEEK_SET);   /* just past "HEAD" */
    unsigned char hole_buf[8] = {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF};
    read(fd, hole_buf, sizeof(hole_buf));
    printf("空洞区域读取（期望全0）: ");
    for (int i = 0; i < 8; i++) printf("%02x ", hole_buf[i]);
    printf("\n");

    close(fd);
    unlink(path);
    unlink("/tmp/sparse.bin");
    return 0;
}

复制代码

gcc -o lseek_demo lseek_demo.c
./lseek_demo
# 输出示例：
# === 基本 lseek 操作 ===
# SEEK_SET(0):  偏移量 = 0（文件开头）
# SEEK_CUR(+3): 偏移量 = 3
#   当前位置字符: 'D'（期望 'D'）
# SEEK_END(0):  偏移量 = 10（文件大小）
# SEEK_END(-3): 偏移量 = 7（倒数第3字节）
#   倒数第3字节: 'H'（期望 'H'）
#
# === 获取文件大小 ===
# 文件大小: 10 字节
#
# === 文件空洞（Sparse File）===
# 文件逻辑大小:   1048580 字节（1.0 MB）
# 实际占用块数:   8 块（4096 字节）
# 空洞节省空间:   1.0 MB
# 空洞区域读取（期望全0）: 00 00 00 00 00 00 00 00

7. 文件描述符的内核数据结构

7.1 三层数据结构

复制代码

内核中与文件 I/O 相关的三层数据结构：

进程A                    进程B
fd 表                    fd 表
┌───┬──────┐            ┌───┬──────┐
│ 0 │  ●───┼──┐         │ 0 │  ●───┼──────────────────┐
│ 1 │  ●───┼──┼──┐      │ 1 │  ●───┼──┐               │
│ 2 │  ●───┼──┼──┼──┐   │ 3 │  ●───┼──┼──┐            │
│ 3 │  ●───┼──┼──┼──┼─┐ └───┴──────┘  │  │            │
└───┴──────┘  │  │  │ │               │  │            │
              │  │  │ │  系统打开文件表 │  │            │
              │  │  │ │  ┌────────────▼──▼────────┐   │
              │  │  │ └─►│ 偏移量=0  flags=O_RDWR  │   │
              │  │  │    │ inode ●──────────────────┼──►inode A
              │  │  │    └───────────────────────────┘  (磁盘文件)
              │  │  │    ┌────────────────────────┐
              │  │  └───►│ 偏移量=0  flags=O_WRONLY│
              │  │       │ inode ●──────────────────┼──►inode B
              │  │       └───────────────────────────┘
              │  │       ┌────────────────────────┐
              │  └──────►│ 偏移量=100 flags=O_RDWR │
              │          │ inode ●──────────────────┼──►inode C
              │          └───────────────────────────┘
              │          ┌────────────────────────┐
              └─────────►│ 偏移量=0  flags=O_RDONLY│
                         │ inode ●──────────────────┼──►inode A（共享）
                         └───────────────────────────┘

关键点：
  1. 同一进程的两个 fd 可以指向同一打开文件表项（dup）
  2. 不同进程的 fd 可以指向同一打开文件表项（fork 继承）
  3. 不同打开文件表项可以指向同一 inode（多次 open 同一文件）
  4. 每个打开文件表项有独立的偏移量和标志

7.2 dup / dup2 --- 复制文件描述符

复制代码

/* 文件名：dup_demo.c
 * 演示 dup/dup2 的使用：重定向与 fd 复制
 */
#define _GNU_SOURCE     /* dup3() is a Linux extension; must precede all includes */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>     /* system() */
#include <string.h>
#include <unistd.h>

/*
 * dup_demo: dup() (two descriptors sharing one open file description),
 * dup2() (redirecting stdout to a file and back), and dup3() (dup2 plus
 * O_CLOEXEC).
 *
 * Returns 0 on success, 1 if a descriptor needed by the demo cannot be
 * created.
 */
int main(void) {
    /* dup: the copy shares the same open file description. */
    printf("=== dup：复制文件描述符 ===\n");

    int fd = open("/tmp/dup_test.txt",
                  O_RDWR | O_CREAT | O_TRUNC, 0644);
    if (fd == -1) {
        perror("open /tmp/dup_test.txt");
        return 1;
    }
    int fd2 = dup(fd);
    if (fd2 == -1) {
        perror("dup");
        close(fd);
        unlink("/tmp/dup_test.txt");
        return 1;
    }

    printf("原始 fd=%d，dup 后 fd2=%d\n", fd, fd2);

    /* Write through fd, then through fd2: because the file offset is
     * shared, the second write continues where the first one stopped. */
    write(fd, "Hello ", 6);
    write(fd2, "World\n", 6);

    /* Read back to verify. */
    lseek(fd, 0, SEEK_SET);
    char buf[32] = {0};
    read(fd, buf, sizeof(buf) - 1);
    printf("文件内容: %s", buf);   /* expected: "Hello World\n" */

    /* Closing fd leaves fd2 usable. */
    close(fd);
    lseek(fd2, 0, SEEK_SET);
    memset(buf, 0, sizeof(buf));
    read(fd2, buf, sizeof(buf) - 1);
    printf("关闭 fd 后通过 fd2 读取: %s", buf);
    close(fd2);

    /* dup2: copy a descriptor onto a chosen number (redirection). */
    printf("\n=== dup2：标准输出重定向 ===\n");

    /* Keep a copy of the original stdout so it can be restored. */
    int saved_stdout = dup(STDOUT_FILENO);
    if (saved_stdout == -1) {
        perror("dup");
        unlink("/tmp/dup_test.txt");
        return 1;
    }

    int log_fd = open("/tmp/stdout_redirect.log",
                      O_WRONLY | O_CREAT | O_TRUNC, 0644);
    if (log_fd == -1) {
        perror("open /tmp/stdout_redirect.log");
        close(saved_stdout);
        unlink("/tmp/dup_test.txt");
        return 1;
    }

    /* Flush before switching fd 1: anything still buffered in stdio would
     * otherwise be written to the log file after the redirection (this
     * happens whenever stdout is a pipe or a file rather than a terminal). */
    fflush(stdout);
    if (dup2(log_fd, STDOUT_FILENO) == -1) {
        perror("dup2");
        close(log_fd);
        close(saved_stdout);
        unlink("/tmp/dup_test.txt");
        unlink("/tmp/stdout_redirect.log");
        return 1;
    }
    close(log_fd);   /* fd 1 now refers to the log file */

    /* These lines go to the file, not the terminal. */
    printf("这行输出到文件（不在终端显示）\n");
    printf("重定向测试成功！\n");
    fflush(stdout);  /* push them into the file before switching back */

    /* Restore stdout. */
    dup2(saved_stdout, STDOUT_FILENO);
    close(saved_stdout);

    printf("stdout 已恢复，这行在终端显示\n");
    printf("日志文件内容：\n");
    fflush(stdout);  /* keep our output ahead of the child's */
    system("cat /tmp/stdout_redirect.log");

    /* dup3: dup2 with O_CLOEXEC (Linux-specific). */
    printf("\n=== dup3：带 O_CLOEXEC 的 dup2 ===\n");
    int fd3 = open("/dev/null", O_RDONLY);
    int fd4 = dup3(fd3, 10, O_CLOEXEC);   /* copy onto fd 10 with CLOEXEC set */
    if (fd4 == -1) {
        perror("dup3");
    } else {
        printf("dup3 到 fd=%d，FD_CLOEXEC=%s\n",
               fd4, (fcntl(fd4, F_GETFD) & FD_CLOEXEC) ? "已设置" : "未设置");
        close(fd4);
    }
    if (fd3 != -1) close(fd3);

    unlink("/tmp/dup_test.txt");
    unlink("/tmp/stdout_redirect.log");
    return 0;
}

复制代码

gcc -o dup_demo dup_demo.c
./dup_demo
# 输出示例：
# === dup：复制文件描述符 ===
# 原始 fd=3，dup 后 fd2=4
# 文件内容: Hello World
# 关闭 fd 后通过 fd2 读取: Hello World
#
# === dup2：标准输出重定向 ===
# stdout 已恢复，这行在终端显示
# 日志文件内容：
# 这行输出到文件（不在终端显示）
# 重定向测试成功！
#
# === dup3：带 O_CLOEXEC 的 dup2 ===
# dup3 到 fd=10，FD_CLOEXEC=已设置

8. 高级 I/O 操作

8.1 pread / pwrite --- 指定偏移量读写（不改变文件偏移）

复制代码

/* 文件名：pread_pwrite.c
 * pread/pwrite：在指定偏移量处读写，不改变文件当前偏移
 * 多线程场景下比 lseek+read/write 更安全（原子操作）
 */
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <pthread.h>

#define FILE_PATH "/tmp/pread_test.bin"
#define BLOCK_SIZE 16

/* Arguments for one reader thread; several threads read different regions
 * of the same file concurrently.  thread_read() receives a pointer to one of
 * these and performs a single pread() of BLOCK_SIZE bytes. */
typedef struct {
    int    fd;
    off_t  offset;
    int    thread_id;
} ReadArgs;

/*
 * Thread body: read one BLOCK_SIZE block at the offset given in the ReadArgs
 * passed as `arg` and print what was read.  pread() takes the offset as an
 * argument, so threads sharing the descriptor do not disturb each other's
 * file position.  Always returns NULL.
 */
void *thread_read(void *arg) {
    const ReadArgs *job = arg;
    char block[BLOCK_SIZE + 1] = {0};   /* +1 keeps the text NUL-terminated */

    ssize_t got = pread(job->fd, block, BLOCK_SIZE, job->offset);
    long where = (long)job->offset;

    printf("  线程%d：从偏移 %ld 读取 %zd 字节: [%s]\n",
           job->thread_id, where, got, block);
    return NULL;
}

/*
 * Demo driver for pread()/pwrite().
 *
 *  1. Creates FILE_PATH and fills it with three BLOCK_SIZE-byte blocks using
 *     pwrite() at explicit offsets.
 *  2. Shows that pread() leaves the descriptor's file offset unchanged.
 *  3. Starts three threads that each pread() a different block from the
 *     same descriptor.
 *
 * The test file is removed before returning.
 * Returns 0 on success, 1 if the file cannot be created, written or read,
 * or if a thread cannot be started.
 */
int main(void) {
    enum { NBLOCKS = 3 };
    static const char *const blocks[NBLOCKS] = {
        "BLOCK_0_AAAAAAAA",
        "BLOCK_1_BBBBBBBB",
        "BLOCK_2_CCCCCCCC",
    };
    int status = 1;   /* stays 1 until every step has succeeded */

    /* Create the test file: one block every BLOCK_SIZE bytes */
    int fd = open(FILE_PATH, O_RDWR | O_CREAT | O_TRUNC, 0644);
    if (fd == -1) {
        perror("open");
        return 1;
    }

    for (int i = 0; i < NBLOCKS; i++) {
        ssize_t w = pwrite(fd, blocks[i], BLOCK_SIZE, (off_t)i * BLOCK_SIZE);
        if (w != BLOCK_SIZE) {
            if (w == -1) {
                perror("pwrite");
            } else {
                /* errno is not set for a short write, so perror() would mislead */
                fprintf(stderr, "pwrite: short write (%zd of %d bytes)\n",
                        w, BLOCK_SIZE);
            }
            goto cleanup;
        }
    }

    printf("=== pread/pwrite 演示 ===\n");
    printf("文件已写入3个块（每块%d字节）\n\n", BLOCK_SIZE);

    /* Verify that pread() does not move the file offset */
    off_t before = lseek(fd, 0, SEEK_CUR);
    char buf[BLOCK_SIZE + 1] = {0};
    if (pread(fd, buf, BLOCK_SIZE, BLOCK_SIZE) == -1) {   /* read the second block */
        perror("pread");
        goto cleanup;
    }
    off_t after = lseek(fd, 0, SEEK_CUR);

    printf("pread 前偏移: %ld\n", (long)before);
    printf("pread 读取:   [%s]\n", buf);
    printf("pread 后偏移: %ld（未改变！）\n\n", (long)after);

    /* Concurrent pread() from several threads on one descriptor */
    printf("多线程并发 pread：\n");
    pthread_t threads[NBLOCKS];
    ReadArgs args[NBLOCKS] = {
        { fd, 0,            1 },
        { fd, BLOCK_SIZE,   2 },
        { fd, BLOCK_SIZE*2, 3 },
    };

    /* Count the threads that really started: joining a pthread_t that
     * pthread_create() never filled in is undefined behaviour. */
    int started = 0;
    while (started < NBLOCKS) {
        int err = pthread_create(&threads[started], NULL, thread_read,
                                 &args[started]);
        if (err != 0) {
            /* pthread_create() returns the error number; it does not set errno */
            fprintf(stderr, "pthread_create: %s\n", strerror(err));
            break;
        }
        started++;
    }
    for (int i = 0; i < started; i++) {
        pthread_join(threads[i], NULL);
    }

    if (started == NBLOCKS) {
        status = 0;
    }

cleanup:
    close(fd);
    unlink(FILE_PATH);
    return status;
}

复制代码

gcc -o pread_pwrite pread_pwrite.c -lpthread
./pread_pwrite
# 输出示例：
# === pread/pwrite 演示 ===
# 文件已写入3个块（每块16字节）
#
# pread 前偏移: 0
# pread 读取:   [BLOCK_1_BBBBBBBB]
# pread 后偏移: 0（未改变！）
#
# 多线程并发 pread：
#   线程1：从偏移 0 读取 16 字节: [BLOCK_0_AAAAAAAA]
#   线程2：从偏移 16 读取 16 字节: [BLOCK_1_BBBBBBBB]
#   线程3：从偏移 32 读取 16 字节: [BLOCK_2_CCCCCCCC]
# （三个线程并发执行，实际输出的先后顺序可能与此不同）

8.2 fcntl --- 文件控制

复制代码

/* 文件名：fcntl_demo.c
 * fcntl：对已打开的 fd 进行各种控制操作
 */
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <fcntl.h>
#include <unistd.h>

/*
 * Demo driver for fcntl().
 *
 * Opens a scratch file and walks through the common fcntl() commands:
 *   F_GETFL / F_SETFL  - read and change file status flags
 *   F_GETFD / F_SETFD  - read and change descriptor flags (FD_CLOEXEC)
 *   F_DUPFD            - duplicate onto the lowest free descriptor >= 10
 *   F_SETLK / F_GETLK  - take, query and release an advisory record lock
 *
 * The scratch file is removed before returning.
 * Returns 0 on success, 1 if the file cannot be opened or written, or if
 * reading the flags fails.
 */
int main(void) {
    static const char path[] = "/tmp/fcntl_test.txt";
    static const char content[] = "fcntl test content\n";
    int status = 1;   /* stays 1 until the whole walkthrough has run */

    int fd = open(path, O_RDWR | O_CREAT | O_TRUNC, 0644);
    if (fd == -1) {
        perror("open");
        return 1;
    }
    if (write(fd, content, sizeof content - 1) == -1) {
        perror("write");
        goto cleanup;
    }

    /* -- F_GETFL / F_SETFL: get/set file status flags -- */
    printf("=== F_GETFL / F_SETFL ===\n");
    int flags = fcntl(fd, F_GETFL);
    if (flags == -1) {
        perror("fcntl(F_GETFL)");
        goto cleanup;
    }
    printf("当前标志: 0x%x\n", (unsigned)flags);

    const char *mode_name;
    switch (flags & O_ACCMODE) {
    case O_RDWR:   mode_name = "O_RDWR";   break;
    case O_RDONLY: mode_name = "O_RDONLY"; break;
    default:       mode_name = "O_WRONLY"; break;
    }
    printf("访问模式: %s\n", mode_name);

    /* Add O_APPEND to the already-open descriptor */
    if (fcntl(fd, F_SETFL, flags | O_APPEND) == -1) {
        perror("fcntl(F_SETFL, O_APPEND)");
    }
    flags = fcntl(fd, F_GETFL);
    if (flags == -1) {
        perror("fcntl(F_GETFL)");
        goto cleanup;
    }
    printf("添加 O_APPEND 后: O_APPEND=%s\n",
           (flags & O_APPEND) ? "已设置" : "未设置");

    /* Add O_NONBLOCK (only meaningful for pipes/sockets) */
    if (fcntl(fd, F_SETFL, flags | O_NONBLOCK) == -1) {
        perror("fcntl(F_SETFL, O_NONBLOCK)");
    }
    int fl_now = fcntl(fd, F_GETFL);
    /* Guard against -1: all bits set would otherwise read as "flag present" */
    printf("添加 O_NONBLOCK 后: O_NONBLOCK=%s\n",
           (fl_now != -1 && (fl_now & O_NONBLOCK)) ? "已设置" : "未设置");

    /* -- F_GETFD / F_SETFD: get/set descriptor flags -- */
    printf("\n=== F_GETFD / F_SETFD ===\n");
    int fd_flags = fcntl(fd, F_GETFD);
    if (fd_flags == -1) {
        perror("fcntl(F_GETFD)");
        goto cleanup;
    }
    printf("FD_CLOEXEC: %s\n",
           (fd_flags & FD_CLOEXEC) ? "已设置" : "未设置");

    /* Set FD_CLOEXEC, preserving any other descriptor flags */
    if (fcntl(fd, F_SETFD, fd_flags | FD_CLOEXEC) == -1) {
        perror("fcntl(F_SETFD)");
    }
    int fdfl_now = fcntl(fd, F_GETFD);
    printf("设置后 FD_CLOEXEC: %s\n",
           (fdfl_now != -1 && (fdfl_now & FD_CLOEXEC)) ? "已设置" : "未设置");

    /* -- F_DUPFD: like dup(), but with a lower bound on the new number -- */
    printf("\n=== F_DUPFD ===\n");
    int fd_dup = fcntl(fd, F_DUPFD, 10);   /* lowest free descriptor >= 10 */
    if (fd_dup == -1) {
        perror("fcntl(F_DUPFD)");
    } else {
        printf("F_DUPFD(>=10) 得到 fd=%d\n", fd_dup);
        close(fd_dup);
    }

    /* -- F_GETLK / F_SETLK: advisory record locks -- */
    printf("\n=== 文件锁（Advisory Lock）===\n");
    struct flock lock = {
        .l_type   = F_WRLCK,    /* write lock */
        .l_whence = SEEK_SET,
        .l_start  = 0,
        .l_len    = 0,          /* 0 means "to end of file": lock the whole file */
    };

    if (fcntl(fd, F_SETLK, &lock) == 0) {
        printf("获取写锁成功\n");

        /* Query for a lock that would block this request. A process's own
         * locks never conflict with its own requests, so when the holder
         * asks, F_GETLK reports F_UNLCK. */
        struct flock check = { .l_type = F_WRLCK, .l_whence = SEEK_SET,
                               .l_start = 0, .l_len = 0 };
        if (fcntl(fd, F_GETLK, &check) == -1) {
            perror("F_GETLK");
        } else if (check.l_type == F_UNLCK) {
            printf("锁查询：无冲突锁\n");
        } else {
            printf("锁查询：被 PID=%d 持有\n", (int)check.l_pid);
        }

        /* Release the lock */
        lock.l_type = F_UNLCK;
        if (fcntl(fd, F_SETLK, &lock) == -1) {
            perror("F_SETLK(F_UNLCK)");
        } else {
            printf("锁已释放\n");
        }
    } else {
        perror("F_SETLK");
    }

    status = 0;

cleanup:
    close(fd);
    unlink(path);
    return status;
}

复制代码

gcc -o fcntl_demo fcntl_demo.c
./fcntl_demo
# 输出示例：
# === F_GETFL / F_SETFL ===
# 当前标志: 0x8002    （具体数值因平台而异：x86_64 上内核会自动附加 O_LARGEFILE=0x8000，因此通常不是 0x2）
# 访问模式: O_RDWR
# 添加 O_APPEND 后: O_APPEND=已设置
# 添加 O_NONBLOCK 后: O_NONBLOCK=已设置
#
# === F_GETFD / F_SETFD ===
# FD_CLOEXEC: 未设置
# 设置后 FD_CLOEXEC: 已设置
#
# === F_DUPFD ===
# F_DUPFD(>=10) 得到 fd=10
#
# === 文件锁（Advisory Lock）===
# 获取写锁成功
# 锁查询：无冲突锁
# 锁已释放

9. 综合实践

9.1 实现一个简单的文件复制工具

复制代码

/* 文件名：mycp.c
 * 实现类似 cp 命令的文件复制工具
 * 特性：健壮的错误处理、进度显示、保留权限
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/stat.h>

#define BUF_SIZE (64 * 1024)   /* 64KB 缓冲区（最优 I/O 大小）*/

/* 健壮的 write：处理短写和 EINTR */
/*
 * Write exactly `count` bytes from `buf` to `fd`.
 *
 * Keeps calling write() until everything has been handed to the kernel,
 * resuming after short writes and retrying when a call is interrupted
 * (EINTR). Returns `count` on success, or -1 on any other write() error
 * with errno left as write() set it.
 */
static ssize_t write_all(int fd, const void *buf, size_t count) {
    const char *const base = buf;
    size_t done = 0;   /* bytes already accepted by write() */

    for (;;) {
        if (done == count) {
            return (ssize_t)count;
        }

        ssize_t rc = write(fd, base + done, count - done);
        if (rc != -1) {
            done += (size_t)rc;       /* short write: continue from where it stopped */
        } else if (errno != EINTR) {
            return -1;                /* real error; EINTR simply loops again */
        }
    }
}

/*
 * mycp entry point: copy argv[1] to argv[2].
 *
 * The destination is created (or truncated) with the source's permission
 * bits as the requested mode. Data moves in BUF_SIZE chunks through
 * write_all(), progress is printed after each chunk, and the result is
 * fsync()ed and its size compared with the source.
 *
 * Returns 0 only when every byte was copied and the destination closed
 * cleanly; returns 1 on a usage error, when source and destination are the
 * same file, or on any open/read/write/close failure.
 */
int main(int argc, char *argv[]) {
    if (argc != 3) {
        fprintf(stderr, "用法: %s <源文件> <目标文件>\n", argv[0]);
        return 1;
    }

    const char *src_path = argv[1];
    const char *dst_path = argv[2];

    /* Open the source file */
    int src_fd = open(src_path, O_RDONLY);
    if (src_fd == -1) {
        fprintf(stderr, "无法打开源文件 '%s': %s\n",
                src_path, strerror(errno));
        return 1;
    }

    /* Fetch source metadata (size, permissions) */
    struct stat src_stat;
    if (fstat(src_fd, &src_stat) == -1) {
        perror("fstat");
        close(src_fd);
        return 1;
    }

    /* Refuse to copy a file onto itself (same path, symlink or hard link):
     * the O_TRUNC below would wipe the source before a byte is read. */
    struct stat dst_probe;
    if (stat(dst_path, &dst_probe) == 0 &&
        dst_probe.st_dev == src_stat.st_dev &&
        dst_probe.st_ino == src_stat.st_ino) {
        fprintf(stderr, "'%s' 与 '%s' 是同一个文件\n", src_path, dst_path);
        close(src_fd);
        return 1;
    }

    /* Open the destination, requesting the source's permission bits */
    int dst_fd = open(dst_path,
                      O_WRONLY | O_CREAT | O_TRUNC,
                      src_stat.st_mode & 0777);
    if (dst_fd == -1) {
        fprintf(stderr, "无法创建目标文件 '%s': %s\n",
                dst_path, strerror(errno));
        close(src_fd);
        return 1;
    }

    /* Allocate the I/O buffer */
    char *buf = malloc(BUF_SIZE);
    if (!buf) {
        fprintf(stderr, "内存分配失败\n");
        close(src_fd); close(dst_fd);
        return 1;
    }

    /* Exit status is tracked explicitly; deriving it from the last read()
     * result would report success after a failed write. */
    int status = 1;
    long long total_copied = 0;
    long long file_size = (long long)src_stat.st_size;

    printf("复制: %s → %s\n", src_path, dst_path);
    printf("文件大小: %lld 字节\n", file_size);

    /* Copy loop */
    for (;;) {
        ssize_t n_read = read(src_fd, buf, BUF_SIZE);
        if (n_read == -1) {
            if (errno == EINTR) {
                continue;   /* interrupted before any data arrived: retry */
            }
            fprintf(stderr, "\n读取失败: %s\n", strerror(errno));
            goto cleanup;
        }
        if (n_read == 0) {
            break;          /* EOF */
        }

        if (write_all(dst_fd, buf, (size_t)n_read) == -1) {
            fprintf(stderr, "写入失败: %s\n", strerror(errno));
            goto cleanup;
        }
        total_copied += n_read;

        /* Progress display (skipped when the reported size is 0) */
        if (file_size > 0) {
            int pct = (int)(total_copied * 100 / file_size);
            printf("\r进度: %lld/%lld 字节 (%d%%)",
                   total_copied, file_size, pct);
            fflush(stdout);
        }
    }

    /* Push the data to stable storage. Kept non-fatal: a failure here is
     * reported but does not by itself fail the copy. */
    if (fsync(dst_fd) == -1) {
        fprintf(stderr, "\nfsync 失败: %s\n", strerror(errno));
    }

    printf("\n复制完成！共复制 %lld 字节\n", total_copied);

    /* Verify that the sizes match */
    struct stat dst_stat;
    if (fstat(dst_fd, &dst_stat) == -1) {
        perror("fstat");
    } else {
        printf("验证: 源=%lld 字节，目标=%lld 字节，%s\n",
               (long long)src_stat.st_size,
               (long long)dst_stat.st_size,
               src_stat.st_size == dst_stat.st_size ? "✓ 一致" : "✗ 不一致");
    }

    status = 0;

cleanup:
    free(buf);
    close(src_fd);
    if (close(dst_fd) == -1) {
        /* A failed close() means the copy cannot be trusted, so fail the run */
        fprintf(stderr, "关闭目标文件失败: %s\n", strerror(errno));
        status = 1;
    }
    return status;
}

复制代码

gcc -O2 -o mycp mycp.c

# 复制一个文件
./mycp /etc/passwd /tmp/passwd_copy
# 输出：
# 复制: /etc/passwd → /tmp/passwd_copy
# 文件大小: 2847 字节
# 进度: 2847/2847 字节 (100%)
# 复制完成！共复制 2847 字节
# 验证: 源=2847 字节，目标=2847 字节，✓ 一致

# 验证内容一致
diff /etc/passwd /tmp/passwd_copy && echo "文件内容完全一致 ✓"
rm /tmp/passwd_copy

9.2 文件 I/O 性能测试

复制代码

#!/bin/bash
# File: io_benchmark.sh
# Purpose: compare the throughput of several file I/O patterns
#          (large sequential write/read with dd, small-block writes,
#          and buffered writes from Python).

set -euo pipefail

TEST_FILE="/tmp/io_bench_test.dat"
SIZE_MB=64

# Remove the scratch file on every exit path, including an abort under `set -e`.
trap 'rm -f "$TEST_FILE"' EXIT

echo "═══════════════════════════════════════════════"
echo "  文件 I/O 性能基准测试（${SIZE_MB}MB 文件）"
echo "═══════════════════════════════════════════════"

# Drop the page cache (needs root; if skipped, the tests measure cache-hit speed)
# echo 3 > /proc/sys/vm/drop_caches

# NOTE: every dd below runs with LC_ALL=C. dd localizes its summary line, so in
# a non-English locale grep would match nothing and exit 1; with pipefail and
# `set -e` that would abort the whole script after the first test.

# -- Test 1: dd sequential write (large blocks) --
echo ""
echo "【测试1】dd 顺序写入（bs=64K）"
time LC_ALL=C dd if=/dev/zero of="$TEST_FILE" bs=64K count=$((SIZE_MB*16)) \
    conv=fsync 2>&1 | grep -E "copied|bytes"

# -- Test 2: dd sequential read --
echo ""
echo "【测试2】dd 顺序读取（bs=64K）"
time LC_ALL=C dd if="$TEST_FILE" of=/dev/null bs=64K 2>&1 | grep -E "copied|bytes"

# -- Test 3: small-block writes (simulates log appends) --
echo ""
echo "【测试3】小块写入（bs=512，模拟日志）"
time LC_ALL=C dd if=/dev/zero of="$TEST_FILE" bs=512 count=$((SIZE_MB*2048)) \
    2>&1 | grep -E "copied|bytes"

# -- Test 4: Python comparison (user-space buffered writes) --
echo ""
echo "【测试4】Python stdio 写入对比"
python3 -c "
import time, os
path = '$TEST_FILE'
size = ${SIZE_MB} * 1024 * 1024
data = b'x' * 65536  # 64KB 块

start = time.time()
with open(path, 'wb') as f:
    written = 0
    while written < size:
        f.write(data)
        written += len(data)
elapsed = time.time() - start
print(f'  Python stdio: {size/elapsed/1024/1024:.1f} MB/s')
"

echo ""
echo "测试完成。"

复制代码

chmod +x io_benchmark.sh
./io_benchmark.sh

知识点总结

复制代码

第 3 章 核心知识图谱
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

┌──────────────────────────────────────────────────────────┐
│                  文件 I/O 基础                            │
└──────┬──────────┬──────────┬──────────┬──────────────────┘
       │          │          │          │
  ┌────▼───┐ ┌───▼────┐ ┌───▼────┐ ┌───▼────┐
  │  open  │ │  read  │ │ write  │ │ close  │
  └────┬───┘ └───┬────┘ └───┬────┘ └───┬────┘
       │         │          │          │
  flags/mode  循环读取   短写处理   避免双重关闭
  O_CREAT     read_exact  write_all  fd=-1标记
  O_EXCL      EOF检测    O_APPEND   EINTR处理
  O_APPEND    EINTR重试  fsync落盘
  O_CLOEXEC   二进制读   writev聚写

文件描述符体系：
  fd 表（进程级）→ 打开文件表（系统级）→ inode（磁盘）
  dup/dup2：共享打开文件表项（共享偏移量）
  fork：子进程获得父进程 fd 表的副本，父子进程的 fd 指向同一打开文件表项（共享偏移量）

高级操作：
  lseek    → 移动偏移量 / 文件空洞
  pread    → 原子 lseek+read（多线程安全）
  pwrite   → 原子 lseek+write（多线程安全）
  fcntl    → 动态修改标志 / 文件锁
  writev   → 聚集写入（减少系统调用）

黄金法则：
  ① open 后检查返回值，失败立即处理
  ② read 返回 0 = EOF，< 0 = 错误，> 0 = 实际字节数
  ③ write 可能短写，生产代码必须循环写完
  ④ close 后立即将 fd 设为 -1，防止双重关闭
  ⑤ 多线程 I/O 用 pread/pwrite，避免 lseek+read 竞争
  ⑥ 重要数据写入后调用 fsync 确保落盘
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

📚 参考资料

man 2 open / man 2 read / man 2 write / man 2 close

man 2 lseek / man 2 pread / man 2 fcntl / man 2 dup

man 2 writev / man 2 fsync / man 2 fstat

《Linux/UNIX 系统编程手册》第 4、5 章 --- Michael Kerrisk

《UNIX 环境高级编程（APUE）》第 3 章 --- W. Richard Stevens