28、Linux文件IO与标准IO详解：从概念到实战

Linux文件IO与标准IO详解：从概念到实战

一、核心概念：文件IO vs 标准IO

1.1 基本定义

文件IO（系统调用） ：操作系统为用户操作文件提供的底层系统函数（如open/read/write/close），直接和内核交互，也叫「低级IO」。
标准IO（C库函数） ：C语言标准库封装的文件操作函数（如fopen/fread/fwrite/fclose），底层调用文件IO，跨平台性更强，也叫「高级IO」。

1.2 底层关系与核心区别

C库函数是对系统调用的封装：标准IO为了提升效率增加了用户态缓存，而文件IO没有用户态缓存（每次调用都直接进入内核，但内核自身仍有页缓存），两者的核心区别可总结为：

特性	文件IO（系统调用）	标准IO（C库）
操作接口	`open/read/write/close/lseek`	`fopen/fread/fwrite/fclose/fseek`
标识类型	文件描述符（int，如0/1/2分别对应stdin/stdout/stderr）	文件流指针（FILE*）
缓存机制	无用户态缓存（每次调用直接进入内核）	带用户态缓存（减少系统调用次数）
跨平台性	依赖操作系统（POSIX接口，适用于Linux等类Unix系统）	跨平台（符合C标准）
适用场景	设备文件、实时性要求高的场景	普通文件、追求效率的通用场景
权限控制	打开时直接指定（如0666）	依赖fopen模式（如"r+"）

二、文件IO（系统调用）实战

文件IO是Linux内核提供的底层接口，无缓存、功能强大，适合对设备/实时性要求高的场景，核心函数包括open/read/write/lseek/close。

2.1 open：打开/创建文件

open函数用于打开或创建文件，成功时返回当前进程中最小的未使用文件描述符（fd），失败返回-1并设置errno。

c 复制代码

#include <fcntl.h>
#include <sys/stat.h>
#include <sys/types.h>

int open(const char *pathname, int flags, int mode);

参数说明 ：
- pathname：文件路径/名称；
- flags：打开模式（核心：O_RDONLY只读/O_WRONLY只写/O_RDWR读写，扩展：O_CREAT创建/O_TRUNC清空/O_APPEND追加）；
- mode：创建文件时的权限，类型为mode_t，仅在flags包含O_CREAT时生效（如0666表示请求所有用户可读可写，实际权限还要经过umask过滤）。

实战示例（01open.c）：创建并清空1.txt文件

c 复制代码

#include <fcntl.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <stdio.h>
#include <unistd.h>

/*
 * 01open.c: create 1.txt if it does not exist, or truncate it if it does.
 *
 * Returns 0 on success, 1 if the file cannot be opened.
 */
int main(int argc, char **argv)
{
    // Write-only; create if missing, truncate if present; requested mode 0666.
    int fd = open("1.txt", O_WRONLY | O_CREAT | O_TRUNC, 0666);
    if (-1 == fd) // open() reports failure by returning -1
    {
        fprintf(stderr, "open error\n");
        return 1;
    }
    // Release the descriptor explicitly instead of leaking it until exit.
    close(fd);
    return 0;
}

2.2 read/write：读写文件

文件IO的读写通过read（读）和write（写）实现，直接操作字节流，无缓存。

（1）write：写入文件

c 复制代码

ssize_t write(int fd, const void *buf, size_t count);

fd：目标文件描述符；
buf：待写入数据的缓冲区；
count：待写入的有效字节数；
返回值：成功返回实际写入字节数，失败返回-1。

实战示例（04write.c）：向1.txt写入"hello"

c 复制代码

#include <fcntl.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>

/*
 * 04write.c: create/truncate 1.txt and write the text "hello" into it.
 *
 * Returns 0 on success, 1 if the file cannot be opened or the write fails.
 */
int main(int argc, char **argv)
{
    int fd = open("1.txt", O_WRONLY | O_CREAT | O_TRUNC, 0666);
    if (-1 == fd)
    {
        fprintf(stderr, "open error\n");
        return 1;
    }
    char str[100] = "hello";
    // Use strlen(str), not sizeof(str): only the text bytes are written,
    // not the zero padding that fills the rest of the 100-byte array.
    ssize_t ret = write(fd, str, strlen(str));
    if (-1 == ret)
    {
        fprintf(stderr, "write error\n");
        close(fd);
        return 1;
    }
    // ssize_t is not guaranteed to be long; cast so the argument matches %ld.
    printf("写入了%ld字节到文件", (long)ret); // reports 5 bytes for "hello"
    close(fd); // always release the descriptor
    return 0;
}

（2）read：读取文件

c 复制代码

ssize_t read(int fd, void *buf, size_t count);

fd：源文件描述符；
buf：接收数据的缓冲区；
count：本次最多读取的字节数（不得超过缓冲区大小）；
返回值：>0实际读取字节数，==0文件末尾，<0读取失败。

实战示例（05read.c）：读取/etc/passwd文件内容

c 复制代码

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>

/*
 * 05read.c: dump /etc/passwd to stdout in chunks of at most 49 bytes,
 * printing each chunk as "{length}:{content}".
 *
 * Returns 0 when the whole file was read, 1 on open or read failure.
 */
int main()
{
    int fd = open("/etc/passwd", O_RDONLY); // read-only access is enough
    if (-1 == fd)
    {
        fprintf(stderr, "open error\n");
        return 1;
    }
    char buf[50] = {0};
    int status = 0;
    while (1) {
        // Standard memset instead of the legacy bzero (which needs <strings.h>).
        memset(buf, 0, sizeof(buf));
        // Leave one byte free so buf is always NUL-terminated for %s.
        ssize_t ret = read(fd, buf, sizeof(buf) - 1);
        if (ret < 0) {
            // A failed read is an error, not a normal end of file.
            fprintf(stderr, "read error\n");
            status = 1;
            break;
        }
        if (0 == ret) {
            break; // end of file
        }
        printf("{%d}:{%s}\n", (int)ret, buf); // ret <= 49, so the cast is safe
    }
    close(fd);
    return status;
}

2.3 实战案例：文件拷贝（06readcp.c）

结合read/write实现简易版cp命令，核心逻辑是「循环读取源文件→写入目标文件」：

c 复制代码

#include <fcntl.h>
#include <stdio.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <string.h>

/*
 * 06readcp.c: minimal cp. Copies argv[1] to argv[2] with read()/write().
 *
 * The destination is created if missing and truncated if present.
 * Returns 0 on success, 1 on bad usage or any open/read/write failure.
 */
int main(int argc ,char **argv)
{
    // Both a source and a destination path are required.
    if (argc < 3)
    {
        printf("usage:./a.out srcfile dstfile\n");
        return 1;
    }

    // Open the source first: if it is missing we must not have already
    // truncated (destroyed) an existing destination file.
    int fd_src = open(argv[1], O_RDONLY);
    if (-1 == fd_src)
    {
        fprintf(stderr, "open error\n");
        return 1;
    }
    int fd_dst = open(argv[2], O_WRONLY|O_CREAT|O_TRUNC, 0666);
    if (-1 == fd_dst)
    {
        fprintf(stderr, "open error\n");
        close(fd_src); // do not leak the descriptor that did open
        return 1;
    }

    int status = 0;
    char buffer[1024]; // fully overwritten by read() before use
    while (0 == status) {
        ssize_t nread = read(fd_src, buffer, sizeof(buffer));
        if (nread < 0)
        {
            fprintf(stderr, "read error\n");
            status = 1;
            break;
        }
        if (0 == nread)
        {
            break; // end of source file
        }

        // write() may accept fewer bytes than requested; keep going until
        // the whole chunk has been written.
        ssize_t done = 0;
        while (done < nread) {
            ssize_t nwritten = write(fd_dst, buffer + done, (size_t)(nread - done));
            if (nwritten <= 0)
            {
                fprintf(stderr, "write error\n");
                status = 1;
                break;
            }
            done += nwritten;
        }
    }

    close(fd_dst);
    close(fd_src);
    return status;
}

2.4 lseek：调整文件偏移量

lseek用于移动文件读写指针，可实现「获取文件大小」「空洞文件创建」等功能：

c 复制代码

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>

/*
 * lseek demo: print the size of 1.txt, then write "travel" at offset 1 MiB.
 * If the file is shorter than 1 MiB the skipped range becomes a hole.
 *
 * Returns 0 on success, 1 on any open/lseek/write failure.
 */
int main()
{
    int fd = open("1.txt", O_RDWR);
    if (-1 == fd)
    {
        fprintf(stderr, "open error\n");
        return 1;
    }

    // 1. File size: seeking to the end returns the resulting offset.
    //    lseek() returns off_t, which may be wider than long.
    off_t size = lseek(fd, 0, SEEK_END);
    if ((off_t)-1 == size)
    {
        fprintf(stderr, "lseek error\n");
        close(fd);
        return 1;
    }
    printf("size %lld\n", (long long)size);

    // 2. Sparse file: move to the 1 MiB offset and write there.
    if ((off_t)-1 == lseek(fd, 1024*1024, SEEK_SET))
    {
        fprintf(stderr, "lseek error\n");
        close(fd);
        return 1;
    }
    char str[] = "travel";
    if (-1 == write(fd, str, strlen(str)))
    {
        fprintf(stderr, "write error\n");
        close(fd);
        return 1;
    }
    close(fd);
    return 0;
}

2.5 标准输入输出（07stdin.c）

Linux中0/1/2分别对应标准输入（stdin）、标准输出（stdout）、标准错误（stderr），可直接通过文件描述符操作：

c 复制代码

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <stdlib.h>

/*
 * 07stdin.c: read a number from descriptor 0 (stdin), convert it with
 * atoi(), and write the raw bytes of the resulting int to descriptor 2
 * (stderr).
 *
 * Returns 0 on success, 1 if reading or writing fails.
 */
int main(int argc, char **argv)
{
    char buf[10] = {0};
    printf("pls input num:");
    fflush(stdout); // the prompt has no newline, so flush it explicitly

    // Read at most sizeof(buf)-1 bytes: the last byte stays '\0' so that
    // atoi() always receives a terminated string.
    ssize_t len = read(0, buf, sizeof(buf) - 1);
    if (len < 0)
    {
        fprintf(stderr, "read error\n");
        return 1;
    }

    int num = atoi(buf); // at most 9 characters, so the value fits in an int

    // Binary (not text) output; use sizeof(num) rather than a hard-coded 4.
    if (write(2, &num, sizeof(num)) < 0)
    {
        return 1;
    }
    return 0;
}

三、标准IO（C库）实战

标准IO是C库对文件IO的封装，自带缓存机制，跨平台性更好，核心函数包括fopen/fread/fwrite/fseek/ftell。

3.1 核心函数：文件读写与偏移

标准IO通过FILE*指针标识文件，结合fseek/ftell可轻松实现「文件插入内容」（02insert.c）：

c 复制代码

#include <stdio.h>
#include <string.h>
#include <stdlib.h>

/*
 * 02insert.c: insert the string "hello" into 1.txt at byte offset 15.
 *
 * A stream cannot insert in place, so the bytes after the insertion point
 * are saved in memory, then the new text and the saved tail are written
 * back starting at the insertion point.
 *
 * Returns 0 on success, 1 on any failure (including a file shorter than
 * the insertion offset).
 */
int main(int argc, char **argv)
{
    int status = 1;            // set to 0 only after every step succeeded
    char *data = NULL;         // saved tail of the file
    long size = 0;
    size_t tail_len = 0;
    const long pos = 15;       // insertion offset
    char insert_str[100] = "hello";
    const size_t insert_len = strlen(insert_str);

    FILE *fp = fopen("1.txt", "r+"); // read/write, file must already exist
    if (NULL == fp)
    {
        fprintf(stderr, "fopen error\n");
        return 1;
    }

    // 1. File size: seek to the end and ask for the offset.
    if (0 != fseek(fp, 0, SEEK_END) || (size = ftell(fp)) < 0)
    {
        fprintf(stderr, "fseek/ftell error\n");
        goto out;
    }
    printf("size is %ld\n", size);

    // Without this check size - pos would be negative and turn into a
    // huge unsigned length in fread()/fwrite().
    if (pos > size)
    {
        fprintf(stderr, "insert position %ld is beyond file size %ld\n", pos, size);
        goto out;
    }

    // 2. Save everything after the insertion point.
    tail_len = (size_t)(size - pos);
    if (tail_len > 0)
    {
        data = malloc(tail_len);
        if (NULL == data)
        {
            fprintf(stderr, "malloc error\n");
            goto out;
        }
        if (0 != fseek(fp, pos, SEEK_SET)
            || fread(data, 1, tail_len, fp) != tail_len)
        {
            fprintf(stderr, "read error\n");
            goto out;
        }
    }

    // 3. Write the new text followed by the saved tail. The fseek() is also
    //    required when switching an update stream from reading to writing.
    if (0 != fseek(fp, pos, SEEK_SET)
        || fwrite(insert_str, 1, insert_len, fp) != insert_len
        || (tail_len > 0 && fwrite(data, 1, tail_len, fp) != tail_len))
    {
        fprintf(stderr, "write error\n");
        goto out;
    }
    status = 0;

out:
    // 4. Release resources; fclose() flushes, so its failure is a write failure.
    if (0 != fclose(fp))
    {
        fprintf(stderr, "fclose error\n");
        status = 1;
    }
    free(data);
    return status;
}

3.2 实战案例：字典查询（03dict.c）

结合标准IO的fgets读取文件、链表存储数据，实现简易字典查询功能：

c 复制代码

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "list.h"

// Dictionary entry: a word, its definition, and the embedded list node
// used to link the entry into the dictionary list.
typedef struct
{
    char word[50];         // headword, stored as a C string (49 chars + '\0')
    char mean[512];        // definition, stored as a C string (511 chars + '\0')
    struct list_head node; // list linkage; type presumably declared in "list.h"
} DATATYPE;

/*
 * Allocate a dictionary entry for (word, mean) and link it into the list.
 *
 * head: list to insert into.
 * word: headword; copied, truncated to fit DATATYPE.word if too long.
 * mean: definition; copied, truncated to fit DATATYPE.mean if too long.
 *
 * Returns 0 on success, -1 if an argument is NULL or allocation fails.
 * The entry is heap-allocated; whoever owns the list must free it.
 */
int add_word(struct list_head* head, char* word, char* mean)
{
    // Callers that split lines with strtok() can hand us NULL for a
    // malformed line; reject it instead of crashing in the copy below.
    if (NULL == head || NULL == word || NULL == mean)
    {
        return -1;
    }

    DATATYPE* p = malloc(sizeof(*p));
    if (NULL == p)
    {
        fprintf(stderr, "add malloc error\n");
        return -1;
    }

    // Bounded copies: snprintf never writes past the field and always
    // NUL-terminates, unlike strcpy with an over-long source.
    snprintf(p->word, sizeof(p->word), "%s", word);
    snprintf(p->mean, sizeof(p->mean), "%s", mean);

    list_add(&p->node, head); // link the new entry into the list
    return 0;
}

/*
 * Search the dictionary list for an entry whose word equals want_word.
 *
 * Returns the first matching entry in iteration order, or NULL when no
 * entry matches. The returned pointer refers to the node inside the list.
 */
DATATYPE* find_word(struct list_head* head, char* want_word)
{
    DATATYPE* entry = NULL;
    DATATYPE* next = NULL;

    list_for_each_entry_safe(entry, next, head, node)
    {
        if (strcmp(entry->word, want_word) != 0)
        {
            continue; // not this one, keep scanning
        }
        return entry;
    }
    return NULL;
}

int main(int argc, char** argv)
{
    // 1. 打开字典文件
    FILE* fp = fopen("/home/linux/dict.txt", "r");
    if (NULL == fp)
    {
        fprintf(stderr, "open error\n");
        return 1;
    }

    // 2. 初始化链表，读取字典文件
    struct list_head dict_head;
    INIT_LIST_HEAD(&dict_head);
    while (1)
    {
        char str[1024] = {0};
        if (NULL == fgets(str, sizeof(str), fp)) // 逐行读取
        {
            break;
        }
        // 分割单词和释义
        char* word = strtok(str, " ");
        char* mean = strtok(NULL, "\r");
        add_word(&dict_head, word, mean); // 添加到链表
    }

    // 3. 交互查询单词
    while (1)
    {
        char want_word[50] = {0};
        printf("pls input want_word:");
        fgets(want_word, sizeof(want_word), stdin); // 读取用户输入
        want_word[strlen(want_word) - 1] = '\0'; // 去掉换行符
        if(0 == strcmp(want_word,"#quit")) // 退出条件
        {
            break;
        }
        // 查找并输出结果
        DATATYPE* tmp = find_word(&dict_head,want_word);
        if(NULL == tmp)
        {
            printf("cant find word:%s\n",want_word);
        }
        else  
        {
            printf("word:%s mean:%s\n",tmp->word,tmp->mean);
        }
    }
    return 0;
}

四、核心总结

4.1 关键结论

底层关系：标准IO是文件IO的封装，跨平台优先选标准IO，设备/实时场景选文件IO；
缓存影响：标准IO的用户态缓存可减少系统调用次数（提升效率），文件IO没有用户态缓存，数据立即交给内核（实时性更高）；
标识差异：文件IO用文件描述符（int），标准IO用文件流指针（FILE*）；
错误处理：文件IO返回-1表示失败，标准IO返回NULL（如fopen），需严格校验返回值。

4.2 选型建议

操作普通文件（如txt、配置文件）：优先标准IO（fopen/fread/fwrite）；
操作设备文件（如串口、网卡）：优先文件IO（open/read/write）；
需要跨平台：必须用标准IO；
需要精准控制文件偏移/权限：用文件IO。

五、常见问题与注意事项

文件权限：open的mode参数（如0666）最终权限会受umask影响（实际权限=mode & ~umask）；
缓存问题：标准IO需注意用fflush把用户态缓存刷新到内核，避免数据滞留在缓存中；若要确保数据真正落盘，还需对文件描述符调用fsync；
资源泄漏：malloc分配的内存必须free，文件描述符必须close，文件流指针必须fclose；
参数校验：read/write的count参数需合理（如read用sizeof(buf)-1为结尾的'\0'预留空间，保证缓冲区可作为字符串使用）。