XV6操作系统：proc机制学习笔记

梳理struct proc的结构如下，通过分析一个父子进程的程序关系来理解process的工作原理：

复制代码

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/wait.h>
#include <string.h>

/*
 * Parent/child demo: the child creates test.txt and writes a short message;
 * the parent waits for the child to exit, then reads the file back and
 * prints what it found.
 *
 * Returns 0 on success; exits/returns 1 if fork, open, write, wait or read
 * fails.
 */
int main(void) {
    int fd;
    char buffer[64];
    const char *msg = "Hello!\n";

    pid_t pid = fork();

    if (pid < 0) {
        perror("fork");
        exit(1);
    }
    else if (pid == 0) {
        /* Child: create (or truncate) the file and write the message. */
        fd = open("test.txt", O_WRONLY | O_CREAT | O_TRUNC, 0644);
        if (fd < 0) {
            perror("open");
            exit(1);
        }
        if (write(fd, msg, strlen(msg)) < 0) {
            perror("write");
            close(fd);
            exit(1);
        }
        close(fd);
        exit(0);
    }
    else {
        /* Parent: block until the child has exited, so the file is complete. */
        if (wait(NULL) < 0) {
            perror("wait");
            return 1;
        }
        fd = open("test.txt", O_RDONLY);
        if (fd < 0) {
            perror("open");
            return 1;
        }
        /*
         * read() does not NUL-terminate. Leave room for the terminator and
         * add it ourselves before handing the buffer to printf("%s").
         */
        ssize_t n = read(fd, buffer, sizeof(buffer) - 1);
        if (n < 0) {
            perror("read");
            close(fd);
            return 1;
        }
        buffer[n] = '\0';
        printf("父进程读取到子进程写下的: %s", buffer);
        close(fd);
    }

    return 0;
}

1.阶段一 fork

复制代码

...
pid_t pid = fork();
...

1.1 单核CPU情形

父进程fork出子进程时（注意：0 是 fork() 在子进程中的返回值，子进程自己的 pid 并不是 0），内核首先需要遍历进程表，找到一个状态为UNUSED的空闲proc槽位，并把它标记为USED。

接下来为子进程复制父进程的各种资源（打开文件表ofile、当前目录cwd），分配属于它自己的pagetable页表、trapframe。再把state从USED设置为RUNNABLE，等待被调度器选中（xv6没有单独的就绪队列，调度器直接扫描进程表）。

调度器scheduler()不断遍历进程表寻找RUNNABLE的进程；找到后把它的状态state改为RUNNING，再通过上下文切换（保存当前的寄存器值、恢复目标进程struct context中保存的寄存器值）让该进程开始运行。

1.2 多核CPU情形

单核CPU不需要考虑多核之间的冲突问题；但在多核情形下，其他CPU完全可能在同一时刻访问同一个proc槽位（例如两个CPU同时看中同一个UNUSED槽位），从而导致proc槽位被重复分配、浪费，或者进程状态被改乱。

**这就是spinlock的价值。**CPU0会首先调用 acquire(&p->lock) 获取自旋锁，其中 p 是指向目标进程 struct proc 的指针。此时，如果其他 CPU 核心想要动这个 proc 结构体，就会在 acquire() 中原地打转（spin），直到 CPU0 把进程状态安全地改写为 USED 并释放锁。

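仔细来看spinlock，这里有一个常见的误解需要澄清：proc结构体中有spinlock结构体，spinlock结构体中有cpu结构体，cpu结构体中又有proc结构体，那内存岂不是要被无限套娃撑爆？其实不会：proc中确实直接内嵌了一个spinlock，但spinlock结构体中保存的只是指向cpu结构体的指针（struct cpu *cpu），cpu结构体中保存的也只是指向proc结构体的指针（struct proc *proc）。指针的大小是固定的，所以不存在无限递归包含的问题。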

复制代码

// Spin lock descriptor.
// The owner is recorded as a pointer to a struct cpu, not as an embedded
// struct cpu, so embedding a spinlock in another struct stays fixed-size.
struct spinlock {
  uint locked;       // Is the lock held?
  char *name;        // Name of lock.
  struct cpu *cpu;   // The cpu holding the lock.
};


// Per-CPU state.
// The running process is referenced by pointer; only the scheduler's
// saved context is stored by value.
struct cpu {
  struct proc *proc;          // The process running on this cpu, or null.
  struct context context;     // swtch() here to enter scheduler().
  int noff;                   // Depth of push_off() nesting.
  int intena;                 // Were interrupts enabled before push_off()?
};


// Per-process state.
// The spinlock is embedded by value; the parent, trapframe and cwd are
// referenced through pointers.
struct proc {
  struct spinlock lock;        // Spinlock embedded in this process entry
  enum procstate state;        // Process state
  void *chan;                  // If non-zero, sleeping on chan
  int killed;                  // If non-zero, have been killed
  int xstate;                  // Exit status to be returned to parent's wait
  int pid;                     // Process ID
  struct proc *parent;         // Parent process
  uint64 kstack;               // Virtual address of kernel stack
  uint64 sz;                   // Size of process memory (bytes)
  pagetable_t pagetable;       // User page table
  struct trapframe *trapframe; // data page for trampoline.S
  struct context context;      // swtch() here to run process
  struct file *ofile[NOFILE];  // Open files, indexed by file descriptor
  struct inode *cwd;           // Current directory
  char name[16];               // Process name (debugging)
};

这样的三角设计带来的效果是：

proc 包含 lock：锁保护进程状态，保护进程内部成员变量
lock 指向 cpu：记录当前是哪个CPU持有这把锁（真正阻止其他CPU修改数据的是locked字段配合acquire/release）
cpu 指向 proc：内核随时能知道当前在跑哪个进程

2.阶段二 trapframe

子进程开始运行，并调用了 open 和 write() 系统调用。

复制代码

...
else if (pid == 0) {
        fd = open("test.txt", O_WRONLY | O_CREAT | O_TRUNC, 0644);
        write(fd, msg, strlen(msg));
...

2.1 用户态切换内核态（trampoline.S）

user程序没有权限直接操作硬盘。调用 open() 或者 write() 时，用户程序会执行 RISC-V 的 ecall 指令，硬件立刻提高特权级，并跳转到蹦床页（trampoline）。蹦床页在内核页表和用户页表中被映射到完全相同的虚拟地址，这样在切换页表（写satp）的前后，正在执行的指令地址始终有效，转换过程才不会出问题。随后蹦床代码再跳入kernel态的C代码。以下为trampoline.S的代码（uservec是进入内核的入口，userret是返回用户态的出口）：

复制代码

// Preamble of the trampoline code.
// riscv.h / memlayout.h are project headers pulled in through the C
// preprocessor; TRAPFRAME used below is expected to come from them.
#include "riscv.h"
#include "memlayout.h"

// Place everything that follows in its own section, trampsec.
.section trampsec

.globl trampoline
.globl usertrap

// Start of the trampoline code; uservec and userret follow.
trampoline:
.align 4
.globl uservec
.globl userret
uservec:
    # Trap entry from user space: save every user register into the
    # trapframe, load the kernel stack pointer, hart id and page table
    # from it, then jump to the kernel trap handler.
    # satp is not changed until the csrw further down, so everything
    # before that still runs on the page table active at trap time.
    csrw sscratch, a0 # stash user a0 in sscratch so a0 can be used as a scratch register
    li a0, TRAPFRAME  # a0 now holds the TRAPFRAME address

    # save the user registers in TRAPFRAME
    # (offsets follow struct trapframe; offset 112, the a0 slot, is filled in afterwards)
    sd ra, 40(a0)
    sd sp, 48(a0)
    sd gp, 56(a0)
    sd tp, 64(a0)
    sd t0, 72(a0)
    sd t1, 80(a0)
    sd t2, 88(a0)
    sd s0, 96(a0)
    sd s1, 104(a0)
    sd a1, 120(a0)
    sd a2, 128(a0)
    sd a3, 136(a0)
    sd a4, 144(a0)
    sd a5, 152(a0)
    sd a6, 160(a0)
    sd a7, 168(a0)
    sd s2, 176(a0)
    sd s3, 184(a0)
    sd s4, 192(a0)
    sd s5, 200(a0)
    sd s6, 208(a0)
    sd s7, 216(a0)
    sd s8, 224(a0)
    sd s9, 232(a0)
    sd s10, 240(a0)
    sd s11, 248(a0)
    sd t3, 256(a0)
    sd t4, 264(a0)
    sd t5, 272(a0)
    sd t6, 280(a0)  
    
    # t0 is already saved, so reuse it to fetch the user a0 stashed in sscratch
    csrr t0, sscratch
    
    sd t0, 112(a0)          # store user a0 in its trapframe slot
    ld sp, 8(a0)            # offset 8:  kernel_sp
    ld tp, 32(a0)           # offset 32: kernel_hartid
    ld t0, 16(a0)           # offset 16: kernel_trap, the address jumped to below
    ld t1, 0(a0)            # offset 0:  kernel_satp

    sfence.vma zero, zero   // flush the TLB before satp changes

    csrw satp, t1           // install the page table value loaded from kernel_satp
    sfence.vma zero, zero   // flush the TLB again now that satp has changed

    # NOTE(review): upstream xv6 uses "jr t0" here. jalr also writes a
    # return address into ra; user ra was saved above, so this looks
    # harmless, but confirm it is intentional.
    jalr t0                 // jump to the address in t0 (kernel_trap, i.e. usertrap() in trap.c)

userret:
    # Return path to user space.
    # On entry a0 holds the value to write into satp
    # (presumably the user page table, supplied by the caller; confirm).
    sfence.vma zero, zero
    csrw satp, a0           # switch to the page table passed in a0
    sfence.vma zero, zero

    li a0, TRAPFRAME        # a0 now addresses the saved user registers

    # restore all but a0 from TRAPFRAME
    # (a0 is the base pointer for these loads, so it must be restored last)
    ld ra, 40(a0)
    ld sp, 48(a0)
    ld gp, 56(a0)
    ld tp, 64(a0)
    ld t0, 72(a0)
    ld t1, 80(a0)
    ld t2, 88(a0)
    ld s0, 96(a0)
    ld s1, 104(a0)
    ld a1, 120(a0)
    ld a2, 128(a0)
    ld a3, 136(a0)
    ld a4, 144(a0)
    ld a5, 152(a0)
    ld a6, 160(a0)
    ld a7, 168(a0)
    ld s2, 176(a0)
    ld s3, 184(a0)
    ld s4, 192(a0)
    ld s5, 200(a0)
    ld s6, 208(a0)
    ld s7, 216(a0)
    ld s8, 224(a0)
    ld s9, 232(a0)
    ld s10, 240(a0)
    ld s11, 248(a0)
    ld t3, 256(a0)
    ld t4, 264(a0)
    ld t5, 272(a0)
    ld t6, 280(a0)

    # restore user a0
    # (this overwrites the TRAPFRAME pointer, so no trapframe access may follow)
    ld a0, 112(a0)
    
    # return to user mode and user pc.
    # usertrapret() set up sstatus and sepc.
    sret

2.2 保存现场uservec

进入内核态的第一件事，就是把子进程此刻用户态的所有寄存器（如 a0 存放的文件路径指针，a1 存放的打开模式等），一股脑地保存到 p->trapframe（陷入帧）中。

复制代码

// Save area used by the trampoline code (uservec / userret).
// The first five slots hold kernel-side values; uservec reads
// kernel_satp, kernel_sp, kernel_trap and kernel_hartid from them.
// The remaining slots hold the user registers.
// The byte offset in each leading comment is hard-coded in the sd/ld
// instructions of the trampoline assembly, so fields must not be
// reordered, inserted or resized without updating that code.
struct trapframe {
  /*   0 */ uint64 kernel_satp;   // kernel page table
  /*   8 */ uint64 kernel_sp;     // top of process's kernel stack
  /*  16 */ uint64 kernel_trap;   // usertrap()
  /*  24 */ uint64 epc;           // saved user program counter
  /*  32 */ uint64 kernel_hartid; // saved kernel tp
  /*  40 */ uint64 ra;
  /*  48 */ uint64 sp;
  /*  56 */ uint64 gp;
  /*  64 */ uint64 tp;
  /*  72 */ uint64 t0;
  /*  80 */ uint64 t1;
  /*  88 */ uint64 t2;
  /*  96 */ uint64 s0;
  /* 104 */ uint64 s1;
  /* 112 */ uint64 a0;            // saved last by uservec (via sscratch), restored last by userret
  /* 120 */ uint64 a1;
  /* 128 */ uint64 a2;
  /* 136 */ uint64 a3;
  /* 144 */ uint64 a4;
  /* 152 */ uint64 a5;
  /* 160 */ uint64 a6;
  /* 168 */ uint64 a7;
  /* 176 */ uint64 s2;
  /* 184 */ uint64 s3;
  /* 192 */ uint64 s4;
  /* 200 */ uint64 s5;
  /* 208 */ uint64 s6;
  /* 216 */ uint64 s7;
  /* 224 */ uint64 s8;
  /* 232 */ uint64 s9;
  /* 240 */ uint64 s10;
  /* 248 */ uint64 s11;
  /* 256 */ uint64 t3;
  /* 264 */ uint64 t4;
  /* 272 */ uint64 t5;
  /* 280 */ uint64 t6;
};

2.3 恢复现场userret

当内核态子进程在硬盘上建好文件后，会把文件描述符（比如 3）写进 trapframe->a0 中。随后执行 sret 指令退回用户态，子进程醒来，仿佛什么都没发生，只是拿到了返回值 3。

3.阶段三 打开文件表ofile

当程序执行 open 时，操作系统在底层构建了一条三级跳的映射链条，这种设计实现了用户态与物理硬件的绝对隔离。

文件描述符fd

程序拿到的是一个简单的整数 fd。这个 fd 仅仅是该程序专属的 ofile 数组的下标。程序只能操作这个数字，无法越权触碰内核的内存指针，以此保证系统安全。

动态运行时的 struct file

内核通过 ofile 数组的下标找到对应的 struct file。设置这一层是因为同一个文件可以被并发访问。struct file 独立记录了本次 open 操作的专属上下文，例如当前拥有的是只读还是读写权限，以及具体的 off 偏移量。

物理文件真身 inode

struct file 内部的 ip 指针最终指向内存中唯一的 inode。inode 包含了物理文件在底层磁盘上的真实扇区分布信息与元数据。

4.阶段四 inode表、sleeplock

当程序调用 write 准备将数据刷入磁盘时，必须面对 CPU 高速运算与外设低速运转之间巨大的速度差。

获取独占写入权

内核找到目标 inode 后，程序必须申请该 inode 绑定的 sleeplock。如果此时有其他任务正在写这个文件，当前程序绝对不能使用 spinlock。因为 spinlock 会导致 CPU 空转，在漫长的磁盘 I/O 期间，空转是对算力的极大浪费。

主动让出 CPU 核心

拿不到 sleeplock 的程序会被内核变更为 SLEEPING 状态，同时内核将这把 sleeplock 的内存地址（该锁内嵌在目标 inode 中；acquiresleep() 以锁的地址作为睡眠通道）记录在该程序的 chan 字段中，以此标记它具体在等待哪把锁。随后，程序经由 sleep() 调用 swtch 触发上下文切换，让出当前 CPU 核心去执行其他处于 RUNNABLE 状态的任务。详细实现见kernel/sleeplock.c

硬件中断唤醒

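这里有两层等待需要区分。其一是磁盘 I/O 本身：外设完成写入动作后，会向 CPU 触发硬件中断，内核的中断处理例程随即介入，通过 wakeup() 唤醒正在等待这次磁盘请求完成的程序。其二是 sleeplock：持有锁的任务完成操作后调用 releasesleep()，其中的 wakeup() 会检索所有处于 SLEEPING 状态且 chan 字段匹配该 sleeplock 地址的程序，将它们的状态回写为 RUNNABLE。调度器随后会重新安排其执行。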

5.阶段五 清理资源

复制代码

...
        close(fd);
        exit(0);
    } 
    else {
        wait(NULL); 
...

子进程调用 exit()：

内核先遍历 p->ofile 数组，将所有打开的 struct file 的引用计数减一（如果减到0就释放对应的 inode 引用），并释放当前目录 cwd；随后唤醒可能正在 wait() 中睡眠的父进程，再获取 p->lock（自旋锁），记录退出码 xstate，最后把状态改为 ZOMBIE 并让出 CPU。

父进程调用 wait()：

由于父子并发，父进程可能早就在 wait() 里等待了。发现子进程没死，父进程会通过 sleep() 机制主动交出 CPU（底层依赖 p->lock 保证检查状态和睡眠的原子性）。当子进程变成 ZOMBIE 后唤醒父进程，父进程终于读取子进程的残存状态，并将其进程表项彻底抹平为 UNUSED。

XV6操作系统：proc机制学习笔记

1.阶段一 fork

1.1 单核CPU情形

1.2 多核CPU情形

2.阶段二 trapframe

2.1 用户态切换内核态（trampoline.S）

2.2 保存现场uservec

2.3 恢复现场userret

3.阶段三 打开文件表ofile

4.阶段四 inode表、sleeplock

5.阶段五 清理资源

3.阶段三打开文件表ofile

5.阶段五清理资源