Linux深入学习内核 - 中断与异常（下）

软中断，`Tasklet`和`Work Queue`

内核执行的任务中有一些并不紧急，它们可以被延后一段时间再处理。把这些可延迟的工作从中断处理程序中抽离出来，有利于内核保持较短的响应时间，所以我们使用下面这些机制，把非紧急的中断处理工作抽象出来。下面列出目前仍在使用的三种机制：

软中断（softirq）：内核2.3引入，是最基本、最优先的软中断处理形式，为了避免名字冲突，本文中将这种子类型的软中断叫softirq。

tasklet：其底层使用softirq机制实现，提供了一种方便用户使用的软中断方式，为软中断提供了很好的扩展性。（封装了softirq）

work queue：前两种软中断执行时是禁止抢占的（softirq的ksoftirqd除外），对于用户进程不友好。如果softirq执行时间过长，可以把工作推后到work queue中执行；work queue运行在进程上下文，可以被抢占，也可以被调度。如果下半部需要睡眠或阻塞，应直接选择work queue。

软中断

目前已注册的软中断有10种，定义为一个全局数组：

复制代码

/* Table of softirq descriptors, one slot per softirq index (NR_SOFTIRQS entries). */
static struct softirq_action softirq_vec[NR_SOFTIRQS];
 
/* Softirq index numbers; each value selects one slot of softirq_vec[]. */
enum {
    HI_SOFTIRQ = 0, /* high-priority tasklets */
    TIMER_SOFTIRQ, /* timer bottom half */
    NET_TX_SOFTIRQ, /* network packet transmit */
    NET_RX_SOFTIRQ, /* network packet receive */
    BLOCK_SOFTIRQ, /* block devices */
    BLOCK_IOPOLL_SOFTIRQ, /* presumably block-device I/O polling -- not described here, TODO confirm */
    TASKLET_SOFTIRQ, /* normal-priority tasklets */
    SCHED_SOFTIRQ, /* scheduler */
    HRTIMER_SOFTIRQ, /* high-resolution timers */
    RCU_SOFTIRQ, /* RCU locking */
    NR_SOFTIRQS /* 10: number of softirq types */
};

（2）注册软中断处理函数

复制代码

/**
 * @nr: 软中断的索引号
 * @action: 软中断的处理函数
 */
void open_softirq(int nr, void (*action) (struct softirq_action *))
{
    softirq_vec[nr].action = action;
}

例如：

复制代码

/* Register the handlers for the network transmit and receive softirqs. */
open_softirq(NET_TX_SOFTIRQ, net_tx_action);
open_softirq(NET_RX_SOFTIRQ, net_rx_action);

（3）触发软中断

调用raise_softirq()来触发软中断。

复制代码

/*
 * Raise softirq @nr. Local interrupts are saved and disabled around the
 * call because raise_softirq_irqoff() must run with irqs disabled, and are
 * restored to their previous state afterwards.
 */
void raise_softirq(unsigned int nr)
{
    unsigned long irq_state;

    local_irq_save(irq_state);
    raise_softirq_irqoff(nr);
    local_irq_restore(irq_state);
}
 
/*
 * Mark softirq @nr as pending and, when called outside interrupt context,
 * wake the ksoftirqd thread so the softirq gets processed soon.
 *
 * This function must run with irqs disabled.
 */
inline void raise_softirq_irqoff(unsigned int nr)
{
    __raise_softirq_irqoff(nr);

    /* If we're in an interrupt or softirq, we're done
     * (this also catches softirq-disabled code). We will
     * actually run the softirq once we return from the irq
     * or softirq.
     * Otherwise we wake up ksoftirqd to make sure we
     * schedule the softirq soon.
     */
    if (!in_interrupt()) /* neither in hard-irq nor in softirq context */
        wakeup_softirqd(); /* wake the ksoftirqd/n thread */
}

Percpu变量irq_cpustat_t中的__softirq_pending是等待处理的软中断的位图，通过设置此变量即可告诉内核该执行哪些软中断。

复制代码

/*
 * Record softirq @nr as pending by OR-ing bit @nr into the pending bitmap.
 * Emits the softirq-raise trace event first.
 */
static inline void __raise_softirq_irqoff(unsigned int nr)
{
    trace_softirq_raise(nr);
    or_softirq_pending(1UL << nr);
}
 
/* Interrupt state kept for each CPU. */
typedef struct {
    unsigned int __softirq_pending; /* bitmap of softirqs waiting to be processed */
    unsigned int __nmi_count; /* arch dependent */
} irq_cpustat_t;
 
/* Storage for the irq_cpustat_t state. */
irq_cpustat_t irq_stat[];
/* Access one member of the irq_stat entry that belongs to the given cpu. */
#define __IRQ_STAT(cpu, member) (irq_stat[cpu].member)
/* OR the bits of x into the softirq pending bitmap (through percpu_or). */
#define or_softirq_pending(x) percpu_or(irq_stat.__softirq_pending, (x))
/* Read the softirq pending bitmap (through percpu_read). */
#define local_softirq_pending() percpu_read(irq_stat.__softirq_pending)

唤醒ksoftirqd内核线程处理软中断。

复制代码

static void wakeup_softirqd(void)
{
    /* Interrupts are disabled: no need to stop preemption */
    struct task_struct *tsk = __get_cpu_var(ksoftirqd);
 
    if (tsk && tsk->state != TASK_RUNNING)
        wake_up_process(tsk);
}

在下列地方，待处理的软中断会被检查和执行：

a. 从一个硬件中断代码处返回时

b. 在ksoftirqd内核线程中

c. 在那些显式检查和执行待处理的软中断的代码中，如网络子系统中

而不管是用什么方法唤起，软中断都要在do_softirq()中执行。如果有待处理的软中断，do_softirq()会循环遍历每一个，调用它们的相应的处理程序。

在中断处理程序中触发软中断是最常见的形式。中断处理程序执行硬件设备的相关操作，然后触发相应的软中断，最后退出。内核在执行完中断处理程序以后，马上就会调用do_softirq()，于是软中断开始执行，完成中断处理程序剩余的任务。

下面来看下do_softirq()的具体实现。

复制代码

/*
 * Entry point for softirq processing: runs __do_softirq() when at least
 * one softirq is pending and we are not already in interrupt context.
 * Local interrupts are disabled around the check and restored afterwards.
 */
asmlinkage void do_softirq(void)
{
    unsigned long irq_state;

    /* Already in hard-irq or softirq context: return immediately */
    if (in_interrupt())
        return;

    local_irq_save(irq_state);
    if (local_softirq_pending()) /* at least one softirq has been raised */
        __do_softirq(); /* the actual processing loop */
    local_irq_restore(irq_state);
}

复制代码

/* We restart softirq processing MAX_SOFTIRQ_RESTART times,
 * and we fall back to softirqd after that.
 * This number has been established via experimentation.
 * The two things to balance is latency against fairness - we want
 * to handle softirqs as soon as possible, but they should not be
 * able to lock up the box.
 */
asmlinkage void __do_softirq(void)
{
    struct softirq_action *h;
    __u32 pending;
    /* 本函数能重复触发执行的次数，防止占用过多的cpu时间 */
    int max_restart = MAX_SOFTIRQ_RESTART;
    int cpu;
 
    pending = local_softirq_pending(); /* 激活的软中断位图 */
    account_system_vtime(current);
    /* 本地禁止当前的软中断 */
    __local_bh_disable((unsigned long)__builtin_return_address(0), SOFTIRQ_OFFSET);
    lockdep_softirq_enter(); /* current->softirq_context++ */
    cpu = smp_processor_id(); /* 当前cpu编号 */
 
restart:
    /* Reset the pending bitmask before enabling irqs */
    set_softirq_pending(0); /* 重置位图 */
    local_irq_enable();
    h = softirq_vec;
    do {
        if (pending & 1) {
            unsigned int vec_nr = h - softirq_vec; /* 软中断索引 */
            int prev_count = preempt_count();
            kstat_incr_softirqs_this_cpu(vec_nr);
 
            trace_softirq_entry(vec_nr);
            h->action(h); /* 调用软中断的处理函数 */
            trace_softirq_exit(vec_nr);
 
            if (unlikely(prev_count != preempt_count())) {
                printk(KERN_ERR "huh, entered softirq %u %s %p" "with preempt_count %08x,"
                    "exited with %08x?\n", vec_nr, softirq_to_name[vec_nr], h->action, prev_count,
                    preempt_count());
            }
            rcu_bh_qs(cpu);
        }
        h++;
        pending >>= 1;
    } while(pending);
 
    local_irq_disable();
    pending = local_softirq_pending();
    if (pending & --max_restart) /* 重复触发 */
        goto restart;
 
    /* 如果重复触发了10次了，接下来唤醒ksoftirqd/n内核线程来处理 */
    if (pending)
        wakeup_softirqd(); 
 
    lockdep_softirq_exit();
    account_system_vtime(current);
    __local_bh_enable(SOFTIRQ_OFFSET);
}

（4）ksoftirqd内核线程

内核不会立即处理重新触发的软中断。当大量软中断出现的时候，内核会唤醒一组内核线程来处理。这些线程的优先级最低(nice值为19)，这能避免它们跟其它重要的任务抢夺资源。但它们最终肯定会被执行，所以这个折中的方案能够保证在软中断很多时用户程序不会因为得不到处理时间而处于饥饿状态，同时也保证过量的软中断最终会得到处理。

每个处理器都有一个这样的线程，名字为ksoftirqd/n，n为处理器的编号。

复制代码

/*
 * Thread function of the per-CPU ksoftirqd/n kernel thread.
 * @__bind_cpu: number of the CPU this thread serves, passed as a pointer-sized value.
 *
 * Sleeps while no softirq is pending, otherwise calls do_softirq() in a loop,
 * offering to reschedule after every pass. Returns 0 once kthread_should_stop()
 * reports true.
 */
static int run_ksoftirqd(void *__bind_cpu)
{
    set_current_state(TASK_INTERRUPTIBLE);
    current->flags |= PF_KSOFTIRQD; /* I am ksoftirqd */

    while (!kthread_should_stop()) {
        preempt_disable();

        if (!local_softirq_pending()) { /* nothing to process: go to sleep */
            preempt_enable_no_resched();
            schedule();
            preempt_disable();
        }

        __set_current_state(TASK_RUNNING);

        while (local_softirq_pending()) {
            /* Preempt disable stops cpu going offline.
             * If already offline, we'll be on wrong CPU: don't process.
             */
            if (cpu_is_offline((long)__bind_cpu)) /* our CPU is going away */
                goto wait_to_die;

            do_softirq(); /* common softirq processing entry point */

            /* Give other tasks a chance to run between passes */
            preempt_enable_no_resched();
            cond_resched();
            preempt_disable();
            rcu_note_context_switch((long)__bind_cpu);
        }

        preempt_enable();
        set_current_state(TASK_INTERRUPTIBLE);
    }

    __set_current_state(TASK_RUNNING);
    return 0;

wait_to_die:
    preempt_enable();
    /* Wait for kthread_stop */
    set_current_state(TASK_INTERRUPTIBLE);
    while (!kthread_should_stop()) {
        schedule();
        set_current_state(TASK_INTERRUPTIBLE);
    }

    __set_current_state(TASK_RUNNING);
    return 0;
}

Tasklet API

动态初始化函数：

复制代码

void tasklet_init(struct tasklet_struct *t,void (*func)(unsigned long), unsigned long data)

t: struct tasklet_struct 结构指针
func：小任务函数
data：传递给工作函数的实际参数

静态初始化：DECLARE_TASKLET(name, func, data) 定义一个名字为 name 的 tasklet 结构变量，并使用 func、data 对其进行初始化，这个宏定义的 tasklet 是可调度的。DECLARE_TASKLET_DISABLED(name, func, data) 与 DECLARE_TASKLET(name, func, data) 类似，不同的是它定义的 tasklet 一开始不能被调度，必须先把 count 设置为0，才可以调度。

复制代码

name：struct tasklet_struct的名字
func：tasklet函数指针
data：传递给func函数的参数

激活/取消激活 tasklet

复制代码

void tasklet_disable(struct tasklet_struct *t)   // disable t: count becomes non-zero, so the tasklet is not run (NOTE(review): the kernel increments count rather than storing 1 -- confirm in include/linux/interrupt.h)
void tasklet_enable (struct tasklet_struct *t)   // re-enable t: the tasklet can run again once count is back to 0 (NOTE(review): the kernel decrements count -- confirm)

调度函数

复制代码

void  tasklet_schedule (struct tasklet_struct *t)

调度某个指定的tasklet小任务。一旦调度，内核会在适当的时候执行 tasklet_struct 绑定的函数。对同一个 struct tasklet_struct 连续调度多次，效果等同于一次（前提条件：前一次调度后，绑定函数还没有开始执行）。

5）kill掉函数（取消任务）

复制代码

tasklet_kill(struct tasklet_struct *t);

6） tasklet和普通工作队列区别：

它所绑定的函数不能休眠

它的响应速度高于普通工作队列。

tasklet 微线程的编程步骤：

tasklet 内核机制的实现过程是非常复杂的，但是对于驱动开发者来说，重点是掌握如何使用内核已经给我们实现好的tasklet机制。tasklet编程其实只有简单的几步，下面我们总结一下tasklet机制的编程步骤。

1. 定义tasklet 工作函数

2. 定义tasklet 结构变量

定义分有静态定义和动态定义两种方式：

复制代码

// Dynamic definition (has to be initialized afterwards with tasklet_init()):
struct tasklet_struct my_tasklet;
// Static definition (declared and bound to its function and data in one step):
DECLARE_TASKLET(my_tasklet, my_tasklet_function, data);

3. 初始化tasklet结构，绑定工作函数

如果上一步是采用静态定义，则这一步不用再做，跳过。如果是采用动态定义tasklet，则使用tasklet_init()函数进行初始化以及绑定。

复制代码

/* Initialize the dynamically defined tasklet and bind its function and argument. */
tasklet_init(&my_tasklet, my_tasklet_function, data);

4. 在适当的地方调度工作函数

tasklet一般是用于处理中断的下半部的，所以一般在中断的上半部调度tasklet工作函数。

复制代码

tasklet_schedule(&my_tasklet);

5. 销毁tasklet工作任务

在确定不再使用tasklet时，应该在适当的地方调用tasklet_kill()函数销毁tasklet任务，释放资源。这个适当的地方一般与tasklet初始化的地方相对应，比如，如果是在模块初始化函数中初始化了tasklet，则相应地应在模块卸载函数中调用tasklet_kill()函数来销毁tasklet任务。

复制代码

tasklet_kill(&my_tasklet);

从中断和异常返回

我们用《深入理解Linux内核》的一张大图来收尾。

我们的ret_from_intr和ret_from_exception本质上等价于：

入口点

复制代码

// Common exit path of exception and interrupt handlers.
ret_from_exception:
    cli    // only the exception return path executes cli to disable local interrupts
ret_from_intr:
    movl $-8192, %ebp  // load the address of the current thread_info descriptor into ebp
    andl %esp, %ebp
    movl 0x30(%esp), %eax
    movb 0x2c(%esp), %al

    // Use the cs and eflags values pushed on the stack when the interrupt or
    // exception occurred to decide whether the interrupted program was running
    // in User Mode: the mask combines the eflags VM bit (0x00020000) with the
    // two low-order bits of cs.
    testl $0x00020003, %eax
    jnz resume_userspace
    jmp resume_kernel

恢复内核控制路径

复制代码

// Resume a kernel control path; reached via "resume_kernel" from ret_from_intr.
resume_kernel:
    cli
    cmpl $0, 0x14(%ebp)  // if the preempt_count field of thread_info is 0 (kernel preemption allowed)
    jz need_resched      // go to need_resched
restore_all:       // otherwise the interrupted program resumes execution
    popl %ebx
    popl %ecx
    popl %edx
    popl %esi
    popl %edi
    popl %ebp
    popl %eax
    popl %ds
    popl %es
    addl $4, %esp
    iret   // return to the interrupted program

检查内核抢占

复制代码

// Check whether the kernel control path being resumed should be preempted.
need_resched:
    movl 0x8(%ebp), %ecx
    testb $(1<<TIF_NEED_RESCHED), %cl  // TIF_NEED_RESCHED clear in current->thread_info flags: no process switch is needed
    jz restore_all                     // so go to restore_all
    testl $0x00000200, 0x30(%esp)      // saved eflags lives on the stack (%esp), not in thread_info (%ebp): was the resumed path running with local interrupts disabled?
    jz restore_all                     // if so also go to restore_all, a process switch could corrupt kernel data structures
    call preempt_schedule_irq          // process switch: sets PREEMPT_ACTIVE in preempt_count, temporarily sets the big kernel lock counter to -1, calls schedule()
    jmp need_resched

恢复用户态程序

复制代码

resume_userspace:
    cli  // disable local interrupts
    movl 0x8(%ebp), %ecx

    // Check the flags field of current->thread_info:
    // if no flag other than TIF_SYSCALL_TRACE, TIF_SYSCALL_AUDIT or TIF_SINGLESTEP is set,
    // go to restore_all
    andl $0x0000ff6e, %ecx
    je restore_all
    jmp work_pending

检测重调度标志

复制代码

work_pending:
    testb $(1<<TIF_NEED_RESCHED), %cl
    jz work_notifysig
work_resched:
    call schedule  // a process switch request is pending: pick another process to run
    cli
    jmp resume_userspace  // when the previous process resumes, start over at resume_userspace

处理挂起信号、虚拟 8086 模式和单步执行

复制代码

work_notifysig:
    movl %esp, %eax
    testl $0x00020000, 0x30(%esp)
    je 1f

// Reached when the VM control flag is set in the eflags register of the User Mode program
work_notifysig_v86:
    pushl %ecx
    call save_v86_state    // build the virtual-8086 mode data structures in the User Mode address space
    popl %ecx
    movl %eax, %esp
1:
    xorl %edx, %edx
    call do_notify_resume  // handle pending signals and single stepping
    jmp restore_all        // resume the interrupted program

Reference

80x86中断 - 知乎 (zhihu.com)

Linux内核19-中断描述符表IDT的初始化-腾讯云开发者社区-腾讯云 (tencent.com)

Linux 中断 ------ GIC (数据结构 irq_domain/irq_desc/irq_data/irq_chip/irqaction)_irq_data、irq_chip、irq_domain和irq_desc-CSDN博客

Linux内核硬中断 / 软中断的原理和实现-腾讯云开发者社区-腾讯云 (tencent.com)

linux内核之tasklet使用_tasklet 改绑定-CSDN博客

深入理解 Linux 内核---中断和异常_ret_from_exception-CSDN博客

Linux深入学习内核 - 中断与异常（下）

软中断，Tasklet和Work Queue

软中断

Tasklet API

tasklet 微线程的编程步骤：

从中断和异常返回

Reference

软中断，`Tasklet`和`Work Queue`