Asterinas Process Startup and Switching
Process Startup
Process creation:
Rust
pub fn spawn_user_process(
executable_path: &str,
argv: Vec<CString>,
envp: Vec<CString>,
) -> Result<Arc<Process>> {
// spawn user process should give an absolute path
debug_assert!(executable_path.starts_with('/'));
let process = Process::create_user_process(executable_path, argv, envp)?;
open_ntty_as_controlling_terminal(&process)?;
process.run();
Ok(process)
}
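For orientation, here is a hedged sketch of what a call site might look like when the kernel launches its first user program; the path, arguments, and helper name are illustrative assumptions, not the actual Asterinas init code.
Rust
// Hypothetical call site (path and argv are assumptions): spawn_user_process
// expects an absolute path, and the returned process is already runnable,
// because run() was called inside.
use alloc::{ffi::CString, vec, vec::Vec};

fn launch_first_user_program() {
    let argv = vec![CString::new("/bin/sh").unwrap()];
    let envp: Vec<CString> = Vec::new();
    let process = spawn_user_process("/bin/sh", argv, envp)
        .expect("failed to spawn the first user process");
    debug!("first user process pid = {}", process.pid());
}
spawn_user_process in turn calls Process::create_user_process: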
Rust
fn create_user_process(
executable_path: &str,
argv: Vec<CString>,
envp: Vec<CString>,
) -> Result<Arc<Process>> {
let process_builder = {
let pid = allocate_tid();
let parent = Weak::new();
let credentials = Credentials::new_root();
let mut builder = ProcessBuilder::new(pid, executable_path, parent);
builder.argv(argv).envp(envp).credentials(credentials);
builder
};
let process = process_builder.build()?;
// Lock order: session table -> group table -> process table -> group of process
// -> group inner -> session inner
let mut session_table_mut = process_table::session_table_mut();
let mut group_table_mut = process_table::group_table_mut();
let mut process_table_mut = process_table::process_table_mut();
// Creates new group
let group = ProcessGroup::new(process.clone());
*process.process_group.lock() = Arc::downgrade(&group);
group_table_mut.insert(group.pgid(), group.clone());
// Creates new session
let session = Session::new(group.clone());
group.inner.lock().session = Arc::downgrade(&session);
session.inner.lock().leader = Some(process.clone());
session_table_mut.insert(session.sid(), session);
process_table_mut.insert(process.pid(), process.clone());
Ok(process)
}
Creating the thread:
Rust
pub fn build(self) -> Result<Arc<Process>> {
self.check_build()?;
let Self {
pid,
executable_path,
parent,
main_thread_builder,
argv,
envp,
process_vm,
file_table,
fs,
umask,
resource_limits,
sig_dispositions,
credentials,
} = self;
let process_vm = process_vm.or_else(|| Some(ProcessVm::alloc())).unwrap();
let file_table = file_table
.or_else(|| Some(Arc::new(Mutex::new(FileTable::new_with_stdio()))))
.unwrap();
let fs = fs
.or_else(|| Some(Arc::new(RwMutex::new(FsResolver::new()))))
.unwrap();
let umask = umask
.or_else(|| Some(Arc::new(RwLock::new(FileCreationMask::default()))))
.unwrap();
let resource_limits = resource_limits
.or_else(|| Some(ResourceLimits::default()))
.unwrap();
let sig_dispositions = sig_dispositions
.or_else(|| Some(Arc::new(Mutex::new(SigDispositions::new()))))
.unwrap();
let process = {
let threads = Vec::new();
Arc::new(Process::new(
pid,
parent,
threads,
executable_path.to_string(),
process_vm,
file_table,
fs,
umask,
sig_dispositions,
resource_limits,
))
};
let thread = if let Some(thread_builder) = main_thread_builder {
let builder = thread_builder.process(Arc::downgrade(&process));
builder.build()
} else {
Thread::new_posix_thread_from_executable(
pid,
credentials.unwrap(),
process.vm(),
&process.fs().read(),
executable_path,
Arc::downgrade(&process),
argv.unwrap(),
envp.unwrap(),
)?
};
process.threads().lock().push(thread);
process.set_runnable();
Ok(process)
}
Rust
impl PosixThreadExt for Thread {
/// This function should only be called when launch shell()
fn new_posix_thread_from_executable(
tid: Tid,
credentials: Credentials,
process_vm: &ProcessVm,
fs_resolver: &FsResolver,
executable_path: &str,
process: Weak<Process>,
argv: Vec<CString>,
envp: Vec<CString>,
) -> Result<Arc<Thread>> {
let elf_file = {
let fs_path = FsPath::new(AT_FDCWD, executable_path)?;
fs_resolver.lookup(&fs_path)?
};
let (_, elf_load_info) =
load_program_to_vm(process_vm, elf_file, argv, envp, fs_resolver, 1)?;
let vm_space = process_vm.root_vmar().vm_space().clone();
let mut cpu_ctx = UserContext::default();
cpu_ctx.set_rip(elf_load_info.entry_point() as _);
cpu_ctx.set_rsp(elf_load_info.user_stack_top() as _);
let user_space = Arc::new(UserSpace::new(vm_space, cpu_ctx));
let thread_name = Some(ThreadName::new_from_executable_path(executable_path)?);
let thread_builder = PosixThreadBuilder::new(tid, user_space, credentials)
.thread_name(thread_name)
.process(process);
Ok(thread_builder.build())
}
The ELF file is parsed and loaded into memory, its entry point is located, and the corresponding stack is set up:
Rust
cpu_ctx.set_rip(elf_load_info.entry_point() as _);
cpu_ctx.set_rsp(elf_load_info.user_stack_top() as _);
Both vm_space and cpu_ctx are packed into user_space:
Rust
let user_space = Arc::new(UserSpace::new(vm_space, cpu_ctx));
Creating the new thread:
Rust
pub fn build(self) -> Arc<Thread> {
let Self {
tid,
user_space,
process,
credentials,
thread_name,
set_child_tid,
clear_child_tid,
sig_mask,
sig_queues,
is_main_thread,
} = self;
let thread = Arc::new_cyclic(|thread_ref| {
let task = create_new_user_task(user_space, thread_ref.clone());
let status = ThreadStatus::Init;
let posix_thread = PosixThread {
process,
is_main_thread,
name: Mutex::new(thread_name),
set_child_tid: Mutex::new(set_child_tid),
clear_child_tid: Mutex::new(clear_child_tid),
credentials,
sig_mask: Mutex::new(sig_mask),
sig_queues: Mutex::new(sig_queues),
sig_context: Mutex::new(None),
sig_stack: Mutex::new(None),
robust_list: Mutex::new(None),
};
Thread::new(tid, task, posix_thread, status)
});
thread_table::add_thread(thread.clone());
thread
}
The core step here is creating the task, which is then stored inside the thread:
Rust
let task = create_new_user_task(user_space, thread_ref.clone());
let status = ThreadStatus::Init;
let posix_thread = PosixThread {
process,
is_main_thread,
name: Mutex::new(thread_name),
set_child_tid: Mutex::new(set_child_tid),
clear_child_tid: Mutex::new(clear_child_tid),
credentials,
sig_mask: Mutex::new(sig_mask),
sig_queues: Mutex::new(sig_queues),
sig_context: Mutex::new(None),
sig_stack: Mutex::new(None),
robust_list: Mutex::new(None),
};
Thread::new(tid, task, posix_thread, status)
The create_new_user_task implementation:
Rust
pub fn create_new_user_task(user_space: Arc<UserSpace>, thread_ref: Weak<Thread>) -> Arc<Task> {
fn user_task_entry() {
let cur = Task::current();
let user_space = cur.user_space().expect("user task should have user space");
let mut user_mode = UserMode::new(user_space);
debug!(
"[Task entry] rip = 0x{:x}",
user_mode.context().instruction_pointer()
);
debug!(
"[Task entry] rsp = 0x{:x}",
user_mode.context().stack_pointer()
);
debug!(
"[Task entry] rax = 0x{:x}",
user_mode.context().syscall_ret()
);
loop {
let user_event: UserEvent = user_mode.execute();
let context = user_mode.context_mut();
// handle user event:
handle_user_event(user_event, context);
let current_thread = current_thread!();
// should be do this comparison before handle signal?
if current_thread.status().lock().is_exited() {
break;
}
handle_pending_signal(context).unwrap();
if current_thread.status().lock().is_exited() {
debug!("exit due to signal");
break;
}
// If current is suspended, wait for a signal to wake up self
while current_thread.status().lock().is_stopped() {
Thread::yield_now();
debug!("{} is suspended.", current_thread.tid());
handle_pending_signal(context).unwrap();
}
// a preemption point after handling user event.
preempt();
}
debug!("exit user loop");
// FIXME: This is a work around: exit in kernel task entry may be not called. Why this will happen?
Task::current().exit();
}
TaskOptions::new(user_task_entry)
.data(thread_ref)
.user_space(Some(user_space))
.build()
.expect("spawn task failed")
}
A user_mode is created from the user_space:
Rust
let mut user_mode = UserMode::new(user_space);
This essentially copies the initial context stored in user_space into user_mode:
Rust
pub fn new(user_space: &'a Arc<UserSpace>) -> Self {
Self {
current: Task::current(),
user_space,
context: user_space.init_ctx,
}
}
user_task_entry serves as the user-side entry of the current task. At the same time, the task's kernel-side entry, kernel_task_entry, is built and installed into the task's TaskContext:
Rust
pub fn build(self) -> Result<Arc<Task>> {
/// all task will entering this function
/// this function is mean to executing the task_fn in Task
fn kernel_task_entry() {
let current_task = current_task()
.expect("no current task, it should have current task in kernel task entry");
current_task.func.call(());
current_task.exit();
}
let result = Task {
func: self.func.unwrap(),
data: self.data.unwrap(),
user_space: self.user_space,
task_inner: Mutex::new(TaskInner {
task_status: TaskStatus::Runnable,
ctx: TaskContext::default(),
}),
exit_code: 0,
kstack: KernelStack::new_with_guard_page()?,
link: LinkedListAtomicLink::new(),
priority: self.priority,
cpu_affinity: self.cpu_affinity,
};
result.task_inner.lock().task_status = TaskStatus::Runnable;
result.task_inner.lock().ctx.rip = kernel_task_entry as usize;
result.task_inner.lock().ctx.regs.rsp =
(crate::vm::paddr_to_vaddr(result.kstack.end_paddr())) as u64;
Ok(Arc::new(result))
}
The flow above is clear: from creating the process, to creating the thread, to creating the task.
Task Switching
With the task created, let's look at how tasks are switched and executed.
Rust
process.run();
Once the process has been created it starts to run, which means its (only) thread starts to run:
Rust
pub fn run(&self) {
let threads = self.threads.lock();
// when run the process, the process should has only one thread
debug_assert!(threads.len() == 1);
debug_assert!(self.is_runnable());
let thread = threads[0].clone();
// should not hold the lock when run thread
drop(threads);
thread.run();
}
The thread's task then starts running:
Rust
pub fn run(&self) {
self.status.lock().set_running();
self.task.run();
}
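Thread::run delegates to Task::run. Here is a minimal sketch of Task::run, assuming it simply combines the two steps shown next (enqueue, then reschedule); the real body in Asterinas may differ in detail.
Rust
// Sketch (assumption): running a task means handing it to the global
// scheduler and then triggering a scheduling decision.
pub fn run(self: &Arc<Self>) {
    add_task(self.clone());
    schedule();
}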
add_task puts the task onto the system's task queue:
Rust
pub fn add_task(task: Arc<Task>) {
GLOBAL_SCHEDULER.lock_irq_disabled().enqueue(task);
}
Scheduling:
Rust
pub fn schedule() {
if let Some(task) = fetch_task() {
switch_to_task(task);
}
}
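fetch_task is not shown above; a plausible sketch, assuming it just asks the global scheduler for the next task, looks like this.
Rust
// Sketch (assumption): pop the next runnable task from the global scheduler.
pub fn fetch_task() -> Option<Arc<Task>> {
    GLOBAL_SCHEDULER.lock_irq_disabled().dequeue()
}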
Task switching:
Rust
fn switch_to_task(next_task: Arc<Task>) {
if !PREEMPT_COUNT.is_preemptive() {
panic!(
"Calling schedule() while holding {} locks",
PREEMPT_COUNT.num_locks()
);
//GLOBAL_SCHEDULER.lock_irq_disabled().enqueue(next_task);
//return;
}
let current_task_option = current_task();
let next_task_cx_ptr = &next_task.inner_ctx() as *const TaskContext;
let current_task: Arc;
let current_task_cx_ptr: *mut TaskContext = match current_task_option {
None => PROCESSOR.lock().get_idle_task_cx_ptr(),
Some(current_task) => {
if current_task.status() == TaskStatus::Runnable {
GLOBAL_SCHEDULER
.lock_irq_disabled()
.enqueue(current_task.clone());
}
&mut current_task.inner_exclusive_access().ctx as *mut TaskContext
}
};
// change the current task to the next task
PROCESSOR.lock().current = Some(next_task.clone());
unsafe {
context_switch(current_task_cx_ptr, next_task_cx_ptr);
}
}
The switch operates on TaskContext, and as analyzed earlier, the TaskContext holds kernel_task_entry:
Rust
result.task_inner.lock().ctx.rip = kernel_task_entry as usize;
result.task_inner.lock().ctx.regs.rsp =
(crate::vm::paddr_to_vaddr(result.kstack.end_paddr())) as u64;
The context_switch implementation:
Assembly
.text
.global context_switch
.code64
context_switch: # (cur: *mut TaskContext, nxt: *const TaskContext)
# Save cur's registers
mov rax, [rsp] # return address
mov [rdi + 56], rax # 56 = offsetof(Context, rip)
mov [rdi + 0], rsp
mov [rdi + 8], rbx
mov [rdi + 16], rbp
mov [rdi + 24], r12
mov [rdi + 32], r13
mov [rdi + 40], r14
mov [rdi + 48], r15
# Restore nxt's registers
mov rsp, [rsi + 0]
mov rbx, [rsi + 8]
mov rbp, [rsi + 16]
mov r12, [rsi + 24]
mov r13, [rsi + 32]
mov r14, [rsi + 40]
mov r15, [rsi + 48]
mov rax, [rsi + 56] # restore return address
mov [rsp], rax # for stack balance, must use mov instead of push
ret
In x86_64 assembly, the register convention for function calls is:
•%rdi, %rsi, %rdx, %rcx, %r8, %r9: with fewer than seven arguments, the arguments are passed left to right in rdi, rsi, rdx, rcx, r8, r9; with seven or more, the first six are passed as above and the remainder are pushed onto the stack from right to left, as in 32-bit assembly.
•So rdi holds current_task_cx_ptr and rsi holds next_task_cx_ptr.
•rax is loaded from ctx.rip, which was set to kernel_task_entry.
•mov [rsp], rax places that value at the top of the stack, so when ret executes, it returns to the address taken from rax.
Why does rax hold the return address? Looking at the definition of TaskContext, the rip field sits right after the seven callee-saved registers of CalleeRegs, i.e. at byte offset 7 × 8 = 56. Therefore
mov rax, [rsi + 56] # restore return address
reads exactly the rip field at offset 56.
Rust
pub struct CalleeRegs {
pub rsp: u64,
pub rbx: u64,
pub rbp: u64,
pub r12: u64,
pub r13: u64,
pub r14: u64,
pub r15: u64,
}
#[derive(Debug, Default, Clone, Copy)]
#[repr(C)]
pub(crate) struct TaskContext {
pub regs: CalleeRegs,
pub rip: usize,
}
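As a quick standalone check (ordinary Rust, not Asterinas code), the offsets can be verified with core::mem::offset_of, confirming that rip lives at byte 56:
Rust
use core::mem::offset_of;

#[repr(C)]
pub struct CalleeRegs {
    pub rsp: u64, pub rbx: u64, pub rbp: u64,
    pub r12: u64, pub r13: u64, pub r14: u64, pub r15: u64,
}

#[repr(C)]
pub struct TaskContext {
    pub regs: CalleeRegs,
    pub rip: usize,
}

fn main() {
    // Seven 8-byte callee-saved registers come first, so rip is at offset 56,
    // matching `mov [rdi + 56], rax` / `mov rax, [rsi + 56]` in context_switch.
    assert_eq!(offset_of!(TaskContext, regs), 0);
    assert_eq!(offset_of!(TaskContext, rip), 7 * 8);
}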
After the switch completes, the next task resumes execution at kernel_task_entry:
Rust
fn kernel_task_entry() {
let current_task = current_task()
.expect("no current task, it should have current task in kernel task entry");
current_task.func.call(());
current_task.exit();
}
kernel_task_entry invokes the task's func, which is user_task_entry:
Rust
fn user_task_entry() {
let cur = Task::current();
let user_space = cur.user_space().expect("user task should have user space");
let mut user_mode = UserMode::new(user_space);
debug!(
"[Task entry] rip = 0x{:x}",
user_mode.context().instruction_pointer()
);
debug!(
"[Task entry] rsp = 0x{:x}",
user_mode.context().stack_pointer()
);
debug!(
"[Task entry] rax = 0x{:x}",
user_mode.context().syscall_ret()
);
loop {
let user_event: UserEvent = user_mode.execute();
let context: &mut UserContext = user_mode.context_mut();
// handle user event:
handle_user_event(user_event, context);
let current_thread = current_thread!();
// should be do this comparison before handle signal?
if current_thread.status().lock().is_exited() {
break;
}
handle_pending_signal(context).unwrap();
if current_thread.status().lock().is_exited() {
debug!("exit due to signal");
break;
}
// If current is suspended, wait for a signal to wake up self
while current_thread.status().lock().is_stopped() {
Thread::yield_now();
debug!("{} is suspended.", current_thread.tid());
handle_pending_signal(context).unwrap();
}
// a preemption point after handling user event.
preempt();
}
debug!("exit user loop");
// FIXME: This is a work around: exit in kernel task entry may be not called. Why this will happen?
Task::current().exit();
}
The core of the loop is this call, which drops into user mode and returns when a user event occurs:
Rust
let user_event: UserEvent = user_mode.execute();
UserMode::execute eventually lands in UserContext::execute:
Rust
impl UserContextApiInternal for UserContext {
fn execute(&mut self) -> crate::user::UserEvent {
// set interrupt flag so that in user mode it can receive external interrupts
// set ID flag which means cpu support CPUID instruction
self.user_context.general.rflags |= (RFlags::INTERRUPT_FLAG | RFlags::ID).bits() as usize;
const SYSCALL_TRAPNUM: u16 = 0x100;
// return when it is syscall or cpu exception type is Fault or Trap.
loop {
self.user_context.run();
match CpuException::to_cpu_exception(self.user_context.trap_num as u16) {
Some(exception) => {
#[cfg(feature = "intel_tdx")]
if *exception == VIRTUALIZATION_EXCEPTION {
let ve_info =
tdcall::get_veinfo().expect("#VE handler: fail to get VE info\n");
handle_virtual_exception(self.general_regs_mut(), &ve_info);
continue;
}
if exception.typ == CpuExceptionType::FaultOrTrap
|| exception.typ == CpuExceptionType::Fault
|| exception.typ == CpuExceptionType::Trap
{
break;
}
}
None => {
if self.user_context.trap_num as u16 == SYSCALL_TRAPNUM {
break;
}
}
};
call_irq_callback_functions(&self.as_trap_frame());
}
crate::arch::irq::enable_local();
if self.user_context.trap_num as u16 != SYSCALL_TRAPNUM {
self.cpu_exception_info = CpuExceptionInfo {
page_fault_addr: unsafe { x86::controlregs::cr2() },
id: self.user_context.trap_num,
error_code: self.user_context.error_code,
};
UserEvent::Exception
} else {
UserEvent::Syscall
}
}
This eventually calls into syscall_return, with the UserContext as its argument, so the rdi register points to the UserContext.
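How the Rust side hands the trap frame to that assembly routine is, roughly, the following; this is a hedged sketch only (the real declaration lives in the trapframe crate and may differ in detail).
Rust
// Sketch (assumption): syscall_return takes the UserContext by reference.
// Under the SysV calling convention the first argument is passed in rdi,
// so on entry rdi points at the UserContext, i.e. the bottom of the trap frame.
extern "sysv64" {
    fn syscall_return(regs: &mut UserContext);
}

impl UserContext {
    pub fn run(&mut self) {
        unsafe { syscall_return(self) }
    }
}
The syscall_return listing: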
Assembly
.global syscall_return
syscall_return:
# disable interrupt
cli
# save callee-saved registers
mov ecx, 0xC0000100
rdmsr
shl rdx, 32
or rax, rdx
push rax # push fsbase
push r15
push r14
push r13
push r12
push rbp
push rbx
push rdi
push rdi # keep rsp 16 bytes align
mov gs:4, rsp # store kernel rsp -> TSS.sp0
mov rsp, rdi # set rsp = bottom of trap frame
# pop fsbase gsbase
swapgs # store kernel gsbase
mov ecx, 0xC0000100
mov edx, [rsp + 18*8+4]
mov eax, [rsp + 18*8]
wrmsr # pop fsbase
mov ecx, 0xC0000101
mov edx, [rsp + 19*8+4]
mov eax, [rsp + 19*8]
wrmsr # pop gsbase
pop rax
pop rbx
pop rcx
pop rdx
pop rsi
pop rdi
pop rbp
pop r8 # skip rsp
pop r8
pop r9
pop r10
pop r11
pop r12
pop r13
pop r14
pop r15
# rip
# rflags
# fsbase
# gsbase
# trap_num
# error_code
# determine sysret or iret
cmp dword ptr [rsp + 4*8], 0x100 # syscall?
je sysret
iret:
# get user cs from STAR MSR
mov ecx, 0xC0000081
rdmsr # msr[ecx] => edx:eax
shr edx, 16 # dx = user_cs32
lea ax, [edx + 8] # ax = user_ss
add dx, 16 # dx = user_cs64
# construct trap frame
push rax # push ss
push [rsp - 8*8] # push rsp
push [rsp + 3*8] # push rflags
push rdx # push cs
push [rsp + 4*8] # push rip
# recover rcx, rdx, rax
mov rax, [rsp - 11*8]
mov rcx, [rsp - 9*8]
mov rdx, [rsp - 8*8]
iretq
The key part of this code is here:
Assembly
push rax # push ss
push [rsp - 8*8] # push rsp
push [rsp + 3*8] # push rflags
push rdx # push cs
push [rsp + 4*8] # push rip
When iretq executes, it pops the return address from the top of the stack, which corresponds exactly to:
Assembly
push [rsp + 4*8] # push rip
And rsp was earlier set to:
Assembly
mov rsp, rdi # set rsp = bottom of trap frame
And rdi is the argument passed into the syscall_return call, namely the UserContext.
Rust
pub struct UserContext {
pub general: GeneralRegs,
pub trap_num: usize,
pub error_code: usize,
}
/// General registers
#[derive(Debug, Default, Clone, Copy, Eq, PartialEq)]
#[repr(C)]
pub struct GeneralRegs {
pub rax: usize,
pub rbx: usize,
pub rcx: usize,
pub rdx: usize,
pub rsi: usize,
pub rdi: usize,
pub rbp: usize,
pub rsp: usize,
pub r8: usize,
pub r9: usize,
pub r10: usize,
pub r11: usize,
pub r12: usize,
pub r13: usize,
pub r14: usize,
pub r15: usize,
pub rip: usize,
pub rflags: usize,
pub fsbase: usize,
pub gsbase: usize,
}
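Assuming the UserContext and GeneralRegs definitions above are in scope (and that UserContext is #[repr(C)] like GeneralRegs), a standalone check (not Asterinas code) confirms the offsets the assembly relies on: after the sixteen pops rsp has advanced 16 × 8 = 128 bytes, exactly where rip sits, and trap_num is at 20 × 8 = 160, matching cmp dword ptr [rsp + 4*8], 0x100.
Rust
use core::mem::{offset_of, size_of};

fn main() {
    // 20 general registers + trap_num + error_code = 22 * 8 = 176 bytes.
    assert_eq!(size_of::<UserContext>(), 22 * 8);
    // rax..r15 are 16 fields, so rip sits at offset 128 inside GeneralRegs.
    assert_eq!(offset_of!(GeneralRegs, rip), 16 * 8);
    // trap_num follows rip, rflags, fsbase, gsbase: offset 20 * 8 = 160.
    assert_eq!(offset_of!(UserContext, trap_num), 20 * 8);
}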
mov rsp, rdi effectively makes rsp point to the UserContext object. The series of pops and pushes that follows just walks this pointer across the different fields of the UserContext, and this instruction
Assembly
push [rsp + 4*8] # push rip
ends up pushing the rip field of the UserContext, which was set to the application's entry point:
Rust
cpu_ctx.set_rip(elf_load_info.entry_point() as _);
cpu_ctx.set_rsp(elf_load_info.user_stack_top() as _);
So when iretq executes, the value popped from the top of the stack is the application's entry point, and execution jumps into the application.
System Calls and Return
The discussion above took us to the point where the program enters its entry point and starts executing. While running, the application may make system calls, which trap back into the kernel; the kernel handles the call and then returns to user space so the application can continue.
First, a single system-call entry address is registered with the hardware:
Rust
pub fn init() {
let cpuid = raw_cpuid::CpuId::new();
unsafe {
// enable syscall instruction
assert!(cpuid
.get_extended_processor_and_feature_identifiers()
.unwrap()
.has_syscall_sysret());
Efer::update(|efer| {
efer.insert(EferFlags::SYSTEM_CALL_EXTENSIONS);
});
// flags to clear on syscall
// copy from Linux 5.0
// TF|DF|IF|IOPL|AC|NT
const RFLAGS_MASK: u64 = 0x47700;
LStar::write(VirtAddr::new(syscall_entry as usize as u64));
SFMask::write(RFlags::from_bits(RFLAGS_MASK).unwrap());
}
}
syscall_entry serves as the entry point for system calls: when the application performs a system call, the CPU catches the request and transfers control to syscall_entry.
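From the user side, entering the kernel is just the syscall instruction. Here is a standalone illustration (not Asterinas code; the system-call number is a Linux-ABI assumption) of what the application executes and which registers the hardware fills in before landing at syscall_entry.
Rust
use core::arch::asm;

// Hypothetical user-space helper: issue a raw `syscall`. The CPU loads rip
// from the LSTAR MSR (i.e. jumps to syscall_entry), saves the user rip in rcx
// and rflags in r11 -- which is why syscall_entry pushes rcx/r11 as rip/rflags.
unsafe fn raw_getpid() -> isize {
    let ret: isize;
    asm!(
        "syscall",
        inlateout("rax") 39isize => ret, // 39 = getpid in the Linux x86_64 ABI
        out("rcx") _,                    // clobbered with the return rip
        out("r11") _,                    // clobbered with the saved rflags
        options(nostack),
    );
    ret
}
The kernel-side syscall_entry listing: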
Assembly
.global syscall_entry
syscall_entry:
# syscall instruction do:
# - load cs
# - store rflags -> r11
# - mask rflags
# - store rip -> rcx
# - load rip
swapgs # swap in kernel gs
mov gs:12, rsp # store user rsp -> scratch at TSS.sp1
mov rsp, gs:4 # load kernel rsp <- TSS.sp0
pop rsp # load rsp = bottom of trap frame
add rsp, 22*8 # rsp = top of trap frame
# push trap_num, error_code
push 0 # push error_code
push 0x100 # push trap_num
sub rsp, 16 # skip fsbase, gsbase
# push general registers
push r11 # push rflags
push rcx # push rip
.global trap_syscall_entry
trap_syscall_entry:
push r15
push r14
push r13
push r12
push r11
push r10
push r9
push r8
push gs:12 # push rsp
push rbp
push rdi
push rsi
push rdx
push rcx
push rbx
push rax
# push fsbase gsbase
mov ecx, 0xC0000100
rdmsr
mov [rsp + 18*8+4], edx
mov [rsp + 18*8], eax
mov ecx, 0xC0000102 # kernelgs
rdmsr
mov [rsp + 19*8+4], edx
mov [rsp + 19*8], eax
# restore callee-saved registers
mov rsp, gs:4 # load kernel rsp <- TSS.sp0
pop rbx
pop rbx
pop rbx
pop rbp
pop r12
pop r13
pop r14
pop r15
pop rax
mov ecx, 0xC0000100
mov rdx, rax
shr rdx, 32
wrmsr # pop fsbase
# go back to Rust
ret
The key point in this code is:
Assembly
mov rsp, gs:4 # load kernel rsp <- TSS.sp0
This loads the kernel stack top into rsp.
And when syscall_return was entered:
Assembly
mov gs:4, rsp # store kernel rsp -> TSS.sp0
the kernel stack top was saved into gs:4.
So once the kernel rsp has been restored, a series of pops follows:
Assembly
# restore callee-saved registers
mov rsp, gs:4 # load kernel rsp <- TSS.sp0
pop rbx
pop rbx
pop rbx
pop rbp
pop r12
pop r13
pop r14
pop r15
pop rax
These pops match, one for one, the pushes performed when syscall_return was entered. After the pops, the top of the stack holds the return address of the syscall_return call, so executing ret brings us back to the kernel code that called syscall_return, which then goes on to handle the system call.
Seen from this angle, the whole task flow is:
Task1.kernel_task_entry -> Task1.user_task_entry -> return to user space via iretq/sysretq -> enter the kernel at syscall_entry -> Task1.user_task_entry handles the system call, and so on.
Task Scheduling
Asterinas builds a priority-based, preemptive scheduler:
Rust
pub fn init() {
let preempt_scheduler = Box::new(PreemptScheduler::new());
let scheduler = Box::<PreemptScheduler>::leak(preempt_scheduler);
set_scheduler(scheduler);
}
The scheduler divides tasks into two kinds, real-time tasks and normal tasks:
Rust
struct PreemptScheduler {
/// Tasks with a priority of less than 100 are regarded as real-time tasks.
real_time_tasks: SpinLock<LinkedList<Arc<Task>>>,
/// Tasks with a priority greater than or equal to 100 are regarded as normal tasks.
normal_tasks: SpinLock<LinkedList<Arc<Task>>>,
}
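Here is a hedged sketch of how enqueue and dequeue might dispatch between the two lists, based only on the split above and the is_real_time check quoted further below (the trait name and method bodies are assumptions):
Rust
// Sketch (assumption): real-time tasks live on their own queue and are always
// dequeued before normal tasks, which is what lets them preempt normal ones.
impl Scheduler for PreemptScheduler {
    fn enqueue(&self, task: Arc<Task>) {
        if task.is_real_time() {
            self.real_time_tasks.lock_irq_disabled().push_back(task);
        } else {
            self.normal_tasks.lock_irq_disabled().push_back(task);
        }
    }

    fn dequeue(&self) -> Option<Arc<Task>> {
        if let Some(task) = self.real_time_tasks.lock_irq_disabled().pop_front() {
            Some(task)
        } else {
            self.normal_tasks.lock_irq_disabled().pop_front()
        }
    }
}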
As for preemption, there is a preemption point inside the task's user_task_entry:
Rust
pub fn create_new_user_task(user_space: Arc<UserSpace>, thread_ref: Weak<Thread>) -> Arc<Task> {
fn user_task_entry() {
let cur = Task::current();
let user_space = cur.user_space().expect("user task should have user space");
let mut user_mode = UserMode::new(user_space);
debug!(
"[Task entry] rip = 0x{:x}",
user_mode.context().instruction_pointer()
);
debug!(
"[Task entry] rsp = 0x{:x}",
user_mode.context().stack_pointer()
);
debug!(
"[Task entry] rax = 0x{:x}",
user_mode.context().syscall_ret()
);
loop {
let user_event = user_mode.execute();
let context = user_mode.context_mut();
// handle user event:
handle_user_event(user_event, context);
let current_thread = current_thread!();
// should be do this comparison before handle signal?
if current_thread.status().lock().is_exited() {
break;
}
handle_pending_signal(context).unwrap();
if current_thread.status().lock().is_exited() {
debug!("exit due to signal");
break;
}
// If current is suspended, wait for a signal to wake up self
while current_thread.status().lock().is_stopped() {
Thread::yield_now();
debug!("{} is suspended.", current_thread.tid());
handle_pending_signal(context).unwrap();
}
// a preemption point after handling user event.
preempt();
}
debug!("exit user loop");
// FIXME: This is a work around: exit in kernel task entry may be not called. Why this will happen?
Task::current().exit();
}
The preemption point checks whether the current task can be preempted by another task:
Rust
pub fn preempt() {
// disable interrupts to avoid nested preemption.
let disable_irq = disable_local();
let Some(curr_task) = current_task() else {
return;
};
let mut scheduler = GLOBAL_SCHEDULER.lock_irq_disabled();
if !scheduler.should_preempt(&curr_task) {
return;
}
let Some(next_task) = scheduler.dequeue() else {
return;
};
drop(scheduler);
switch_to_task(next_task);
}
The should_preempt logic is simple: if the current task is not a real-time task and the queue of pending real-time tasks is not empty, the current task can be preempted:
Rust
fn should_preempt(&self, task: &Arc<Task>) -> bool {
!task.is_real_time() && !self.real_time_tasks.lock_irq_disabled().is_empty()
}
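is_real_time itself is not shown; given the priority-below-100 rule stated in the scheduler's comments, a sketch might look like the following (the constant name and the priority accessor are assumptions):
Rust
// Sketch (assumption): a task counts as real-time when its priority is below 100.
const REAL_TIME_TASK_PRIORITY: u16 = 100;

impl Task {
    pub fn is_real_time(&self) -> bool {
        self.priority.get() < REAL_TIME_TASK_PRIORITY
    }
}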
If it can be preempted, a new task is dequeued and switched to:
Rust
let Some(next_task) = scheduler.dequeue() else {
return;
};
drop(scheduler);
switch_to_task(next_task);
So the simple flow described above can look roughly like this:
Task1.kernel_task_entry -> Task1.user_task_entry -> return to user space via iretq/sysretq -> enter the kernel at syscall_entry -> Task1.user_task_entry handles the system call -> can be preempted -> switch_to_task -> Task2.kernel_task_entry -> Task2.user_task_entry -> return to user space via iretq/sysretq -> enter the kernel at syscall_entry -> Task2.user_task_entry handles the system call, and so on.
A few rough edges:
•There appears to be no support for multiple CPUs / multiple cores yet.
•Preemption is not immediate: it only happens once the running task reaches a preemption point, and only when the current task has trapped into the kernel.