runc 1.4.2 超深度分析 --- CLI层:main.go、命令文件、runner、信号处理、TTY
源码目录:runc 根目录
.go文件(main.go + 17个命令文件 + 辅助文件)总行数:约 2,500 行
一、main.go --- 程序入口逐行解析
1.1 版本嵌入
go
// Version metadata for the binary. version is populated by the go:embed
// directive below; the other two default to empty strings.
//go:embed VERSION
var version string // contents of the VERSION file, embedded at compile time
var extraVersion = "" // optional suffix, set with -X main.extraVersion=xxx
var gitCommit = "" // Git commit hash, set with -X from the Makefile
设计:使用 Go 1.16+ 的 //go:embed 在编译期将 VERSION 文件内容嵌入二进制,避免运行时再读取文件。
1.2 printVersion --- 版本打印
go
// printVersion writes the version banner to the application's writer:
// the runc version, the git commit (when one is set), the OCI spec
// version, the Go runtime version and the libseccomp version (when the
// reported triple is not all zeroes).
func printVersion(c *cli.Context) {
	out := c.App.Writer

	fmt.Fprintln(out, "runc version", c.App.Version)
	if gitCommit != "" {
		// The commit line is skipped when no hash is set.
		fmt.Fprintln(out, "commit:", gitCommit)
	}
	fmt.Fprintln(out, "spec:", specs.Version)
	fmt.Fprintln(out, "go:", runtime.Version())

	// A 0.0.0 triple prints nothing.
	if major, minor, patch := seccomp.Version(); major+minor+patch > 0 {
		fmt.Fprintf(out, "libseccomp: %d.%d.%d\n", major, minor, patch)
	}
}
1.3 main 函数 --- 逐行
go
// main assembles the urfave/cli application (metadata, global flags,
// sub-commands and the Before hook) and runs it against os.Args.
func main() {
	app := cli.NewApp()
	app.Name = "runc"
	app.Version = strings.TrimSpace(version) + extraVersion
	app.Usage = usage
	// Replace the library's default version printer with ours.
	cli.VersionPrinter = printVersion

	// Default state directory. It moves under $XDG_RUNTIME_DIR when that
	// variable is non-empty and shouldHonorXDGRuntimeDir (defined
	// elsewhere) says it applies.
	root := "/run/runc"
	usingXDG := false
	if dir := os.Getenv("XDG_RUNTIME_DIR"); dir != "" && shouldHonorXDGRuntimeDir() {
		root = dir + "/runc"
		usingXDG = true
	}

	app.Flags = []cli.Flag{
		cli.BoolFlag{Name: "debug", Usage: "enable debug logging"},
		cli.StringFlag{Name: "log", Value: "", Usage: "set the log file path"},
		cli.StringFlag{Name: "log-format", Value: "text", Usage: "set the log format (text|json)"},
		cli.StringFlag{Name: "root", Value: root, Usage: "root directory for container state"},
		cli.BoolFlag{Name: "systemd-cgroup", Usage: "enable systemd cgroup support"},
		cli.StringFlag{Name: "rootless", Value: "auto", Usage: "ignore cgroup permission errors"},
	}

	app.Commands = []cli.Command{
		checkpointCommand,
		createCommand,
		deleteCommand,
		eventsCommand,
		execCommand,
		killCommand,
		listCommand,
		pauseCommand,
		psCommand,
		restoreCommand,
		resumeCommand,
		runCommand,
		specCommand,
		startCommand,
		stateCommand,
		updateCommand,
		featuresCommand,
	}

	// Before runs ahead of every command.
	app.Before = func(ctx *cli.Context) error {
		// Prepare the XDG-derived directory only when the user did not
		// pass --root explicitly.
		prepareXDGRoot := !ctx.IsSet("root") && usingXDG
		if prepareXDGRoot {
			if err := os.MkdirAll(root, 0o700); err != nil {
				fmt.Fprintln(os.Stderr, "the path in $XDG_RUNTIME_DIR must be writable")
				fatal(err)
			}
			// Add the sticky bit on top of 0700 (the original note says
			// this keeps the directory from being auto-cleaned).
			if err := os.Chmod(root, 0o700|os.ModeSticky); err != nil {
				fmt.Fprintln(os.Stderr, "you should check permission of $XDG_RUNTIME_DIR")
				fatal(err)
			}
		}

		// Validate the root directory, then set up logging.
		if err := reviseRootDir(ctx); err != nil {
			return err
		}
		return configLogrus(ctx)
	}

	// Route the cli library's error output through FatalWriter.
	cli.ErrWriter = &FatalWriter{cli.ErrWriter}

	if err := app.Run(os.Args); err != nil {
		// fatal is defined elsewhere; presumably it reports err and exits.
		fatal(err)
	}
}
1.4 全局 flag 决策流程
```mermaid
flowchart TD
    A["main()"] --> B{"$XDG_RUNTIME_DIR 设置且应遵守?"}
    B -- No --> C["root = /run/runc"]
    B -- Yes --> D["root = $XDG_RUNTIME_DIR/runc<br/>xdgDirUsed = true"]
    C --> E["注册 17 个子命令"]
    D --> E
    E --> F["app.Before()"]
    F --> G{"root 未显式设置 且 xdgDirUsed?"}
    G -- Yes --> H["MkdirAll(root, 0700) + sticky bit"]
    G -- No --> I["reviseRootDir()"]
    H --> I
    I --> J["configLogrus()"]
    J --> K["app.Run(os.Args)"]
```
1.5 configLogrus --- 日志配置
go
// configLogrus applies the global logging flags to logrus.
//
//   - --debug switches to debug level, enables caller reporting and
//     installs a text formatter that strips this package's source
//     directory from function and file names.
//   - --log-format accepts "" or "text" (no change) and "json"; any other
//     value is returned as an error.
//   - --log, when non-empty, redirects log output to that file (opened in
//     append mode, created if missing).
func configLogrus(context *cli.Context) error {
	if context.GlobalBool("debug") {
		logrus.SetLevel(logrus.DebugLevel)
		logrus.SetReportCaller(true)

		// Directory of this source file, used as the prefix to trim.
		_, self, _, _ := runtime.Caller(0)
		srcDir := filepath.Dir(self) + "/"
		shorten := func(s string) string { return strings.TrimPrefix(s, srcDir) }

		logrus.SetFormatter(&logrus.TextFormatter{
			CallerPrettyfier: func(fr *runtime.Frame) (string, string) {
				return shorten(fr.Function) + "()", shorten(fr.File) + ":" + strconv.Itoa(fr.Line)
			},
		})
	}

	format := context.GlobalString("log-format")
	if format == "json" {
		logrus.SetFormatter(new(logrus.JSONFormatter))
	} else if format != "" && format != "text" {
		return errors.New("invalid log-format: " + format)
	}

	logPath := context.GlobalString("log")
	if logPath == "" {
		return nil
	}
	logFile, err := os.OpenFile(logPath, os.O_CREATE|os.O_WRONLY|os.O_APPEND|os.O_SYNC, 0o644)
	if err != nil {
		return err
	}
	logrus.SetOutput(logFile)
	return nil
}
二、CtAct --- 容器操作类型
go
// CtAct identifies which container operation is being requested.
// Values start at 1 (iota + 1), so the zero value matches no action.
type CtAct uint8
const (
	CT_ACT_CREATE CtAct = iota + 1 // create the container without running it immediately
	CT_ACT_RUN // create and run
	CT_ACT_RESTORE // restore from a checkpoint
)
三种操作共享 startContainer 入口,通过 CtAct 区分行为:
| CtAct | 对应命令 | 行为 |
|---|---|---|
| CT_ACT_CREATE | runc create | Container.Start() → created 状态 |
| CT_ACT_RUN | runc run | Container.Run() → 运行并等待退出 |
| CT_ACT_RESTORE | runc restore | Container.Restore() → CRIU 恢复 |
三、startContainer --- 容器启动统一入口
go
// startContainer is the shared entry point for the create, run and
// restore actions. It validates the pid-file option, loads the spec,
// creates the container, prepares the optional notify socket, and hands
// off to a runner configured from the command-line flags.
//
// It returns the runner's (exit status, error); every failure before the
// runner starts is reported as (-1, err).
func startContainer(context *cli.Context, action CtAct, criuOpts *libcontainer.CriuOpts) (int, error) {
	if err := revisePidFile(context); err != nil {
		return -1, err
	}

	spec, err := setupSpec(context)
	if err != nil {
		return -1, err
	}

	// The container ID is the first positional argument.
	id := context.Args().First()
	if id == "" {
		return -1, errEmptyID
	}

	// newNotifySocket may return nil; every later use is guarded.
	ns := newNotifySocket(context, os.Getenv("NOTIFY_SOCKET"), id)
	hasNotify := ns != nil
	if hasNotify {
		// Let the notify socket adjust the spec before the container is
		// created from it.
		ns.setupSpec(spec)
	}

	ctr, err := createContainer(context, id, spec)
	if err != nil {
		return -1, err
	}

	if hasNotify {
		if err := ns.setupSocketDirectory(); err != nil {
			return -1, err
		}
		// The socket is bound only for the run action.
		if action == CT_ACT_RUN {
			if err := ns.bindSocket(); err != nil {
				return -1, err
			}
		}
	}

	return (&runner{
		enableSubreaper: !context.Bool("no-subreaper"),
		shouldDestroy:   !context.Bool("keep"),
		container:       ctr,
		listenFDs:       activation.Files(),
		notifySocket:    ns,
		consoleSocket:   context.String("console-socket"),
		pidfdSocket:     context.String("pidfd-socket"),
		detach:          context.Bool("detach"),
		pidFile:         context.String("pid-file"),
		preserveFDs:     context.Int("preserve-fds"),
		action:          action,
		criuOpts:        criuOpts,
		init:            true, // this path always starts the init process
	}).run(spec.Process)
}
```mermaid
sequenceDiagram
    participant Cmd as 命令处理
    participant SC as startContainer
    participant SS as setupSpec
    participant CC as createContainer
    participant RR as runner.run
    Cmd->>SC: startContainer(ctx, action)
    SC->>SC: revisePidFile()
    SC->>SS: setupSpec() → 读取 config.json
    SS-->>SC: *specs.Spec
    SC->>CC: createContainer(ctx, id, spec)
    CC->>CC: specconv + Factory.Create
    CC-->>SC: *Container
    SC->>SC: newNotifySocket + setupSocketDirectory
    SC->>RR: runner.run(spec.Process)
    RR-->>SC: (exitCode, error)
```
四、runner --- 容器运行器
4.1 runner 结构体
go
// runner bundles the options and handles needed to start a process in a
// container and supervise it; see (*runner).run.
type runner struct {
	init bool // whether the process is the container's init process (startContainer sets this to true)
	enableSubreaper bool // whether to enable the subreaper
	shouldDestroy bool // whether to destroy the container after it exits
	detach bool // whether to detach (do not wait for exit)
	listenFDs []*os.File // systemd socket-activation FDs
	preserveFDs int // number of additional FDs to preserve
	pidFile string // PID file path
	consoleSocket string // console socket path
	pidfdSocket string // pidfd socket path
	container *libcontainer.Container // container instance
	action CtAct // operation type
	notifySocket *notifySocket // sd-notify socket
	criuOpts *libcontainer.CriuOpts // CRIU options
	subCgroupPaths map[string]string // sub-cgroup paths
}
4.2 runner.run --- 逐行核心
go
// run starts the process described by config in r.container according to
// r.action and, unless detached, waits for it to exit.
//
// It returns the process's exit status and an error. Any non-nil error
// returned from this function triggers r.destroy() via the deferred
// cleanup. The cleanup is keyed on the named result retErr rather than on
// a local variable, so error returns from inner scopes (the preserve-fd
// loop, the pidfd-socket block) cannot bypass it through shadowing.
//
// It panics if r.action is not one of the known CtAct values.
func (r *runner) run(config *specs.Process) (_ int, retErr error) {
	defer func() {
		if retErr != nil {
			r.destroy() // tear the container down on any failure
		}
	}()

	// Step 1: terminal sanity check. Per the original notes:
	//   detach + terminal without a console socket   -> error
	//   (!detach || !terminal) with a console socket -> error
	if err := r.checkTerminal(config); err != nil {
		return -1, err
	}

	// Step 2: convert the OCI process into a libcontainer process.
	process, err := newProcess(config)
	if err != nil {
		return -1, err
	}
	process.LogLevel = strconv.Itoa(int(logrus.GetLevel()))
	process.Init = r.init
	process.SubCgroupPaths = r.subCgroupPaths

	// Step 3: pass socket-activation FDs and advertise them via the
	// LISTEN_FDS / LISTEN_PID environment variables.
	if len(r.listenFDs) > 0 {
		process.Env = append(process.Env,
			"LISTEN_FDS="+strconv.Itoa(len(r.listenFDs)),
			"LISTEN_PID=1")
		process.ExtraFiles = append(process.ExtraFiles, r.listenFDs...)
	}

	// Step 4: preserved FDs start right after stdio (0-2) and whatever
	// ExtraFiles are already queued.
	baseFd := 3 + len(process.ExtraFiles)
	procSelfFd, closer, err := pathrs.ProcThreadSelfOpen("fd/", unix.O_DIRECTORY|unix.O_CLOEXEC)
	if err != nil {
		return -1, err
	}
	defer closer()
	defer procSelfFd.Close()
	for i := baseFd; i < baseFd+r.preserveFDs; i++ {
		// Make sure the FD actually exists before handing it over.
		if err := unix.Faccessat(int(procSelfFd.Fd()), strconv.Itoa(i), unix.F_OK, 0); err != nil {
			return -1, fmt.Errorf("unable to stat preserved-fd %d: %w", i-baseFd, err)
		}
		process.ExtraFiles = append(process.ExtraFiles,
			os.NewFile(uintptr(i), "PreserveFD:"+strconv.Itoa(i)))
	}

	// Step 5: the create action always behaves as detached.
	detach := r.detach || (r.action == CT_ACT_CREATE)

	// Step 6: start the signal handler; it is received from the channel
	// in step 12.
	handlerCh := newSignalHandler(r.enableSubreaper, r.notifySocket)

	// Step 7: set up IO.
	tty, err := setupIO(process, r.container, config.Terminal, detach, r.consoleSocket)
	if err != nil {
		return -1, err
	}
	defer tty.Close()

	// Step 8: optional pidfd socket.
	if r.pidfdSocket != "" {
		connClose, err := setupPidfdSocket(process, r.pidfdSocket)
		if err != nil {
			return -1, err
		}
		defer connClose()
	}

	// Step 9: perform the requested container operation.
	switch r.action {
	case CT_ACT_CREATE:
		err = r.container.Start(process)
	case CT_ACT_RESTORE:
		err = r.container.Restore(process, r.criuOpts)
	case CT_ACT_RUN:
		err = r.container.Run(process)
	default:
		panic("Unknown action")
	}
	if err != nil {
		return -1, err
	}

	// Step 10: wait for the console; the process is already running, so
	// it must be terminated on failure.
	if err := tty.waitConsole(); err != nil {
		r.terminate(process)
		return -1, err
	}
	tty.ClosePostStart()

	// Step 11: write the PID file if one was requested.
	if r.pidFile != "" {
		if err := createPidFile(r.pidFile, process); err != nil {
			r.terminate(process)
			return -1, err
		}
	}

	// Step 12: forward signals and collect the exit status.
	handler := <-handlerCh
	status, err := handler.forward(process, tty, detach)
	if err != nil {
		r.terminate(process)
	}
	if detach {
		// Detached callers always get (0, nil), even if forward failed.
		return 0, nil
	}
	if err == nil {
		// Normal exit: clean up here; the error path is handled by the
		// deferred cleanup above.
		r.destroy()
	}
	return status, err
}
runner.run 流程图
```mermaid
flowchart TD
    A["runner.run(config)"] --> B["checkTerminal()"]
    B --> C["newProcess()"]
    C --> D["Socket Activation FDs"]
    D --> E["Preserve FDs"]
    E --> F["detach = r.detach OR action==CREATE?"]
    F --> G["newSignalHandler()"]
    G --> H["setupIO()"]
    H --> I["setupPidfdSocket()"]
    I --> J{"r.action?"}
    J --> K["container.Start(process)"]
    J --> L["container.Run(process)"]
    J --> M["container.Restore(process, criuOpts)"]
    K --> N["tty.waitConsole()"]
    L --> N
    M --> N
    N --> O["createPidFile()"]
    O --> P["← handlerCh"]
    P --> Q["handler.forward()"]
    Q --> R{"detach?"}
    R -- Yes --> T["返回 (status, err)"]
    R -- No --> S["r.destroy()"]
    S --> T
```
五、命令文件详解
5.1 命令对比矩阵
| 命令 | 入口 | CtAct | Container方法 | detach默认 |
|---|---|---|---|---|
| create | startContainer | CT_ACT_CREATE | Start() | true(强制) |
| run | startContainer | CT_ACT_RUN | Run() | false |
| start | container.Exec() | --- | Exec() | --- |
| exec | container.Start(process) | --- | Start(execProcess) | --- |
| kill | container.Signal() | --- | Signal() | --- |
| delete | container.Destroy() | --- | Destroy() | --- |
| pause | container.Pause() | --- | Pause() | --- |
| resume | container.Resume() | --- | Resume() | --- |
| list | 遍历 root 目录 | --- | Load() | --- |
| state | container.State() | --- | State() | --- |
| ps | container.Processes() | --- | Processes() | --- |
| update | container.Set() | --- | Set() | --- |
| events | container.Stats() | --- | Stats() | --- |
| checkpoint | container.Checkpoint() | --- | Checkpoint() | --- |
| restore | startContainer | CT_ACT_RESTORE | Restore() | false |
| features | 读取特性列表 | --- | --- | --- |
| spec | 生成 config.json | --- | --- | --- |
5.2 runc create vs runc run 对比
```mermaid
flowchart LR
    subgraph RUN["runc run"]
        R1["startContainer(CT_ACT_RUN)"] --> R2["container.Run(process)"]
        R2 --> R3["detach = r.detach (默认 false)"]
        R3 --> R4["等待容器退出 → destroy"]
    end
    subgraph CREATE["runc create"]
        C1["startContainer(CT_ACT_CREATE)"] --> C2["container.Start(process)"]
        C2 --> C3["detach = true"]
        C3 --> C4["返回 → 容器处于 Created 状态"]
    end
```
关键差异:
- create:只创建不运行(exec.fifo 阻塞用户进程),总是 detach
- run:创建后立即运行(打开 exec.fifo),默认在前台等待容器退出
5.3 runc exec --- 在运行中容器执行新进程
go
// execCommand sketches the "exec" sub-command, which starts an additional
// process inside an existing container. The "// ..." markers stand for
// code omitted from this excerpt.
var execCommand = cli.Command{
	Name: "exec",
	// ...
	Action: func(context *cli.Context) error {
		// 1. Validate the arguments.
		if err := checkArgs(context, -1, minimumArgs); err != nil {
			return err
		}
		// 2. Look up the already-existing container.
		container, err := getContainer(context) // presumably wraps libcontainer.Load()
		if err != nil { return err }
		// 3. Read the process spec for exec (optional -p flag or command-line arguments).
		spec, err := setupSpec(context) // or read from --process
		// ...
		// 4. Build the process.
		process, err := newProcess(spec.Process)
		process.Init = false // key point: this is not the init process
		process.SubCgroupPaths = subCgroupPaths
		// 5. Set up IO.
		tty, err := setupIO(process, container, spec.Terminal, detach, consoleSocket)
		// 6. Start the new process.
		handlerCh := newSignalHandler(enableSubreaper, notifySocket)
		if err := container.Start(process); err != nil {
			return err
		}
		// 7. Wait for exit.
		// ...
	},
}
exec vs create 的区别:
- exec:process.Init = false → setnsProcess(加入已有命名空间)
- create:process.Init = true → initProcess(创建新命名空间)
5.4 runc start --- 触发 exec.fifo
go
// runc start 唤醒已 created 的容器
var startCommand = cli.Command{
Name: "start",
Action: func(context *cli.Context) error {
container, err := getContainer(context)
// ...
return container.Exec() // 打开 exec.fifo → 用户进程开始运行
},
}
Container.Exec() 原理:
- runc start 打开 stateDir/exec.fifo 并读取(此前 runc init 已以只写方式打开该 fifo,并阻塞等待对端打开)
- runc init 向 fifo 写入成功 → 解除阻塞,开始执行用户命令
- fifo 被删除 → 容器状态变为 Running
5.5 runc kill --- 发送信号
go
// killCommand implements the "kill" sub-command: it sends the signal
// named by the second positional argument to the container. With
// --all / -a the signal goes to every process in the container, otherwise
// only through container.Signal.
//
// The action returns the container-lookup error, an "unknown signal"
// error when the name is not recognised, or the error from delivering the
// signal.
var killCommand = cli.Command{
	Name: "kill",
	Flags: []cli.Flag{
		cli.BoolFlag{Name: "all, a", Usage: "send signal to all processes"},
	},
	Action: func(context *cli.Context) error {
		container, err := getContainer(context)
		if err != nil {
			// Without this check a failed lookup would be dereferenced below.
			return err
		}

		// unix.SignalNum returns 0 when the name is not a known signal;
		// reject that instead of silently sending signal 0.
		name := context.Args().Get(1)
		sig := unix.SignalNum(name)
		if sig == 0 {
			return fmt.Errorf("unknown signal %q", name)
		}

		if context.Bool("all") {
			return signalAllProcesses(container, sig)
		}
		return container.Signal(sig)
	},
}
5.6 runc delete --- 删除容器
go
// deleteCommand implements the "delete" sub-command: it destroys the
// container. With --force / -f it first sends SIGKILL and waits before
// destroying.
//
// The action returns the container-lookup error or the error from
// container.Destroy.
var deleteCommand = cli.Command{
	Name: "delete",
	Flags: []cli.Flag{
		cli.BoolFlag{Name: "force, f", Usage: "force deletion"},
	},
	Action: func(context *cli.Context) error {
		container, err := getContainer(context)
		if err != nil {
			// Without this check a failed lookup would be dereferenced below.
			return err
		}
		if context.Bool("force") {
			// Kill, then wait. Both steps are best-effort: their errors
			// are deliberately dropped, and Destroy below reports the
			// final outcome.
			_ = container.Signal(unix.SIGKILL)
			_, _ = container.Wait()
		}
		return container.Destroy()
	},
}
5.7 runc list --- 列出所有容器
go
// listContainers walks the root directory and calls Load for every entry
// found there, using the entry name as the container ID.
//
// NOTE(review): in this excerpt the errors from os.Open, Readdirnames and
// State are discarded, and dir is never closed.
func listContainers(context *cli.Context) {
root := context.GlobalString("root") // value of the global "root" flag
dir, _ := os.Open(root)
entries, _ := dir.Readdirnames(0) // n <= 0: return all names in a single slice
for _, id := range entries {
container, err := libcontainer.Load(root, id)
if err != nil { continue } // skip entries that cannot be loaded
state, _ := container.State()
// output columns: ID PID STATUS BUNDLE CREATED
}
}
六、setupIO --- IO 设置决策矩阵
go
// setupIO picks how the process's I/O is wired up, based on the two switches
// createTTY and detach (four combinations, one branch each).
//
// This is a simplified sketch: parameter types are omitted and the creation of
// t, child, parent, socket, rootuid and rootgid is not shown.
func setupIO(process, container, createTTY, detach, sockpath) (*tty, error) {
if createTTY {
if !detach {
// foreground + TTY: create a console socket pair;
// the runc process receives the master pty
process.ConsoleSocket = child
go t.recvtty(parent)
} else {
// detached + TTY: connect to the console-socket supplied by the user
conn, _ := net.Dial("unix", sockpath)
process.ConsoleSocket = socket
}
return t, nil
}
if detach {
// detached + no TTY: inherit runc's own stdio
inheritStdio(process)
return &tty{}, nil
}
// foreground + no TTY: create pipes
return setupProcessPipes(process, rootuid, rootgid)
}
setupIO 决策矩阵
| createTTY | detach | 行为 | ConsoleSocket 来源 |
|---|---|---|---|
| true | false | 前台TTY | 内部 socket pair → recvtty |
| true | true | 后台TTY | 用户提供的 console-socket |
| false | true | 后台无TTY | 继承 runc stdio |
| false | false | 前台无TTY | setupProcessPipes (管道) |
七、信号处理 --- newSignalHandler
go
// newSignalHandler starts a goroutine that sets up signal handling and returns
// the channel on which that goroutine delivers the resulting handler.
//
// NOTE(review): os/signal provides Notify, not Catch, so signal.Catch below
// presumably stands for signal.Notify; confirm against upstream.
// NOTE(review): handler is a *signalHandler but handlerCh is declared as
// chan signalHandler; the element type looks simplified in this excerpt.
func newSignalHandler(enableSubreaper bool, notifySocket *notifySocket) chan signalHandler {
handlerCh := make(chan signalHandler)
go func() {
// --- install the signal handlers ---
sigchan := make(chan os.Signal, 40) // buffered with capacity 40
signal.Catch(sigchan, ...)
// --- enable the child subreaper ---
if enableSubreaper {
unix.Prctl(unix.PR_SET_CHILD_SUBREAPER, 1, 0, 0, 0)
}
handler := &signalHandler{
sigchan: sigchan,
notifySocket: notifySocket,
pidfd: -1,
}
handlerCh <- handler // hand the handler to the receiver (run(), per the original note)
// --- signal forwarding loop ---
for sig := range sigchan {
handler.handleSignal(sig)
}
}()
return handlerCh
}
信号处理流程
```mermaid
flowchart TD
    A["信号到达"] --> B{"信号类型?"}
    B -->|SIGCHLD| C["SIGCHLD → wait4() 回收子进程"]
    B -->|SIGWINCH| D["SIGWINCH → 调整 console 大小"]
    B -->|SIGUSR1| E["SIGUSR1 → sd-notify READY=1"]
    B -->|其他| F["其他信号 → 转发给容器 init"]
    F --> G{"有 pidfd?"}
    G -->|Yes| H["pidfd_send_signal()"]
    G -->|No| I["kill(initPid, sig)"]
```
八、notifySocket --- sd-notify 集成
go
// notifySocket holds the state used for the sd-notify integration.
type notifySocket struct {
socket *net.UnixConn // listening socket
host string // socket address
socketPath string // filesystem path
}
// setupSpec injects the NOTIFY_SOCKET environment variable into the container
// spec by appending "NOTIFY_SOCKET=<socketPath>" to spec.Process.Env.
func (n *notifySocket) setupSpec(spec *specs.Spec) {
spec.Process.Env = append(spec.Process.Env, "NOTIFY_SOCKET="+n.socketPath)
}
// run listens for sd-notify messages sent by the container and forwards them
// to the host's systemd. The loop has no exit condition in this excerpt.
//
// NOTE(review): the results of ReadFromUnix (byte count, address, error) are
// discarded here; the processing steps are only outlined as comments.
func (n *notifySocket) run(c *libcontainer.Container) {
for {
buf := make([]byte, 512+unix.CmsgSpace(4)) // fresh buffer on every iteration
n.socket.ReadFromUnix(buf)
// check for READY=1 -> notify systemd
// check for MAINPID= -> update the main PID
// forward to the host's $NOTIFY_SOCKET
}
}
设计意图 :容器内进程通过 NOTIFY_SOCKET 发送 sd-notify 消息,runc 监听并转发给宿主机的 systemd,实现服务就绪通知。
九、FatalWriter --- 错误输出桥接
go
// FatalWriter is a writer that wraps the CLI's error writer; its Write method
// also sends each message to logrus.
type FatalWriter struct {
cliErrWriter io.Writer // writer that Write falls through to
}
// Write logs p as a logrus error. When logrusToStderr() reports false, p is
// additionally written to cliErrWriter and that call's result is returned;
// otherwise Write reports len(p) bytes written and a nil error.
func (f *FatalWriter) Write(p []byte) (n int, err error) {
logrus.Error(string(p)) // record the error message through logrus
if !logrusToStderr() {
return f.cliErrWriter.Write(p) // logs are not going to stderr, so write there as well
}
return len(p), nil
}
设计原因:urfave/cli 默认将错误写到 cli.ErrWriter (stderr)。runc 希望错误同时经过 logrus 记录,所以替换 ErrWriter。
十、createContainer --- OCI Spec → libcontainer Container
go
// createContainer converts an OCI runtime spec into a libcontainer config and
// creates the container with the given id under the directory named by the
// global "root" flag.
//
// Abridged excerpt: the err checks after steps 1 and 2 are not shown.
func createContainer(context *cli.Context, id string, spec *specs.Spec) (*libcontainer.Container, error) {
// --- Step 1: decide whether rootless cgroup handling applies ---
rootlessCg, err := shouldUseRootlessCgroupManager(context)
// decided from whether we run as root plus the --rootless flag (per the original note)
// --- Step 2: OCI spec -> libcontainer config ---
config, err := specconv.CreateLibcontainerConfig(&specconv.CreateOpts{
CgroupName: id,
UseSystemdCgroup: context.GlobalBool("systemd-cgroup"),
NoPivotRoot: context.Bool("no-pivot"),
NoNewKeyring: context.Bool("no-new-keyring"),
Spec: spec,
RootlessEUID: os.Geteuid() != 0, // true when the effective UID is not 0
RootlessCgroups: rootlessCg,
})
// --- Step 3: create the container ---
root := context.GlobalString("root")
return libcontainer.Create(root, id, config)
}
```mermaid
flowchart TD
    A["specs.Spec<br/>(OCI Runtime Spec)"] --> B["specconv.CreateLibcontainerConfig()"]
    B --> C["configs.Config<br/>(libcontainer 内部)"]
    C --> D["libcontainer.Create()"]
    D --> E["*Container"]
```
十一、命令执行全景时序
参与者:用户、runc main、startContainer、runner、Factory.Create、Container、initProcess、runc init、SignalHandler

阶段一:runc create my-ct
1. runc create my-ct
2. startContainer(CT_ACT_CREATE)
3. Create(root, id, config)
4. *Container (stopped)
5. runner{action:CT_ACT_CREATE}
6. Start(process) → initProcess.start()
7. newParentProcess → fork/exec runc init
8. start()
9. procReady
10. cgroup.Set + hooks
11. procRun
12. detach=true → 返回 0
备注:nsenter → setupRootfs → syncReady;等待 exec.fifo

阶段二:runc start my-ct
1. runc start my-ct
2. Load(root, id) → Container
3. Exec() → 打开 exec.fifo
4. 状态 → Running
备注:fifo 被读取 → 用户进程启动

阶段三:runc run my-ct
1. runc run my-ct
2. startContainer(CT_ACT_RUN)
3. runner{action:CT_ACT_RUN}
4. Run(process)
5. 等待信号/子进程退出
6. 容器退出
7. Destroy()
备注:Start + Exec (自动打开fifo)
十二、设计模式总结
| # | 模式 | 体现 |
|---|---|---|
| 1 | 命令模式 | 17 个 cli.Command 独立注册 |
| 2 | 策略模式 | CtAct 区分 create/run/restore 三种策略 |
| 3 | 模板方法 | startContainer 统一流程,runner.run 分支执行 |
| 4 | 观察者 | SignalHandler 监听信号并转发 |
| 5 | 桥接 | FatalWriter 桥接 cli.ErrWriter → logrus |
| 6 | 选项模式 | specconv.CreateOpts + cli.Flag |
| 7 | 代理 | notifySocket 代理容器→systemd 的通知 |
| 8 | 工厂 | libcontainer.Create() 创建 Container |
| 9 | 分离接口 | CLI 层与 libcontainer 解耦 |
| 10 | Exec FIFO 延迟启动 | create 与 start 两步分离 |