runc网络与systemd管理runc应用

1. 创建网络命名空间

bash 复制代码
ip netns add <ns>

# ns是自定义网络空间名

# 例如: ip netns add haproxy
# 创建一个给 haproxy使用的网络命名空间

# 新增一个网络空间会在 /var/run/netns/目录下生成和网络空间同名的文件

2. 创建虚拟网络设备对

bash 复制代码
ip link add <vname1> type veth peer name <vname2>

# vname1和vname2是虚拟网络设备对两端,类似pipeline
# 例如:ip link add haproxy-out type veth peer name haproxy-in

3. 将虚拟网络设备对一端放入网络命名空间中

bash 复制代码
ip link set <vname2> netns <ns>

# 将虚拟网络设备对一端放入ns网络命名空间中
# 例如:ip link set haproxy-in netns haproxy

4. 分配IP地址给虚拟网络设备对并启动

bash 复制代码
ifconfig <vname1> <ip> netmask <mask> up
ip netns exec <ns> ifconfig <vname2> <ip> netmask <mask> up

# 注意两个网络设备对IP地址归属于同一个网段,子网掩码相同,up是启用网卡
# 例如:
# ifconfig haproxy-out 192.168.45.2 netmask 255.255.255.0 up
# ip netns exec haproxy ifconfig haproxy-in 192.168.45.3 netmask 255.255.255.0 up

# ip 命令
ip netns exec <ns> ip addr add <ip>/<mask> dev <vname2> # 分配一个IP地址
ip netns exec <ns> ip link set <vname2> up # 启动网卡
ip netns exec <ns> ip link set lo up # 启动回环网卡

ip addr add <ip>/<mask> dev <vname1>
ip link set <vname1> up # 启动网卡

5. 命名空间中添加路由网关

bash 复制代码
ip netns exec <ns> route add default gw <vname1_ip>

# 路由网关是虚拟网络设备对其中一端放在主机端的IP地址
# 例如:
# ip netns exec haproxy route add default gw 192.168.45.2
# 查询:ip netns exec haproxy route -n

6. 主机开启路由转发 -- ip_forward

bash 复制代码
echo "net.ipv4.ip_forward=1" >> /etc/sysctl.conf
sysctl -p

7. 配置SNAT,将网络空间内的数据包源地址更改为主机网卡(eth0)地址

bash 复制代码
iptables -t nat -A POSTROUTING -s <ip/mask> -o eth0 -j MASQUERADE

# -o 指定出站网卡设备名
# -t 指定修改的规则表
# -s 指定匹配的源IP地址
# -A 在指定链(此处为 POSTROUTING)末尾追加一条规则
# -j 指定匹配规则后执行的动作(此处为 MASQUERADE,即源地址伪装)
# 例如:
# iptables -t nat -A POSTROUTING -s 192.168.45.0/24 -o eth0 -j MASQUERADE

8. 添加主机网络到虚拟网卡forward规则

bash 复制代码
iptables -t filter -A FORWARD -i eth0 -o <vname1> -j ACCEPT
iptables -t filter -A FORWARD -o eth0 -i <vname1> -j ACCEPT

# eth0是主机能够连接外网的网卡

9. 连接测试

bash 复制代码
# 1. 主机是否能够连接到命名空间内网络
ping 192.168.45.3

# 2. 命名空间内网卡是否能连接到主机网络
ip netns exec haproxy ping <eth0_ip>
# 主机外网网卡通过 ip a获取

# 3. 命名空间内网卡是否能连接内网-internet
ip netns exec haproxy ping 192.168.0.185

# 4. 测试是否可以连接外网
ping www.python.org # 获取域名对应的IP地址
ip netns exec haproxy ping 151.101.108.223

10. runc部署nginx测试

bash 复制代码
# 1. 创建runc运行目录与文件 (需要装runc命令)
mkdir -p `pwd`/nginx/rootfs && cd `pwd`/nginx && runc spec

# 2.导出nginx容器中rootfs文件到 当前目录下 rootfs目录下
docker export $(docker run -d nginx) -o nginx-rootfs.tgz
tar -xf nginx-rootfs.tgz -C rootfs

# 3.编写配置config.json文件
# hostname --容器主机名 nginx
# args -- 容器中进程启动命令,可通过 docker inspect container_id中cmd获取
# namespace 需要添加network配置,将上面生成的网络空间文件配置到 path
# mount 存储卷挂载,主要挂载配置文件目录与静态文件目录
# env 环境变量

# 4. 创建nginx进程使用的用户
useradd nginx -s /sbin/nologin
# 5. 授权
chown -R nginx:nginx rootfs config.json

11. Nginx -- runc_config.json内容

json 复制代码
{
  "ociVersion": "1.0.2-dev",
  "process": {
    "terminal": true,
    "user": {
      "uid": 1000,
      "gid": 1000
    },
    "args": [
      "/docker-entrypoint.sh",
      "nginx",
      "-g",
      "daemon off;"
    ],
    "env": [
      "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
      "TERM=xterm",
      "NGINX_VERSION=1.21.4",
      "NJS_VERSION=0.7.0",
      "PKG_RELEASE=1~bullseye"
    ],
    "cwd": "/",
    "capabilities": {
      "bounding": [
        "CAP_AUDIT_WRITE",
        "CAP_KILL",
        "CAP_NET_BIND_SERVICE"
      ],
      "effective": [
        "CAP_AUDIT_WRITE",
        "CAP_KILL",
        "CAP_NET_BIND_SERVICE"
      ],
      "inheritable": [
        "CAP_AUDIT_WRITE",
        "CAP_KILL",
        "CAP_NET_BIND_SERVICE"
      ],
      "permitted": [
        "CAP_AUDIT_WRITE",
        "CAP_KILL",
        "CAP_NET_BIND_SERVICE"
      ],
      "ambient": [
        "CAP_AUDIT_WRITE",
        "CAP_KILL",
        "CAP_NET_BIND_SERVICE"
      ]
    },
    "rlimits": [
      {
        "type": "RLIMIT_NOFILE",
        "hard": 102400,
        "soft": 102400
      },
      {
        "hard": 102400,
        "soft": 102400,
        "type": "RLIMIT_NPROC"
      }
    ],
    "noNewPrivileges": true
  },
  "root": {
    "path": "rootfs",
    "readonly": true
  },
  "hostname": "nginx",
  "mounts": [
    {
      "destination": "/proc",
      "type": "proc",
      "source": "proc"
    },
    {
      "destination": "/dev",
      "type": "tmpfs",
      "source": "tmpfs",
      "options": [
        "nosuid",
        "strictatime",
        "mode=755",
        "size=65536k"
      ]
    },
    {
      "destination": "/dev/pts",
      "type": "devpts",
      "source": "devpts",
      "options": [
        "nosuid",
        "noexec",
        "newinstance",
        "ptmxmode=0666",
        "mode=0620",
        "gid=5"
      ]
    },
    {
      "destination": "/dev/shm",
      "type": "tmpfs",
      "source": "shm",
      "options": [
        "nosuid",
        "noexec",
        "nodev",
        "mode=1777",
        "size=65536k"
      ]
    },
    {
      "destination": "/dev/mqueue",
      "type": "mqueue",
      "source": "mqueue",
      "options": [
        "nosuid",
        "noexec",
        "nodev"
      ]
    },
    {
      "destination": "/sys",
      "type": "sysfs",
      "source": "sysfs",
      "options": [
        "nosuid",
        "noexec",
        "nodev",
        "ro"
      ]
    },
    {
      "destination": "/sys/fs/cgroup",
      "type": "cgroup",
      "source": "cgroup",
      "options": [
        "nosuid",
        "noexec",
        "nodev",
        "relatime",
        "ro"
      ]
    },
    {
      "destination": "/etc/hostname",
      "options": [
        "rbind",
        "rprivate"
      ],
      "source": "/etc/hostname",
      "type": "bind"
    },
    {
      "destination": "/etc/hosts",
      "options": [
        "rbind",
        "rprivate"
      ],
      "source": "/etc/hosts",
      "type": "bind"
    }
  ],
  "linux": {
    "resources": {
      "devices": [
        {
          "allow": false,
          "access": "rwm"
        }
      ]
    },
    "namespaces": [
      {
        "type": "pid"
      },
      {
        "path": "/var/run/netns/haproxy",
        "type": "network"
      },
      {
        "type": "ipc"
      },
      {
        "type": "uts"
      },
      {
        "type": "mount"
      }
    ],
    "maskedPaths": [
      "/proc/acpi",
      "/proc/asound",
      "/proc/kcore",
      "/proc/keys",
      "/proc/latency_stats",
      "/proc/timer_list",
      "/proc/timer_stats",
      "/proc/sched_debug",
      "/sys/firmware",
      "/proc/scsi"
    ],
    "readonlyPaths": [
      "/proc/bus",
      "/proc/fs",
      "/proc/irq",
      "/proc/sys",
      "/proc/sysrq-trigger"
    ]
  }
}

12. 通过runc启动nginx

bash 复制代码
# 1. 启动nginx
runc run -d -b <path-to>/nginx <container_name>
# -d 以后台方式运行
# -b 指定保存了容器 config.json文件和rootfs目录的根目录
# <container_name> 指定容器名字
# 例如:
# runc run -d -b ~/work/nginx nginx

# 2. 查询容器
runc list

# 3. 查询容器状态
cat /run/runc/<container_name>/state.json

# 4. 发起curl请求,检查是否返回nginx欢迎页面
curl http://192.168.45.3:80

13. Runc 使用 host network

bash 复制代码
删除 config.json 中 namespace 配置中 network配置项
{
"path": "/var/run/netns/haproxy",
"type": "network"
},
如果不删除则:always put your container in a new network namespace

文档:https://github.com/opencontainers/runc/issues/1552

# 测试
curl http://localhost:80

14. 使用systemd管理 -- 编写service文件

bash 复制代码
[Unit]
Description=runc run nginx

[Service]
ExecStart=/usr/bin/runc run -d -b /root/work/nginx --pid-file /run/nginx.pid nginx
ExecStop=/usr/bin/runc kill nginx
ExecStopPost=/usr/bin/runc delete nginx
PIDFile=/run/nginx.pid

[Install]
WantedBy=multi-user.target

15. 保存并启动

bash 复制代码
# 1. 保存到 /usr/lib/systemd/system/目录下
vim /usr/lib/systemd/system/nginx.service

# 2. 使用systemctl工具启动
systemctl daemon-reload
# 重新加载daemon关联的service文件

# 3.设置开机自启并启动
systemctl enable nginx && systemctl start nginx

# 4. 启动时可能会报错: [emerg] 1#1: open() "/var/log/nginx/error.log" failed (6: No such device or address)
# 解决方案: 因为是从容器中导出,/var/log/nginx/error.log是一个链接文件,链接到/dev/stdout,需要将这个链接文件删除,创建一个新的 error.log文件

# 5. 查询状态
systemctl status nginx

# 6. 访问
curl http://localhost:80
  • 关联文档
text 复制代码
- Runc -- GitHub地址:https://github.com/opencontainers/runc (CLI tool for spawning and running containers according to the OCI spec)
- Host network issue:https://github.com/opencontainers/runc/issues/1552
- Runc -- linux-config文档地址:https://github.com/opencontainers/runtime-spec/blob/main/config.md