Ceph 集群组件管理
ceph orch 命令
ceph orch 命令与编排器模块交互,编排器模块是ceph-mgr的插件,与外部编排服务交互。
ceph orch 命令支持多个外部编排器(如 cephadm、rook),使用时涉及以下几个概念:
- host:物理节点
- service type:服务类型,如nfs, mds, osd, mon, rgw, mgr, iscsi
- Service:逻辑服务
- Daemon:进程
cephadm使用的特殊标签:
- _no_schedule:不在此类标签的节点上部署或调度任何服务。
- _no_autotune_memory:不对此类标签的节点进行内存调优。
- _admin:自动将ceph.conf和ceph.client.admin.keyring发送到此类标签的节点上。
禁用服务自动扩展
Ceph 集群的服务自动扩展功能会自动将 ceph 组件部署到存储节点。如果想手动管理 ceph 服务,则需要禁用 ceph 服务自动扩展功能。
bash
# 查看 mon 服务
[root@ceph1 ~]# ceph orch ls mon
NAME PORTS RUNNING REFRESHED AGE PLACEMENT
mon 3/3 90s ago 8m label:_admin
# 禁用 mon 服务自动扩展
[root@ceph1 ~]# ceph orch apply mon --unmanaged=true
[root@ceph1 ~]# ceph orch ls mon
NAME PORTS RUNNING REFRESHED AGE PLACEMENT
mon 3/5 118s ago 4s <unmanaged>
# 启用 mon 服务自动扩展
[root@ceph1 ~]# ceph orch apply mon --unmanaged=false
[root@ceph1 ~]# ceph orch ls mon
NAME PORTS RUNNING REFRESHED AGE PLACEMENT
mon 3/5 2m ago 4s count:5
# 通过标签部署 mon 服务
[root@ceph1 ~]# ceph orch apply mon --placement="label:_admin"
Scheduled mon update...
[root@ceph1 ~]# ceph orch ls mon
NAME PORTS RUNNING REFRESHED AGE PLACEMENT
mon 3/3 3m ago 15s label:_admin
删除服务
以 crash 服务为例。
bash
# 禁用服务自动扩展
[root@ceph1 ~]# ceph orch apply crash --unmanaged=true
[root@ceph1 ~]# ceph orch ls crash
NAME PORTS RUNNING REFRESHED AGE PLACEMENT
crash 3/3 3m ago 5s <unmanaged>
# 查看服务中实例
[root@ceph1 ~]# ceph orch ps | grep crash
crash.ceph1 ceph1.laogao.cloud running (5m) 4m ago 24m 6639k - 16.2.15
3c4eff6082ae 5aea4634442b
crash.ceph2 ceph2.laogao.cloud running (5m) 4m ago 15m 6639k - 16.2.15
3c4eff6082ae 51d5f1e1d75c
crash.ceph3 ceph3.laogao.cloud running (4m) 4m ago 15m 6647k - 16.2.15
3c4eff6082ae 406b2a7b9d93
# 删除特定实例
[root@ceph1 ~]# ceph orch daemon rm crash.ceph1
Removed crash.ceph1 from host 'ceph1.laogao.cloud'
[root@ceph1 ~]# ceph orch ps | grep crash
crash.ceph2 ceph2.laogao.cloud running (9m) 8m ago 19m 6639k - 16.2.15
3c4eff6082ae 51d5f1e1d75c
crash.ceph3 ceph3.laogao.cloud running (8m) 8m ago 19m 6647k - 16.2.15
3c4eff6082ae 406b2a7b9d93
# 删除服务
[root@ceph1 ~]# ceph orch rm crash
Removed service crash
[root@ceph1 ~]# ceph orch ls crash #这里需要快点查看
NAME PORTS RUNNING REFRESHED AGE PLACEMENT
crash 1/3 <deleting> 50s <unmanaged>
[root@ceph1 ~]# ceph orch ls crash
No services reported
部署服务
使用 ceph 的编排器部署服务, 有两种方式:
-
**apply 方式:**定义了服务状态,由编排器根据参数自动寻找合适的节点进行服务部署。
语法: ceph orch apply <service_type> --placement=<placement_string> [--unmanaged]
例如: ceph orch apply crash 和 ceph orch apply mon --placement="label:_admin" 。
-
**daemon add 方式:**根据命令中的参数,直接进行服务部署。
语法: ceph orch daemon add <daemon_type> <placement>
命令 ceph orch apply osd --all-available-devices 会将节点上的所有可用设备配置为 OSD。
删除 OSD
确定 OSD 和设备关系
bash
[root@ceph1 ~]# ceph osd tree
ID CLASS WEIGHT TYPE NAME STATUS REWEIGHT PRI-AFF
-1 0.17537 root default
-3 0.05846 host ceph1 #主机ceph1下的硬盘
0 hdd 0.01949 osd.0 up 1.00000 1.00000 #看不出来osd.0对应哪块磁盘
3 hdd 0.01949 osd.3 up 1.00000 1.00000
6 hdd 0.01949 osd.6 up 1.00000 1.00000
-5 0.05846 host ceph2
2 hdd 0.01949 osd.2 up 1.00000 1.00000
4 hdd 0.01949 osd.4 up 1.00000 1.00000
7 hdd 0.01949 osd.7 up 1.00000 1.00000
-7 0.05846 host ceph3
1 hdd 0.01949 osd.1 up 1.00000 1.00000
5 hdd 0.01949 osd.5 up 1.00000 1.00000
8 hdd 0.01949 osd.8 up 1.00000 1.00000
# 获取集群id
[root@ceph1 ~]# ceph -s | grep id
id: 2faf683a-7cbf-11f0-b5ba-000c29e0ad0e
# 登录到ceph1上确认osd.0使用的块设备
[root@ceph1 ~]# ls -l /var/lib/ceph/2faf683a-7cbf-11f0-b5ba-000c29e0ad0e/osd.0/block
lrwxrwxrwx 1 ceph ceph 93 Aug 19 14:01 /var/lib/ceph/2faf683a-7cbf-11f0-b5ba-000c29e0ad0e/osd.0/block -> /dev/ceph-c92942fb-f959-4255-b8e6-751fab70fa79/osd-block-2ed79b2f-d825-4829-b4b0-59879d2ad99c
# 59879d2ad99c 是块设备名称的最后一串字符
[root@ceph1 ~]# lsblk | grep -B1 59879d2ad99c
sdb 8:16 0 20G 0 disk
└─ceph--c92942fb--f959--4255--b8e6--751fab70fa79-osd--block--2ed79b2f--d825--4829--b4b0--59879d2ad99c 253:4 0 20G 0 lvm
# 确认osd.0对应sdb
# 用下面命令也可以
[root@ceph1 ~]# ceph osd metadata 0 #0是osd编号
[root@ceph1 ~]# ceph osd metadata 0
{
"id": 0,
"arch": "x86_64",
"back_addr": "
[v2:192.168.108.11:6802/914648970,v1:192.168.108.11:6803/914648970]",
"back_iface": "",
"bluefs": "1",
"bluefs_dedicated_db": "0",
"bluefs_dedicated_wal": "0",
"bluefs_single_shared_device": "1",
"bluestore_bdev_access_mode": "blk",
"bluestore_bdev_block_size": "4096",
"bluestore_bdev_dev_node": "/dev/dm-2",
"bluestore_bdev_devices": "sdb",    # 看这里
"bluestore_bdev_driver": "KernelDevice",
"bluestore_bdev_partition_path": "/dev/dm-2",
"bluestore_bdev_rotational": "1",
"bluestore_bdev_size": "21470642176",
"bluestore_bdev_support_discard": "0",
"bluestore_bdev_type": "hdd",
"bluestore_min_alloc_size": "4096",
"ceph_release": "pacific",
"ceph_version": "ceph version 16.2.15
(618f440892089921c3e944a991122ddc44e60516) pacific (stable)",
"ceph_version_short": "16.2.15",
"ceph_version_when_created": "ceph version 16.2.15
(618f440892089921c3e944a991122ddc44e60516) pacific (stable)",
"container_hostname": "ceph1.laogao.cloud",
"container_image":
"quay.io/ceph/ceph@sha256:6ba107eb55617994a9e6ed49fb938828c2ed3121aa19ceeffbf8e28
608535d94",
"cpu": "Intel(R) Core(TM) Ultra 9 185H",
"created_at": "2026-03-27T06:01:45.491319Z",
"default_device_class": "hdd",
"device_ids": "",
"device_paths": "sdb=/dev/disk/by-path/pci-0000:00:10.0-scsi-0:0:1:0",
"devices": "sdb",
"distro": "centos",
"distro_description": "CentOS Stream 8",
"distro_version": "8",
"front_addr": "
[v2:192.168.108.11:6800/914648970,v1:192.168.108.11:6801/914648970]",
"front_iface": "",
"hb_back_addr": "
[v2:192.168.108.11:6806/914648970,v1:192.168.108.11:6807/914648970]",
"hb_front_addr": "
[v2:192.168.108.11:6804/914648970,v1:192.168.108.11:6805/914648970]",
"hostname": "ceph1.laogao.cloud",
"journal_rotational": "1",
"kernel_description": "#1 SMP Thu May 30 04:13:58 UTC 2024",
"kernel_version": "4.18.0-553.6.1.el8.x86_64",
"mem_swap_kb": "8245244",
"mem_total_kb": "7849664",
"network_numa_unknown_ifaces": "back_iface,front_iface",
"objectstore_numa_unknown_devices": "sdb",
"os": "Linux",
"osd_data": "/var/lib/ceph/osd/ceph-0",
"osd_objectstore": "bluestore",
"osdspec_affinity": "all-available-devices",
"rotational": "1"
}
实现脚本
bash
[root@ceph1 ~]# vim /usr/local/bin/show-osd-device
#!/bin/bash
# author laogao
# date 2025-08-19
# Description: show which block device backs each OSD deployed on this host.
# Usage: run on a Ceph node; prints one "osd.N : /dev/XXX" line per OSD.
set -u

# `ceph fsid` prints only the cluster id. Grepping `ceph -s` for "id" is
# fragile because any other status line containing "id" would match too.
cluster_id=$(ceph fsid) || { echo "cannot query the cluster fsid" >&2; exit 1; }

# Without this check a failed cd would make the loop scan the wrong directory.
cd "/var/lib/ceph/${cluster_id}" || exit 1

for osd in osd.*
do
  # When no OSD exists the glob stays literal ("osd.*"); skip that case.
  [[ -e "${osd}/block" ]] || continue

  # Resolve the block symlink to its kernel device node (e.g. /dev/dm-2)
  # instead of parsing `ls -l` output.
  block_dev=$(readlink -f "${osd}/block")

  # PKNAME is the parent kernel device, i.e. the disk underneath the LV.
  # This does not depend on line adjacency in the `lsblk` tree output.
  parent=$(lsblk -no PKNAME "${block_dev}" | head -n 1)

  # A block link that points straight at a whole disk has no parent;
  # report the resolved device itself in that case.
  device=/dev/${parent:-${block_dev#/dev/}}

  echo "${osd} : ${device}"
done
[root@ceph1 ~]# chmod +x /usr/local/bin/show-osd-device
执行效果
[root@ceph1 ~]# show-osd-device
osd.0 : /dev/sdb
osd.3 : /dev/sdc
osd.6 : /dev/sdd
编排删除
编排删除做了什么?
- 自动 drain 数据(如果未提前 out,某些版本会拒绝)
- 通过 cephadm 在目标主机上停止 OSD 容器
- 自动执行:
- ceph osd crush remove
- ceph auth del
- ceph osd rm
- (可选)自动 zap 磁盘/LVM(加 --zap 参数)
- 更新 orchestrator 内部状态,避免 reconcile 时重新创建该 OSD
示例:删除 osd.0
bash
# 禁用 osd 服务自动扩展
[root@ceph1 ~]# ceph orch apply osd --all-available-devices --unmanaged=true
[root@ceph1 ~]# ceph orch ls osd
NAME PORTS RUNNING REFRESHED AGE PLACEMENT
osd.all-available-devices 9 7m ago 6s <unmanaged>
# 第一种(先标记out,再删除)
[root@ceph1 ~]# ceph osd out osd.0
[root@ceph1 ~]# ceph orch osd rm 0
# 删除device上数据
# 删除前lsblk看下sdb
[root@ceph1 ~]# ceph orch device zap ceph1.laogao.cloud /dev/sdb --force # 先确认 osd.0 使用的是 /dev/sdb 还是其他设备
zap successful for /dev/sdb on ceph1.laogao.cloud
# 确认结果
[root@ceph1 ~]# ceph orch device ls
[root@ceph1 ~]# ceph orch device ls | grep ceph1.*sdb
ceph1.laogao.cloud /dev/sdb hdd 20.0G
Yes 72s ago
[root@ceph1 ~]# lsblk
[root@ceph1 ~]# lsblk /dev/sdb
NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINT
sdb 8:16 0 20G 0 disk
# 第二种 直接删除加格式化磁盘
[root@ceph1 ~]# ceph osd out osd.3
[root@ceph1 ~]# ceph orch osd rm 3 --force --zap
# 添加回来
[root@ceph1 ~]# ceph orch apply osd --all-available-devices
#RUNNING重新变为9
[root@ceph1 ~]# ceph orch ls osd
NAME PORTS RUNNING REFRESHED AGE PLACEMENT
osd.all-available-devices 9 6m ago 29s *
手动删除
这是传统方式,在 没有 orchestrator 或旧版 Ceph 中常见,在 cephadm 集群中不推荐。
bash
# 将OSD标记为 出群(迁移数据),等待集群数据重平衡完成(ceph -s 看到 active+clean)
[root@ceph1 ~]# ceph osd out osd.0
# 确认osd.0在哪个节点
[root@ceph1 ~]# ceph osd tree
# 手动停服务(登录主机)
[root@ceph1 ~]# systemctl stop ceph-<fsid>@osd.0.service # 或 podman stop ...
[root@ceph1 ~]# ceph osd tree
# 从CRUSH地图删除OSD
[root@ceph1 ~]# ceph osd crush remove osd.0
removed item id 0 name 'osd.0' from crush map
[root@ceph1 ~]# ceph osd tree
# 删除OSD认证密钥
[root@ceph1 ~]# ceph auth del osd.0
updated
# 从集群中删除OSD
[root@ceph1 ~]# ceph osd rm 0
removed osd.0
[root@ceph1 ~]# ceph osd tree
# 解除磁盘占用(可选,用于重新使用磁盘)
[root@ceph1 ~]# cephadm shell -- ceph-volume lvm zap /dev/sdb --destroy
风险:
- 漏步骤:比如忘了 auth del ,导致 OSD ID 无法复用。
- 顺序错误:先删 daemon 再 out,可能引发 PG 异常。
- 磁盘残留:LVM 未清理,新 OSD 无法部署到同一设备。
- Orchestrator 不知情:下次 ceph orch apply osd --all-available-devices 可能报错或忽略该设备。
删除主机
从集群中删除主机流程:
- 禁用集群所有服务自动扩展
- 查看待删除主机上当前运行的服务
- 停止待删除主机上的所有服务
- 删除主机上的所有服务
- 删除osd在CRUSH中的映射
- 擦除osd盘中的数据
- 从集群中删除主机
**示例:**删除 ceph2
**首先,**禁用集群中所有 ceph 服务的自动扩展,禁止自动部署 osd。
bash
[root@ceph1 ~]# for service in $(ceph orch ls |grep -v -e NAME -e osd| awk '{print $1}');do ceph orch apply $service --unmanaged=true;done
[root@ceph1 ~]# ceph orch apply osd --all-available-devices --unmanaged=true
**其次,**删除主机上运行的服务。
bash
# 查看ceph2上运行的daemon
[root@ceph1 ~]# ceph orch ps |grep ceph2 |awk '{print $1}'
crash.ceph2
mgr.ceph2.oetbal
mon.ceph2
node-exporter.ceph2
osd.2
osd.4
osd.7
# 删除相应 daemon
[root@ceph1 ~]# for daemon in $(ceph orch ps |grep ceph2 |awk '{print $1}');do
ceph orch daemon rm $daemon --force;done
# 手动清理crush信息
[root@ceph1 ~]# ceph osd crush rm osd.2
[root@ceph1 ~]# ceph osd crush rm osd.4
[root@ceph1 ~]# ceph osd crush rm osd.7
[root@ceph1 ~]# ceph osd crush rm ceph2
[root@ceph1 ~]# ceph osd rm 2 4 7
# 清理磁盘数据
[root@ceph1 ~]# ceph orch device zap ceph2.laogao.cloud /dev/sdb --force
[root@ceph1 ~]# ceph orch device zap ceph2.laogao.cloud /dev/sdc --force
[root@ceph1 ~]# ceph orch device zap ceph2.laogao.cloud /dev/sdd --force
**然后,**删除主机。
bash
[root@ceph1 ~]# ceph orch host rm ceph2.laogao.cloud
[root@ceph1 ~]# ceph orch host ls #查看现象ceph2被移除
HOST ADDR LABELS STATUS
ceph1.laogao.cloud 192.168.108.11 _admin
ceph3.laogao.cloud 192.168.108.13 _admin
2 hosts in cluster
最后,删除ceph2中相应ceph遗留文件。
bash
[root@ceph2 ~]# rm -rf /var/lib/ceph
[root@ceph2 ~]# rm -rf /etc/ceph /etc/systemd/system/ceph*
[root@ceph2 ~]# rm -rf /var/log/ceph
实验完成后,恢复环境。