1 PG count limit problem
1.1 Cause analysis
1. As usual, first create the storage pool, then initialize it for RBD.
bash
[root@ceph141~]# ceph osd pool create wenzhiyong-k8s 128 128
Error ERANGE: pg_num 128 size 3 for this pool would result in 295 cumulative PGs per OSD
(2067 total PG replicas on 7 'in' root OSDs by crush rule) which exceeds the mon_max_pg_per_osd value
of 250
What this means: creating the pool wenzhiyong-k8s with pg_num 128 and 3 replicas would add 128 * 3 = 384 PG replicas.
However, each OSD in the cluster may hold at most 250 PGs (mon_max_pg_per_osd). With 7 OSDs, the cluster-wide ceiling is therefore 250 * 7 = 1750 PG replicas.
How many PGs does the cluster already have? Run ceph osd pool ls detail
and add up the pg_num values: 1 + 16 + 32 + 32 + 32 + 32 + 32 + 32 + 32 + 32 + 256 + 32 = 561. Because every pool is 3-way replicated, the cluster currently holds 561 * 3 = 1683 PG replicas; adding the 384 about to be created gives 2067, which clearly exceeds the 1750 ceiling (2067 / 7 ≈ 295 per OSD, over the 250 limit reported in the error).
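Rather than summing by hand, the current total can be pulled out of the same command; a minimal sketch (the grep/awk expression is illustrative and assumes every pool uses size 3, as is the case here):
bash
# Sum pg_num over all pools and show the implied replica total (assumes 3 replicas per pool)
ceph osd pool ls detail | grep -o 'pg_num [0-9]*' | awk '{sum += $2} END {print sum " PGs, " sum*3 " PG replicas"}'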
1.2 Ways to resolve the failure
- Best option: add more OSDs. Put plainly, add disks and machines.
- Middle option: raise the per-OSD PG limit. This is only a temporary fix that treats the symptom rather than the cause, because a growing PG count adds I/O pressure on the disk (see the sketch after this list).
- Lesser option: specify a smaller PG count when creating the pool, e.g.:
ceph osd pool create wenzhiyong-k8s 2 2
- Worst option: delete unused pools to release PGs. This can also help temporarily, but only do it after confirming the data really is no longer needed.
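For the middle option, on a recent Ceph release the limit can be raised through the centralized config rather than injectargs; a sketch (500 is just an example value):
bash
# Raise the per-OSD PG ceiling cluster-wide; no ceph.conf edit required
ceph config set global mon_max_pg_per_osd 500
# Confirm the value the monitors now see
ceph config get mon mon_max_pg_per_osd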
1. Temporary modification; a cluster restart may be needed afterwards for it to take effect. In practice, however, it turned out not to work.
bash
[root@ceph141~]# ceph tell osd.* injectargs --mon-max-pg-per-osd 500
osd.0: {}
osd.0: mon_max_pg_per_osd = '' (not observed, change may require restart) osd_delete_sleep = '' osd_delete_sleep_hdd = '' osd_delete_sleep_hybrid = '' osd_delete_sleep_ssd = '' osd_max_backfills = '' osd_pg_delete_cost = '' (not observed, change may require restart) osd_recovery_max_active = '' osd_recovery_max_active_hdd = '' osd_recovery_max_active_ssd = '' osd_recovery_sleep = '' osd_recovery_sleep_hdd = '' osd_recovery_sleep_hybrid = '' osd_recovery_sleep_ssd = '' osd_scrub_sleep = '' osd_snap_trim_sleep = '' osd_snap_trim_sleep_hdd = '' osd_snap_trim_sleep_hybrid = '' osd_snap_trim_sleep_ssd = ''
osd.1: {}
...
[root@ceph141~]# ceph osd pool create wenzhiyong-k8s 128 128
Error ERANGE: pg_num 128 size 3 for this pool would result in 295 cumulative PGs per OSD (2067 total PG replicas on 7 'in' root OSDs by crush rule) which exceeds the mon_max_pg_per_osd value of 250
[root@ceph141~]# systemctl reboot docker
2. Permanent modification: edit /etc/ceph/ceph.conf (note that mon_max_pg_per_osd is a monitor-side option, so it belongs under [global] or [mon] rather than [osd], and it must be set above the current 250 to actually raise the limit)
bash
[osd]
mon_max_pg_per_osd = 100
In the end, the approach of creating the pool with a small PG count was chosen.
bash
1. Create a storage pool dedicated to K8S
[root@ceph141~]# ceph osd pool create wenzhiyong-k8s 2 2
pool 'wenzhiyong-k8s' created
2. Create a block device image
[root@ceph141 ~]# rbd create -s 5G wenzhiyong-k8s/nginx-web
[root@ceph141 ~]#
[root@ceph141 ~]# rbd -p wenzhiyong-k8s ls
nginx-web
[root@ceph141 ~]#
[root@ceph141 ~]# rbd -p wenzhiyong-k8s info nginx-web
rbd image 'nginx-web':
size 5 GiB in 1280 objects
order 22 (4 MiB objects)
snapshot_count: 0
id: 12214b350eaa5
block_name_prefix: rbd_data.12214b350eaa5
format: 2
features: layering, exclusive-lock, object-map, fast-diff, deep-flatten
op_features:
flags:
create_timestamp: Fri Aug 23 16:34:00 2024
access_timestamp: Fri Aug 23 16:34:00 2024
modify_timestamp: Fri Aug 23 16:34:00 2024
3. Copy the ceph admin keyring from the ceph cluster to all worker nodes
[root@ceph141 ~]# scp /etc/ceph/ceph.client.admin.keyring 10.0.0.231:/etc/ceph/
[root@ceph141 ~]# scp /etc/ceph/ceph.client.admin.keyring 10.0.0.232:/etc/ceph/
[root@ceph141 ~]# scp /etc/ceph/ceph.client.admin.keyring 10.0.0.233:/etc/ceph/
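Note that for the in-tree rbd volume plugin the keyring alone is not enough: the kubelet on each worker shells out to the rbd command, so the ceph client tools must be installed there as well. A rough check, assuming Ubuntu workers and the monitor addresses used throughout this section:
bash
# On each worker node: install the rbd CLI and confirm the pool is reachable with the copied keyring
apt -y install ceph-common
rbd -p wenzhiyong-k8s ls -m 10.0.0.141,10.0.0.142,10.0.0.143 --id admin --keyring /etc/ceph/ceph.client.admin.keyring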
2 RBD as volumes
2.1 Keyring-based approach
yaml
cat > 03-rdb-admin-keyring.yaml << EOF
apiVersion: apps/v1
kind: Deployment
metadata:
name: deploy-volume-rbd-admin-keyring
spec:
replicas: 1
selector:
matchLabels:
apps: ceph-rbd
template:
metadata:
labels:
apps: ceph-rbd
spec:
volumes:
- name: data
rbd:
monitors:
- 10.0.0.141:6789
- 10.0.0.142:6789
- 10.0.0.143:6789
# Specify the storage pool
pool: wenzhiyong-k8s
# Specify the block device image
image: nginx-web
# Filesystem type; currently only "ext4", "xfs" and "ntfs" are supported.
fsType: xfs
# Whether the block device is read-only; defaults to false.
readOnly: false
# User for connecting to the ceph cluster; defaults to admin if not specified
user: admin
# Path to the ceph keyring; defaults to "/etc/ceph/keyring"
keyring: "/etc/ceph/ceph.client.admin.keyring"
containers:
- name: c1
image: registry.cn-hangzhou.aliyuncs.com/yinzhengjie-k8s/apps:v1
volumeMounts:
- name: data
mountPath: /wenzhiyong-data
ports:
- containerPort: 80
---
apiVersion: v1
kind: Service
metadata:
name: svc-rbd
spec:
type: NodePort
selector:
apps: ceph-rbd
ports:
- protocol: TCP
port: 80
targetPort: 80
nodePort: 30033
EOF
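A minimal sketch of applying this manifest and getting a shell in the pod for the test in step 1 below (kubectl exec against the deployment picks one of its pods):
bash
kubectl apply -f 03-rdb-admin-keyring.yaml
kubectl get pods -o wide -l apps=ceph-rbd
kubectl exec -it deploy/deploy-volume-rbd-admin-keyring -- sh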
1. Apply the K8S manifest and run tests inside the container
bash
/wenzhiyong-data # cd /
/ # rm -rf /wenzhiyong-data/
rm: can't remove '/wenzhiyong-data': Resource busy
/ # df -h | grep wen
/dev/rbd0 5.0G 68.1M 4.9G 1% /wenzhiyong-data
/ # ls -l /dev/rbd0
ls: /dev/rbd0: No such file or directory
/ # umount /wenzhiyong-data/
umount: can't unmount /wenzhiyong-data: Operation not permitted
/ # rm -rf /wenzhiyong-data/
rm: can't remove '/wenzhiyong-data': Resource busy
2. Check that the pod was scheduled to the worker232 node
bash
[root@master231 04-cephfs]# kubectl get pods -o wide
NAME READY STATUS RESTARTS AGE IP NODE
deploy-volume-rbd-admin-keyring-6b94f8cc86-nnpjd 1/1 Running 0 6m37s 10.100.2.45 worker232
3. On worker232 you can see that the rbd device is mounted for the pod to use
bash
[root@worker232~]# ll /dev/rbd*
brw-rw---- 1 root disk 252, 0 Nov 8 22:38 /dev/rbd0
/dev/rbd:
total 0
drwxr-xr-x 2 root root 60 Nov 8 22:38 wenzhiyong-k8s/
[root@worker232~]# df -h | grep rbd
/dev/rbd0 5.0G 69M 5.0G 2% /var/lib/kubelet/plugins/kubernetes.io/rbd/mounts/wenzhiyong-k8s-image-nginx-web
4. After the application is deleted, the mount on worker232 disappears as well
bash
[root@worker232~]# df -h | grep rbd
[root@worker232~]#
2.2 Secret-based approach
1. Get the admin account's key from the ceph cluster and base64-encode it
bash
[root@worker232~]# awk '/key/ {printf "%s", $NF}' /etc/ceph/ceph.client.admin.keyring | more
AQAlsChnHubLJRAAH2s3vhyGrxgba8anloPDtg==
[root@worker232~]# awk '/key/ {printf "%s", $NF}' /etc/ceph/ceph.client.admin.keyring | base64
QVFBbHNDaG5IdWJMSlJBQUgyczN2aHlHcnhnYmE4YW5sb1BEdGc9PQ==
2. Write the manifest and wrap the key in a Secret resource
yaml
cat > 04-rbd-admin-key.yaml << EOF
apiVersion: v1
kind: Secret
metadata:
name: ceph-admin-secret
type: "kubernetes.io/rbd"
data:
# The ceph admin KEY, base64-encoded; change this for your own cluster!
key: QVFBbHNDaG5IdWJMSlJBQUgyczN2aHlHcnhnYmE4YW5sb1BEdGc9PQ==
# Note: if data feels cumbersome, stringData can be used instead
#stringData:
# key: AQBeYMVm8+/UNhAAV8lxv/CvIm0Lyer1wSp9yA==
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: deploy-volume-rbd-secrets-keyring
spec:
replicas: 1
selector:
matchLabels:
apps: ceph-rbd
template:
metadata:
labels:
apps: ceph-rbd
spec:
volumes:
- name: data
rbd:
monitors:
- 10.0.0.141:6789
- 10.0.0.142:6789
- 10.0.0.143:6789
pool: wenzhiyong-k8s
image: nginx-web
fsType: xfs
readOnly: false
user: admin
secretRef:
# Name of the secret holding the ceph admin key
name: ceph-admin-secret
containers:
- name: c1
image: registry.cn-hangzhou.aliyuncs.com/yinzhengjie-k8s/apps:v3
volumeMounts:
- name: data
mountPath: /wenzhiyong-data
ports:
- containerPort: 80
EOF
The behavior is the same as with the keyring-based approach.
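If hand-running base64 feels error-prone, the same Secret can be generated straight from the keyring; a sketch (kubectl base64-encodes --from-literal values itself, so the raw key is passed; run it on a node that has both kubectl and the keyring):
bash
kubectl create secret generic ceph-admin-secret \
  --type=kubernetes.io/rbd \
  --from-literal=key="$(awk '/key/ {print $NF}' /etc/ceph/ceph.client.admin.keyring)"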
3 RBD with a dynamic StorageClass
01 SC
yaml
cat > 01-rbd-sc.yaml << EOF
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
name: csi-rbd-sc
provisioner: rbd.csi.ceph.com
parameters:
# The cluster ID; change it to your own environment (visible via ceph -s)
clusterID: 12fad866-9aa0-11ef-8656-6516a17ad6dd
# Specify the storage pool
pool: wenzhiyong-k8s
# Image features
imageFeatures: layering
csi.storage.k8s.io/provisioner-secret-name: csi-rbd-secret
csi.storage.k8s.io/provisioner-secret-namespace: default
csi.storage.k8s.io/controller-expand-secret-name: csi-rbd-secret
csi.storage.k8s.io/controller-expand-secret-namespace: default
csi.storage.k8s.io/node-stage-secret-name: csi-rbd-secret
csi.storage.k8s.io/node-stage-secret-namespace: default
# Filesystem type
csi.storage.k8s.io/fstype: xfs
# Reclaim policy
reclaimPolicy: Delete
allowVolumeExpansion: true
mountOptions:
- discard
EOF
02 rbd secret
yaml
cat > csi-rbd-secret.yaml <<EOF
apiVersion: v1
kind: Secret
metadata:
name: csi-rbd-secret
namespace: default
# Unlike the data field, stringData does not require base64 encoding; the value is taken as-is.
stringData:
# The user is admin here; a custom non-admin user could also be used.
userID: admin
# The admin user's key (use the raw key from the ceph keyring, not the base64-encoded one)
userKey: AQAlsChnHubLJRAAH2s3vhyGrxgba8anloPDtg==
EOF
03 rbd configmap
yaml
cat > ceph-config-map.yaml <<EOF
apiVersion: v1
kind: ConfigMap
metadata:
name: ceph-config
data:
# Contents of the ceph cluster config file "/etc/ceph/ceph.conf"; the auth requirements can be written as cephx, and fsid is the cluster ID
ceph.conf: |
[global]
fsid = 12fad866-9aa0-11ef-8656-6516a17ad6dd
mon_initial_members = ceph141, ceph142, ceph143
mon_host = 10.0.0.141,10.0.0.142,10.0.0.143
auth_cluster_required = cephx
auth_service_required = cephx
auth_client_required = cephx
# A keyring key must exist here, with an empty value
keyring: |
EOF
04 csi configmap
Container Storage Interface (CSI)
yaml
cat > csi-config-map.yaml <<EOF
apiVersion: v1
kind: ConfigMap
metadata:
name: ceph-csi-config
data:
# clusterID can be obtained from "ceph -s"
# monitors are the mon addresses of your own ceph cluster
config.json: |-
[
{
"clusterID": "12fad866-9aa0-11ef-8656-6516a17ad6dd",
"monitors": [
"10.0.0.141:6789",
"10.0.0.142:6789",
"10.0.0.143:6789"
]
}
]
EOF
05 csi-kms-config-map
yaml
cat > csi-kms-config-map.yaml <<EOF
apiVersion: v1
kind: ConfigMap
metadata:
name: ceph-csi-encryption-kms-config
data:
# This ConfigMap can be omitted, but then the kms-related content in csi-rbdplugin-provisioner.yaml and csi-rbdplugin.yaml must be commented out:
# - deploy/rbd/kubernetes/csi-rbdplugin-provisioner.yaml
# - deploy/rbd/kubernetes/csi-rbdplugin.yaml
config.json: |-
{}
EOF
06 Define the PVCs
yaml
cat > pvc.yaml <<EOF
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: rbd-pvc01
spec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 2Gi
storageClassName: csi-rbd-sc
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: rbd-pvc02
spec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 4Gi
storageClassName: csi-rbd-sc
EOF
07 Deploy the CSI driver
The following files are needed; download from: https://github.com/ceph/ceph-csi/tree/release-v3.7/deploy/rbd/kubernetes
==Note:== the manifests from the previous 6 steps must be applied first, otherwise these pods cannot be created
csi-config-map.yaml csidriver.yaml csi-nodeplugin-rbac.yaml
csi-provisioner-rbac.yaml csi-rbdplugin-provisioner.yaml csi-rbdplugin.yaml
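The apply order implied by the note above, as a sketch (filenames as used in steps 01-06 plus the downloaded CSI manifests; csi-config-map.yaml was already created in step 04, so the downloaded copy is skipped):
bash
# Manifests from steps 01-06 first
kubectl apply -f 01-rbd-sc.yaml -f csi-rbd-secret.yaml -f ceph-config-map.yaml \
  -f csi-config-map.yaml -f csi-kms-config-map.yaml -f pvc.yaml
# Then the ceph-csi manifests downloaded from the URL above
kubectl apply -f csidriver.yaml -f csi-provisioner-rbac.yaml -f csi-nodeplugin-rbac.yaml \
  -f csi-rbdplugin-provisioner.yaml -f csi-rbdplugin.yaml
kubectl get pods -o wide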
bash
[root@master231 deploy]# kubectl get pods -o wide
NAME READY STATUS RESTARTS AGE IP NODE
csi-rbdplugin-l24hj 3/3 Running 0 30m 10.0.0.233 worker233
csi-rbdplugin-provisioner-5dfcf67885-8rk48 7/7 Running 0 30m 10.100.1.30 worker233
csi-rbdplugin-provisioner-5dfcf67885-9wznm 7/7 Running 0 30m 10.100.2.48 worker232
csi-rbdplugin-qz7k6 3/3 Running 0 30m 10.0.0.232 worker232
08 Check how rbd images map to PVCs
1. On the ceph cluster, two new rbd images are visible (one per PVC), alongside the nginx-web image created earlier
bash
[root@ceph141~]# rbd ls -p wenzhiyong-k8s
csi-vol-d33df512-9df7-11ef-85fc-4a89d731ca68
csi-vol-d33df570-9df7-11ef-85fc-4a89d731ca68
nginx-web
[root@ceph141~]# rbd info wenzhiyong-k8s/csi-vol-d33df512-9df7-11ef-85fc-4a89d731ca68
rbd image 'csi-vol-d33df512-9df7-11ef-85fc-4a89d731ca68':
size 2 GiB in 512 objects
order 22 (4 MiB objects)
snapshot_count: 0
id: 392f1fe374022
block_name_prefix: rbd_data.392f1fe374022
format: 2
features: layering
op_features:
flags:
create_timestamp: Sat Nov 9 01:35:19 2024
access_timestamp: Sat Nov 9 01:35:19 2024
modify_timestamp: Sat Nov 9 01:35:19 2024
[root@ceph141~]# rbd info wenzhiyong-k8s/csi-vol-d33df570-9df7-11ef-85fc-4a89d731ca68
rbd image 'csi-vol-d33df570-9df7-11ef-85fc-4a89d731ca68':
size 4 GiB in 1024 objects
order 22 (4 MiB objects)
snapshot_count: 0
id: 392f182ce6323
block_name_prefix: rbd_data.392f182ce6323
format: 2
features: layering
op_features:
flags:
create_timestamp: Sat Nov 9 01:35:19 2024
access_timestamp: Sat Nov 9 01:35:19 2024
modify_timestamp: Sat Nov 9 01:35:19 2024
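To tie each csi-vol image back to its PVC from the K8S side, ceph-csi records the image name on the provisioned PV; a hedged sketch (verify the volumeAttributes field name against your own PV object):
bash
# Show which rbd image backs rbd-pvc01
PV=$(kubectl get pvc rbd-pvc01 -o jsonpath='{.spec.volumeName}')
kubectl get pv "$PV" -o jsonpath='{.spec.csi.volumeAttributes.imageName}{"\n"}'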