Required version: Luminous (L) or later
On CentOS:
ceph osd getmap -o osd_map
osdmaptool --test-map-pgs --pool 5 osd_map
# Here 5 is the id of my data pool; look up your own (e.g. with ceph osd pool ls detail)
osdmaptool osd_map --upmap output_upmap --upmap-pool mydata --upmap-max 100 --upmap-deviation 1
# This caps the number of generated operations at 100 and sets the max deviation to 1,
# meaning upmap entries are only generated for OSDs whose PG count deviates from the
# target by 1 or more
source output_upmap
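The file written by --upmap contains plain ceph CLI commands, which is why applying it is just a matter of sourcing it. Its contents look roughly like this (the PG ids and OSD pairs below are made up for illustration):
# Hypothetical contents of output_upmap, as generated by osdmaptool --upmap
ceph osd pg-upmap-items 5.1f 12 7
ceph osd pg-upmap-items 5.3a 4 19 22 8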
Parameter reference (as the example below shows, --upmap-deviation must be >= 1):
[root@xxx]# osdmaptool thisosdmap --upmap afterupmap --upmap-pool tupu.rgw.buckets.data --upmap-max 100 --upmap-deviation 0.01
osdmaptool: upmap-deviation must be >= 1
usage: [--print] <mapfilename>
--create-from-conf creates an osd map with default configurations
--createsimple <numosd> [--clobber] [--pg-bits <bitsperosd>] [--pgp-bits <bits>] creates a relatively generic OSD map with <numosd> devices
--pgp-bits <bits> pgp_num map attribute will be shifted by <bits>
--pg-bits <bits> pg_num map attribute will be shifted by <bits>
--clobber allows osdmaptool to overwrite <mapfilename> if it already exists
--export-crush <file> write osdmap's crush map to <file>
--import-crush <file> replace osdmap's crush map with <file>
--health dump health checks
--test-map-pgs [--pool <poolid>] [--pg_num <pg_num>] [--range-first <first> --range-last <last>] map all pgs
--test-map-pgs-dump [--pool <poolid>] [--range-first <first> --range-last <last>] map all pgs
--test-map-pgs-dump-all [--pool <poolid>] [--range-first <first> --range-last <last>] map all pgs to osds
--mark-up-in mark osds up and in (but do not persist)
--mark-out <osdid> mark an osd as out (but do not persist)
--with-default-pool include default pool when creating map
--clear-temp clear pg_temp and primary_temp
--clean-temps clean pg_temps
--test-random do random placements
--test-map-pg <pgid> map a pgid to osds
--test-map-object <objectname> [--pool <poolid>] map an object to osds
--upmap-cleanup <file> clean up pg_upmap[_items] entries, writing
commands to <file> [default: - for stdout]
--upmap <file> calculate pg upmap entries to balance pg layout
writing commands to <file> [default: - for stdout]
--upmap-max <max-count> set max upmap entries to calculate [default: 10]
--upmap-deviation <max-deviation>
max deviation from target [default: 5]
--upmap-pool <poolname> restrict upmap balancing to 1 or more pools
--upmap-save write modified OSDMap with upmap changes
--upmap-active Act like an active balancer, keep applying changes until balanced
--dump <format> displays the map in plain text when <format> is 'plain', 'json' if specified format is not supported
--tree displays a tree of the map
--test-crush [--range-first <first> --range-last <last>] map pgs to acting osds
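After applying the generated commands, you can pull a fresh OSD map and re-run the distribution test to confirm the spread has improved (pool id 5 as in the example above):
# Re-export the OSD map and re-check the PG distribution for pool 5
ceph osd getmap -o osd_map_after
osdmaptool --test-map-pgs --pool 5 osd_map_after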
Red Hat systems
Manually rebalancing data on OSDs using upmap
As a storage administrator, you can manually rebalance data on OSDs by moving selected placement groups (PGs) to specific OSDs. To perform a manual rebalance, turn off the Ceph Manager balancer module and use upmap mode to move the PGs.
Prerequisites
- A running Red Hat Ceph Storage cluster.
- Root-level access to all nodes in the storage cluster.
Procedure
- Ensure the balancer module is on:
  Example
  [root@mon ~]# ceph mgr module ls | more
  {
      "always_on_modules": [
          "balancer",
          "crash",
          "devicehealth",
          "orchestrator_cli",
          "progress",
          "rbd_support",
          "status",
          "volumes"
      ],
      "enabled_modules": [
          "dashboard",
          "pg_autoscaler",
          "prometheus"
      ],
- If the balancer module is not listed under the always_on or enabled modules, enable it:
  Syntax
  ceph mgr module enable balancer
- Set the balancer mode to upmap:
  Syntax
  ceph balancer mode upmap
- Turn off the balancer module:
  Syntax
  ceph balancer off
- Check the balancer status:
  Example
  [root@mon ~]# ceph balancer status
  {
      "plans": [],
      "active": false,
      "last_optimize_started": "",
      "last_optimize_duration": "",
      "optimize_result": "",
      "mode": "upmap"
  }
- Set the norebalance flag for the OSDs:
  Syntax
  ceph osd set norebalance
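  To confirm the flag took effect, you can check the cluster flags, for example:
  # norebalance should now appear in the flags line
  ceph osd dump | grep flags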
- Use the ceph pg dump pgs_brief command to list the PGs in the storage cluster and their states, then use grep to search for remapped PGs.
  Example
  [root@mon ~]# ceph pg dump pgs_brief
  PG_STAT STATE                        UP          UP_PRIMARY  ACTING      ACTING_PRIMARY
  dumped pgs_brief
  7.270   active+remapped+backfilling  [8,48,61]   8           [46,48,61]  46
  7.1e7   active+remapped+backfilling  [73,64,74]  73          [18,64,74]  18
  7.1c1   active+remapped+backfilling  [29,14,8]   29          [29,14,24]  29
  7.17f   active+remapped+backfilling  [73,71,50]  73          [50,71,69]  50
  7.16c   active+remapped+backfilling  [66,8,4]    66          [66,4,57]   66
  7.13d   active+remapped+backfilling  [73,27,56]  73          [27,56,35]  27
  7.130   active+remapped+backfilling  [53,47,73]  53          [53,47,72]  53
  9.e0    active+remapped+backfilling  [8,75,14]   8           [14,75,58]  14
  7.db    active+remapped+backfilling  [10,57,60]  10          [10,60,50]  10
  9.7     active+remapped+backfilling  [26,69,38]  26          [26,38,41]  26
  7.4a    active+remapped+backfilling  [73,10,76]  73          [10,76,29]  10
  9.9a    active+remapped+backfilling  [20,15,73]  20          [20,15,29]  20
  7.ac    active+remapped+backfilling  [8,74,3]    8           [3,74,37]   3
  9.c2    active+remapped+backfilling  [57,75,7]   57          [4,75,7]    4
  7.34d   active+remapped+backfilling  [23,46,73]  23          [23,46,56]  23
  7.36a   active+remapped+backfilling  [40,32,8]   40          [40,32,44]  40
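  Since the full dump is long, you can isolate the remapped PGs directly, for example:
  # Show only PGs that are currently remapped
  ceph pg dump pgs_brief | grep remapped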
- Move the PGs to the OSDs where you want them to be. For example, move PG 7.ac from OSDs 8 and 3 to OSDs 3 and 37:
  Example
  PG_STAT STATE                        UP         UP_PRIMARY  ACTING     ACTING_PRIMARY
  dumped pgs_brief
  7.ac    active+remapped+backfilling  [8,74,3]   8           [3,74,37]  3
  [root@mon ~]# ceph osd pg-upmap-items 7.ac 8 3 3 37
  7.ac    active+clean                 [3,74,37]  8           [3,74,37]  3
  Note
  Repeat this step to move each of the remapped PGs, one at a time.
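  Moving PGs one at a time is tedious on a large cluster. Below is a minimal sketch of automating this step, assuming the pgs_brief column layout shown above: for each remapped PG it pairs every OSD that is in UP but not in ACTING with one that is in ACTING but not in UP, then prints the matching pg-upmap-items command. The pairs may differ from hand-picked ones but yield the same OSD set; review the printed commands before running them.
  # Sketch: emit pg-upmap-items commands for every remapped PG (prints only)
  ceph pg dump pgs_brief 2>/dev/null | awk '
  $2 ~ /remapped/ {
      pgid = $1
      gsub(/[][]/, "", $3); nu = split($3, up, ",")    # UP set
      gsub(/[][]/, "", $5); na = split($5, act, ",")   # ACTING set
      for (k in inup) delete inup[k]
      for (k in inact) delete inact[k]
      for (i = 1; i <= nu; i++) inup[up[i]] = 1
      for (i = 1; i <= na; i++) inact[act[i]] = 1
      nf = 0; for (i = 1; i <= nu; i++) if (!(up[i] in inact)) from[++nf] = up[i]
      nt = 0; for (i = 1; i <= na; i++) if (!(act[i] in inup)) to[++nt] = act[i]
      if (nf > 0 && nf == nt) {
          cmd = "ceph osd pg-upmap-items " pgid
          for (i = 1; i <= nf; i++) cmd = cmd " " from[i] " " to[i]
          print cmd
      }
  }'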
- Use the ceph pg dump pgs_brief command again to check whether the PGs have moved to the active+clean state:
  Example
  [root@mon ~]# ceph pg dump pgs_brief
  PG_STAT STATE         UP          UP_PRIMARY  ACTING      ACTING_PRIMARY
  dumped pgs_brief
  7.270   active+clean  [8,48,61]   8           [46,48,61]  46
  7.1e7   active+clean  [73,64,74]  73          [18,64,74]  18
  7.1c1   active+clean  [29,14,8]   29          [29,14,24]  29
  7.17f   active+clean  [73,71,50]  73          [50,71,69]  50
  7.16c   active+clean  [66,8,4]    66          [66,4,57]   66
  7.13d   active+clean  [73,27,56]  73          [27,56,35]  27
  7.130   active+clean  [53,47,73]  53          [53,47,72]  53
  9.e0    active+clean  [8,75,14]   8           [14,75,58]  14
  7.db    active+clean  [10,57,60]  10          [10,60,50]  10
  9.7     active+clean  [26,69,38]  26          [26,38,41]  26
  7.4a    active+clean  [73,10,76]  73          [10,76,29]  10
  9.9a    active+clean  [20,15,73]  20          [20,15,29]  20
  7.ac    active+clean  [3,74,37]   8           [3,74,37]   3
  9.c2    active+clean  [57,75,7]   57          [4,75,7]    4
  7.34d   active+clean  [23,46,73]  23          [23,46,56]  23
  7.36a   active+clean  [40,32,8]   40          [40,32,44]  40
  The time it takes for the PGs to reach active+clean depends on the numbers of PGs and OSDs. Also, the number of misplaced objects depends on the value set for mgr target_max_misplaced_ratio. A higher target_max_misplaced_ratio results in a larger number of misplaced objects, so it takes longer for all PGs to become active+clean.
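  The current value can be inspected and adjusted before the move; a sketch, assuming Mimic or later where the ceph config interface is available (0.07 below is just an illustrative value):
  # Check and, if needed, adjust the allowed ratio of misplaced objects
  ceph config get mgr target_max_misplaced_ratio
  ceph config set mgr target_max_misplaced_ratio 0.07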
- Unset the norebalance flag:
  Syntax
  ceph osd unset norebalance
- Turn the balancer module back on:
  Syntax
  ceph balancer on
Once the balancer module is enabled, it slowly moves the PGs back to their intended OSDs according to the CRUSH rules of the storage cluster. The balancing process can take some time, but it completes eventually.
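While it runs, progress can be followed from another shell, for example:
# Watch recovery progress and the balancer state every 10 seconds
watch -n 10 'ceph -s; ceph balancer status'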
This post draws on 鼠哥's solution and the official Red Hat documentation:
超实用的pg均衡工具upmap | 奋斗的松鼠 - Blog