A fix for a Ceph cluster reporting pgs: 100.000% pgs unknown
After setting up the cluster, ceph -w reported:
[root@node1 ~]# ceph -w
  cluster:
    id:     2386c327-8eb1-4dd7-9fed-fedff947c383
    health: HEALTH_WARN
            Reduced data availability: 128 pgs inactive

  services:
    mon: 2 daemons, quorum node0,node1
    mgr: openstack(active)
    osd: 3 osds: 3 up, 3 in

  data:
    pools:   1 pools, 128 pgs
    objects: 0 objects, 0 B
    usage:   6.0 GiB used, 118 GiB / 124 GiB avail
    pgs:     100.000% pgs unknown
             128 unknown
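(A useful first check here, not shown in the session above, is ceph health detail, which lists each stuck PG and its state individually:

ceph health detail
)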
I searched online for a long time; the common advice was to set osd crush update on start to true, but that made no difference for me.
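For reference, that setting is an OSD option; a minimal sketch of what I tried in ceph.conf on each OSD host (it already defaults to true on recent releases, which is probably why changing it had no effect):

[osd]
osd crush update on start = true

followed by restarting the OSDs, e.g. systemctl restart ceph-osd.target.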
The real problem turned out to be a misconfigured CRUSH map. From the output below you can see that my hosts are not under the root:
[root@node1 ~]# ceph osd crush tree
ID CLASS WEIGHT  TYPE NAME
-7       1.00000 host node2
 2   hdd 1.00000     osd.2
-5       1.00000 host node1
 1   hdd 1.00000     osd.1
-2       1.00000 host node0
 0   hdd 1.00000     osd.0
-1             0 root default
# buckets
host node0 {
    id -2               # do not change unnecessarily
    id -3 class hdd     # do not change unnecessarily
    # weight 1.000
    alg straw2
    hash 0              # rjenkins1
    item osd.0 weight 1.000
}
host node1 {
    id -5               # do not change unnecessarily
    id -6 class hdd     # do not change unnecessarily
    # weight 1.000
    alg straw2
    hash 0              # rjenkins1
    item osd.1 weight 1.000
}
host node2 {
    id -7               # do not change unnecessarily
    id -8 class hdd     # do not change unnecessarily
    # weight 1.000
    alg straw2
    hash 0              # rjenkins1
    item osd.2 weight 1.000
}
root default {
    id -1               # do not change unnecessarily
    id -4 class hdd     # do not change unnecessarily
    # weight 0.000
    alg straw2
    hash 0              # rjenkins1
}
The fix
(Note: the outputs below are from a cluster whose hosts are named ceph1/ceph2/ceph3 rather than node0/node1/node2; the procedure is the same.)
Export the CRUSH map:
ceph osd getcrushmap -o /tmp/mycrushmap
Decompile the CRUSH map into readable text:
crushtool -d /tmp/mycrushmap > /tmp/mycrushmap.txt
[root@ceph1 ~]# more /tmp/mycrushmap.txt
# begin crush map
tunable choose_local_tries 0
tunable choose_local_fallback_tries 0
tunable choose_total_tries 50
tunable chooseleaf_descend_once 1
tunable chooseleaf_vary_r 1
tunable chooseleaf_stable 1
tunable straw_calc_version 1
tunable allowed_bucket_algs 54

# devices
device 0 osd.0 class hdd
device 1 osd.1 class hdd
device 2 osd.2 class hdd

# types
type 0 osd
type 1 host
type 2 chassis
type 3 rack
type 4 row
type 5 pdu
type 6 pod
type 7 room
type 8 datacenter
type 9 zone
type 10 region
type 11 root

# buckets
root default {
    id -1               # do not change unnecessarily
    id -2 class hdd     # do not change unnecessarily
    # weight 0.000
    alg straw2
    hash 0              # rjenkins1
}

# rules
rule replicated_rule {
    id 0
    type replicated
    min_size 1
    max_size 10
    step take default
    step chooseleaf firstn 0 type host
    step emit
}

# end crush map
[root@ceph1 ~]#
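The problem is visible in replicated_rule: step take default starts the selection at root default, and step chooseleaf firstn 0 type host then descends looking for host buckets. With an empty root there is nothing to descend into, so the rule can never reach an OSD and every PG stays unknown. crushtool's test mode can confirm this on the broken map; --show-bad-mappings prints every input whose computed mapping comes up short, which here should be all of them:

crushtool -i /tmp/mycrushmap --test --rule 0 --num-rep 3 --show-bad-mappings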
Comparing with the crush tree output, what is missing from this map are the host buckets:
host ceph1 {
    id -3               # do not change unnecessarily
    id -4 class hdd     # do not change unnecessarily
    # weight 1.000
    alg straw2
    hash 0              # rjenkins1
    item osd.0 weight 1.000
}
host ceph2 {
    id -5               # do not change unnecessarily
    id -6 class hdd     # do not change unnecessarily
    # weight 1.000
    alg straw2
    hash 0              # rjenkins1
    item osd.1 weight 1.000
}
host ceph3 {
    id -7               # do not change unnecessarily
    id -8 class hdd     # do not change unnecessarily
    # weight 1.000
    alg straw2
    hash 0              # rjenkins1
    item osd.2 weight 1.000
}
along with the corresponding item entries under root default:
item ceph1 weight 1.000
item ceph2 weight 1.000
item ceph3 weight 1.000
Add the host buckets back into the map and reference them from the root. Note that the root default definition must come after all of the host definitions, since crushtool cannot compile an item reference to a bucket that has not yet been defined. The edited map:
[root@ceph1 ~]# more /tmp/mycrushmap.txt
# begin crush map
tunable choose_local_tries 0
tunable choose_local_fallback_tries 0
tunable choose_total_tries 50
tunable chooseleaf_descend_once 1
tunable chooseleaf_vary_r 1
tunable chooseleaf_stable 1
tunable straw_calc_version 1
tunable allowed_bucket_algs 54

# devices
device 0 osd.0 class hdd
device 1 osd.1 class hdd
device 2 osd.2 class hdd

# types
type 0 osd
type 1 host
type 2 chassis
type 3 rack
type 4 row
type 5 pdu
type 6 pod
type 7 room
type 8 datacenter
type 9 zone
type 10 region
type 11 root

# buckets
host ceph1 {
    id -3               # do not change unnecessarily
    id -4 class hdd     # do not change unnecessarily
    # weight 1.000
    alg straw2
    hash 0              # rjenkins1
    item osd.0 weight 1.000
}
host ceph2 {
    id -5               # do not change unnecessarily
    id -6 class hdd     # do not change unnecessarily
    # weight 1.000
    alg straw2
    hash 0              # rjenkins1
    item osd.1 weight 1.000
}
host ceph3 {
    id -7               # do not change unnecessarily
    id -8 class hdd     # do not change unnecessarily
    # weight 1.000
    alg straw2
    hash 0              # rjenkins1
    item osd.2 weight 1.000
}
root default {
    id -1               # do not change unnecessarily
    id -2 class hdd     # do not change unnecessarily
    # weight 0.000
    alg straw2
    hash 0              # rjenkins1
    item ceph1 weight 1.000
    item ceph2 weight 1.000
    item ceph3 weight 1.000
}

# rules
rule replicated_rule {
    id 0
    type replicated
    min_size 1
    max_size 10
    step take default
    step chooseleaf firstn 0 type host
    step emit
}

# end crush map
[root@ceph1 ~]#
Compile the edited CRUSH map:
crushtool -c /tmp/mycrushmap.txt -o /tmp/newcrushmap
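Before injecting it, the new binary map can be sanity-checked with the same crushtool test as above; with the hosts now under root default it should report no bad mappings:

crushtool -i /tmp/newcrushmap --test --rule 0 --num-rep 3 --show-bad-mappings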
Inject the modified CRUSH map into the cluster:
ceph osd setcrushmap -i /tmp/newcrushmap
The cluster status is now back to normal:
[root@ceph1 ~]# ceph -s
  cluster:
    id:     f75e1135-05c8-4765-9503-bb353722c879
    health: HEALTH_OK

  services:
    mon: 3 daemons, quorum ceph1,ceph2,ceph3 (age 40m)
    mgr: ceph1(active, since 76m), standbys: ceph3, ceph2
    osd: 3 osds: 3 up (since 76m), 3 in (since 76m)

  data:
    pools:   1 pools, 128 pgs
    objects: 0 objects, 0 B
    usage:   3.0 GiB used, 432 GiB / 435 GiB avail
    pgs:     128 active+clean

[root@ceph1 ~]# ceph osd crush tree
ID CLASS WEIGHT  TYPE NAME
-1       3.00000 root default
-3       1.00000     host ceph1
 0   hdd 1.00000         osd.0
-5       1.00000     host ceph2
 1   hdd 1.00000         osd.1
-7       1.00000     host ceph3
 2   hdd 1.00000         osd.2
[root@ceph1 ~]#
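As a footnote, the same fix can usually be applied online, without the export/edit/recompile/import round trip, by reparenting the existing host buckets with the CRUSH CLI (hostnames as in the fixed cluster above):

ceph osd crush move ceph1 root=default
ceph osd crush move ceph2 root=default
ceph osd crush move ceph3 root=default

Each command moves a host bucket under root default, after which the PGs should start peering immediately.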