10. Ceph Cluster Operations and Maintenance

I. OSD Horizontal Expansion (rack-based, BlueStore)

1. System initialization

#!/bin/bash

sys_init(){

	systemctl stop firewalld
	systemctl disable firewalld
	sed -i 's/enforcing/disabled/' /etc/selinux/config
	setenforce 0
	systemctl disable NetworkManager
	systemctl stop NetworkManager

	timedatectl set-timezone Asia/Shanghai
	yum -y install ntp ntpdate   # ntpd is provided by the ntp package
	systemctl restart ntpd
	systemctl enable ntpd
	ntpdate ntp1.aliyun.com  # in production, use several internal NTP sources; unsynchronized clocks will cause cluster failures

	swapoff -a

	echo '* soft nofile 65535' >>/etc/security/limits.conf
	echo '* hard nofile 65535' >>/etc/security/limits.conf

	echo 'kernel.pid_max = 4194303' >>/etc/sysctl.conf
	echo 'vm.swappiness = 0' >>/etc/sysctl.conf 

	sysctl -p

}

ceph_optimization(){

	# disable NUMA: https://blog.csdn.net/qq_34065508/article/details/103358812
	# tuning reference: https://blog.csdn.net/cybertan/article/details/51131444
	

	# read_ahead: improve disk read performance by prefetching data into RAM
	echo "8192" > /sys/block/sda/queue/read_ahead_kb

	# I/O scheduler: use noop for SSDs, deadline for SATA/SAS
	#echo "deadline" >/sys/block/sd[x]/queue/scheduler
	#echo "noop" >/sys/block/sd[x]/queue/scheduler

	echo "noop" >/sys/block/sda/queue/scheduler
	echo "noop" >/sys/block/sdb/queue/scheduler
	echo "deadline" >/sys/block/sdc/queue/scheduler
	echo "deadline" >/sys/block/sdd/queue/scheduler

}

yum_config(){

	mkdir -p /etc/yum.repos.d/backup-repo
	mv /etc/yum.repos.d/*.repo /etc/yum.repos.d/backup-repo
	
	curl -o /etc/yum.repos.d/CentOS-Base.repo http://mirrors.aliyun.com/repo/Centos-7.repo
	curl -o /etc/yum.repos.d/epel-7.repo http://mirrors.aliyun.com/repo/epel-7.repo

cat >>/etc/yum.repos.d/ceph.repo<<EOF
[Ceph]
name=Ceph packages for x86_64
baseurl=http://mirrors.aliyun.com/ceph/rpm-nautilus/el7/x86_64/
gpgcheck=0
priority=1

[Ceph-noarch]
name=Ceph noarch packages
baseurl=http://mirrors.aliyun.com/ceph/rpm-nautilus/el7/noarch
gpgcheck=0
priority=1

[ceph-source]
name=Ceph source packages
baseurl=http://mirrors.aliyun.com/ceph/rpm-nautilus/el7/SRPMS
gpgcheck=0
priority=1
EOF


	yum makecache
	yum -y install epel-release

}


main(){
	
	sys_init
	ceph_optimization	
	yum_config

}

main
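
The script above can be saved as, for example, osd-init.sh (the filename and host list below are placeholders) and pushed to each new OSD node from the admin node:

for host in ceph04; do
    scp osd-init.sh root@${host}:/root/
    ssh root@${host} "bash /root/osd-init.sh"
done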

2. Configure the NTP service

cat /etc/ntp.conf
server ntp.aliyun.com iburst

systemctl restart ntpd
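
To confirm time sync is working (a quick check, not part of the original steps):

ntpq -p        # the selected upstream peer is marked with '*'
timedatectl    # should report "NTP synchronized: yes"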

3. Configure name resolution: add the new node to /etc/hosts on every node, or use a shared DNS.
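
For example, an /etc/hosts entry for the new node could look like this (the IP address is a placeholder; use your own):

192.168.1.14    ceph04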

4. Prepare BlueStore devices
On the OSD node:

pvcreate /dev/sdb
vgcreate bluestore_ssd /dev/sdb
for i in {c..d};do lvcreate -n blockdb_sd$i -L 2G bluestore_ssd; lvcreate -n blockwal_sd$i -L 2G bluestore_ssd; done
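
Here /dev/sdb is assumed to be the SSD; each data disk (sdc, sdd) gets a 2G block.db and a 2G block.wal logical volume. A quick sanity check:

vgs bluestore_ssd
lvs bluestore_ssd    # should list blockdb_sdc/sdd and blockwal_sdc/sdd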

The remaining steps (5 through 10) are run from the Ceph admin (deploy) node.

5. Data recovery / backfill policy
1> Scenario: fully preserve client bandwidth

Completely disable data rebuild and migration:
ceph osd set norebalance
ceph osd set norecover
ceph osd set nobackfill

During off-peak hours, re-enable data rebuild and migration:
ceph osd unset norebalance
ceph osd unset norecover
ceph osd unset nobackfill
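
Either way, the currently set flags can be confirmed with (not part of the original steps):

ceph osd dump | grep flags    # e.g. flags norebalance,norecover,nobackfill
ceph health detail            # HEALTH_WARN lists the corresponding flag warnings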

2> Scenario: prioritize client bandwidth

Throttle recovery and backfill I/O bandwidth:
ceph tell osd.* injectargs '--osd-max-backfills 1'
ceph tell osd.* injectargs "--osd_recovery_max_active 1"
ceph tell osd.* injectargs '--osd-recovery-sleep 1'

Once recovery has finished, restore the defaults:
ceph tell osd.* injectargs '--osd-max-backfills 1'
ceph tell osd.* injectargs "--osd_recovery_max_active 3"
ceph tell osd.* injectargs "--osd_recovery_sleep 0"

Default values:

ceph --admin-daemon  /var/run/ceph/ceph-osd.0.asok config show | grep -E "osd_max_backfills|osd_recovery_max_active|osd_recovery_max_single_start|osd_recovery_sleep|osd_recovery_op_priority"

"osd_max_backfills": "1",
"osd_recovery_max_active": "3",
"osd_recovery_max_single_start": "1",
"osd_recovery_op_priority": "3",
"osd_recovery_sleep": "0.000000",

6. Set up passwordless SSH from the admin (master) node to the new node

ssh-copy-id root@ceph04

7. Push the cluster configuration from the admin node

ceph-deploy --overwrite-conf admin ceph04

8. Zap the data disks

for i in {c..d};do ceph-deploy disk zap ceph04 /dev/sd$i; done

9. Create the OSDs

for i in {c..d};do ceph-deploy osd create ceph04 --data /dev/sd$i --block-db bluestore_ssd/blockdb_sd$i --block-wal bluestore_ssd/blockwal_sd$i; done

10. Move the new host into rack02

ceph osd crush move ceph04 rack=rack02
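
If the rack02 bucket does not exist yet, create it and attach it to the CRUSH root first; afterwards ceph osd tree should show ceph04 and its OSDs under rack02 (a sketch, assuming the default root is named "default"):

ceph osd crush add-bucket rack02 rack      # only needed if rack02 does not exist
ceph osd crush move rack02 root=default    # attach the rack bucket to the root
ceph osd tree                              # verify ceph04 now sits under rack02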

II. Replacing a Failed Disk

1. Check for down OSDs and OSD latency (high latency usually points to bad sectors), and use dmesg to look for I/O errors

ceph osd tree|grep down
ceph osd perf
dmesg|grep I/O
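
Optionally, if smartmontools is installed, SMART data can confirm a failing drive (sdd here is just an example):

smartctl -a /dev/sdd | grep -iE "reallocated|pending|uncorrect"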

2. Completely disable data rebuild and migration

ceph osd set norebalance
ceph osd set norecover
ceph osd set nobackfill

3. Physically replace the failed disk
4. Remove the failed OSD from the cluster

ceph osd out osd.7
ceph osd crush rm osd.7 
ceph osd rm osd.7
ceph auth rm osd.7

Unmount the OSD data directory:
umount -f /var/lib/ceph/osd/ceph-7
rm -rf /var/lib/ceph/osd/ceph-7

For BlueStore OSDs, the LVM volumes and any partitions left on the disks also need to be cleaned up.
# identify the db device and wal device backing this OSD
ceph-volume lvm list
lvremove /dev/bluestore_ssd/blockwal_sdd  -y
lvremove /dev/bluestore_ssd/blockdb_sdd  -y
lvremove /dev/ceph-07924eaf-0caf-4f1b-b4de-743ce5772211/osd-block-90ef2320-46ea-4826-9c9f-434e785a5db2 -y

5. Add the new disk

Create block.db and block.wal LVM volumes on the SSD for the replacement disk:
lvcreate -n blockdb_sdd -L 2G bluestore_ssd; lvcreate -n blockwal_sdd -L 2G bluestore_ssd

Zap the sdd disk:
ceph-deploy disk zap ceph04 /dev/sdd

Create the OSD:
ceph-deploy osd create ceph04 --data /dev/sdd --block-db bluestore_ssd/blockdb_sdd --block-wal bluestore_ssd/blockwal_sdd

6. Re-enable data rebuild and migration

ceph osd unset norebalance
ceph osd unset norecover
ceph osd unset nobackfill

7. Ceph consistency check (resource- and time-consuming; not recommended in production)

for i in `ceph pg dump|grep active+clean|awk '{print $1}'`; do ceph pg deep-scrub ${i};done
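
Deep scrubs run asynchronously; progress can be watched with (a quick sketch, not part of the original steps):

ceph -s                                    # shows scrubbing activity
ceph pg dump | grep -c "scrubbing+deep"    # PGs currently being deep-scrubbed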

III. Daemon Management

1. Service management

1> All Ceph services on a node
systemctl status ceph.target

2> By service type
systemctl status ceph-osd.target
systemctl status ceph-mds.target
systemctl status ceph-radosgw.target
systemctl status ceph-mon.target
systemctl status ceph-mgr.target
systemctl status ceph-fuse.target
systemctl status ceph-crash.service

3> Individual daemons
systemctl status ceph-osd@0.service
systemctl status ceph-mon@ceph01.service 
The other daemon types follow the same pattern.

2. Ceph log analysis: see the Ceph logging and debugging documentation.
Debug logging is normally enabled via the Ceph configuration file when the cluster starts. If problems come up at cluster startup, Ceph debug logging can also be added to the configuration file afterwards. Ceph log files are written under /var/log/ceph by default.
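
A sketch of raising the debug level for one subsystem in ceph.conf (values are log-level/in-memory-level pairs; the numbers below are only illustrative):

[osd]
debug_osd = 5/20
debug_ms = 1/5

The same can be done at runtime, consistent with the earlier injectargs examples: ceph tell osd.0 injectargs '--debug-osd 5/20'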

3. Cluster status monitoring

ceph -s
ceph -w
ceph health detail
ceph quorum_status
ceph osd stat
ceph osd df

4. Pool management
5. Cluster parameter tuning (sketched below):
1> Edit ceph.conf and push it to the nodes
2> Set values temporarily per daemon via the admin socket (config set)
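
A sketch of both approaches (the host names follow the ones used earlier; adjust to your cluster):

# 1> edit ceph.conf on the admin node, push it, then restart the affected daemons
ceph-deploy --overwrite-conf config push ceph01 ceph04
systemctl restart ceph-osd.target    # on the nodes whose settings changed

# 2> temporary per-daemon change via the admin socket (lost on daemon restart)
ceph --admin-daemon /var/run/ceph/ceph-osd.0.asok config set osd_max_backfills 2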

IV. CRUSH Map

1. List the CRUSH rules

ceph osd crush rule ls

2. Show which CRUSH rule the rbd pool uses

ceph osd pool get rbd crush_rule
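
Conversely, a pool can be switched to another rule (the rule name here is only an example):

ceph osd pool set rbd crush_rule replicated_rack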

3. Show the details of a specific rule

ceph osd crush rule dump replicated_rule

4. Edit the CRUSH map

Export the CRUSH map as a binary file:
ceph osd getcrushmap -o crushmap.bin
Decompile the binary file:
crushtool -d crushmap.bin -o crushmap.txt
After editing the CRUSH map as needed, recompile it into a binary:
crushtool -c crushmap.txt -o crushmap-new.bin
Apply the new CRUSH map:
ceph osd setcrushmap -i crushmap-new.bin
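
For reference, a rule in crushmap.txt that spreads replicas across racks (matching the rack layout in section I) might look like this; the rule name and id are illustrative:

rule replicated_rack {
        id 1
        type replicated
        min_size 1
        max_size 10
        step take default
        step chooseleaf firstn 0 type rack
        step emit
}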

5. Notes on modifying the CRUSH map
1> Back up crushmap.bin before making changes, both for later comparison and for recovery from mistakes.
2> Large changes to the hierarchy trigger heavy data migration, so plan the layout carefully in advance.
3> After hierarchy changes, manual CRUSH map edits can be overwritten when OSDs restart. Disable this behaviour with osd crush update on start = false (see the snippet below), push the config to all nodes, and restart ceph-osd.target.
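
A minimal ceph.conf snippet for that option (add it on the admin node and push it with ceph-deploy as in section I, step 7):

[osd]
osd crush update on start = false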
