megacli 管理 ceph 存储常用脚本

4 篇文章 0 订阅

说明

只适用于可用使用 megacli 支持的 raid controller 
只适用于手动进行 ceph 管理的集群,  不支持 ceph deploy 创建的集群
需修改对应磁盘设备命名

脚本

#!/bin/bash
#
#  使用说明: 
#     脚本用于停止 osd, umount osd, 标注对应 raid 设备为离线状态
#     ./stopraid.sh [number]     [number] 为对应的 raid 卡 slot 槽号
#
#  1. 获取帮助  (./raidrepair.sh   ||   ./raidrepair.sh  -h ) 
#
#  2. 确认整体磁盘
#    确认磁盘整体故障     (./raidrepair.sh -a)
#    确认具体某个磁盘故障  (./raidrepair.sh -c 8)   假设 raid 中第 8 个 slot number 有问题  ( 确定 slot number 8 对应着 /dev/sdf1  /var/lib/ceph/osd/ceph-20 )
#    确认磁盘挂载点       (./raidrepair.sh -m)    确定 slot number 8 对应着 /dev/sdf1  /var/lib/ceph/osd/ceph-20
#    确认 raid 中虚拟磁盘  (./raidrepair.sh -v)   缺 slot number 8 对应着 ( Virtual Drive: 5 )  
#    ##########   建议手动对 /etc/fstab 中, 所有 ceph 磁盘信息进行屏蔽  ####################
#
#  3. 故障处理
#     删除故障磁盘  (./raidrepair.sh -d 8) 
#
#  4. 关机, 更换磁盘  ( /etc/init.d/ceph -a stop osd ; init 0 )
#      ##########   关机后, 可能需要对 raid 执行清除缓存操作 (需要利用 idrac 进行手动操作 #####################
#      ##########   假如没有对 /etc/fstab 中 ceph 磁盘执行屏蔽操作, 可能导致无法登入系统 #####################
#
#  5. 修复磁盘
#      确认磁盘状态  ( ./raidrepair.sh -c 8 )  确认状态  ( Firmware state: Unconfigured(good), Spun Up )
#      令磁盘在线    ( ./raidrepair.sh -o 8 )
#      令该磁盘执行初始化  ( ./raidrepair.sh -i 8 )  注意, 不要写错 slot, 因为磁盘数据将会丢失
#      重启测试          ( init 6 )
#
#  6. 初始化 ceph
#      挂载所有的 ceph 磁盘 ( ./raidrepair.sh -m )
#      手动启动 ceph mon, ceph osd  ( /etc/init.d/ceph start mon )
#      格式化新的故障 ceph 磁盘 ( ./raidrepair.sh -f 8 )
#      初始化 ceph  ( ./raidrepair.sh -p 8 )



if [ `whoami` != 'root' ]
then
	echo "This program must be run by root."
	exit
fi


function rpmcheck()
{

	if [ ! -f "/sbin/megacli" ]
	then

	toolsrpm=`rpm -qa | grep -i megacli | tail -1 `
	if [ -z "$toolsrpm" ]
	then
		echo "Error:  Megacli tool not install."
		exit
	fi

		grep 8.07 `echo $toolsrpm`  > /dev/null 2>&1
		if [ $? -eq 0 ]
		then
			file=`rpm -ql $toolsrpm | grep -i "MegaCli$"`
		else
			file=`rpm -ql $toolsrpm | grep -i "MegaCli$" | grep "bin/"`
		fi
		ln -s $file /sbin/megacli
	fi
}

function parametercheck()
{
	if [ -z $1 ]
	then
		help
		exit
	fi
}


function verifyceph()
{
	dirs=`ls -d /var/lib/ceph/osd/*`
	if [ -z "$dirs" ]
	then
		echo "there is nothing is /var/lib/ceph/osd directory"
		exit
	else
		for dir in $dirs
		do
			if [ -f "$dir/whoami" ]
			then
				num=`cat $dir/whoami`
				partition=`df | grep "$dir" | awk '{print $1}'`
			else
				num="NULL"
				partition="NULL"
			fi
			echo -e "$dir\t\tCeph Number is: $num\t\tPartition is: $partition"
		done
	fi
}


function help()
{
 cat <<'EOF'
-a  use to check all raid slot status
-h  use to display help
-y  use to check ceph mount 
-m  use to show partition mount to ceph.
-c  [num]  || ex: ( raid card slot number )  use to check raid slot number.
-d  [num]  || ex: ( raid card slot number )  use to offline raid disk. 
-f  [num]  || ex: ( raid card slot number )  use to format partition.
-i  [num]  || ex: ( raid card slot number )  use to initial raid slot number disk.
-o  [num]  || ex: ( raid card slot number )  use to online raid disk.
-p  [num]  || ex: ( raid card slot number )  use to initial ceph and start up ceph.
-v  [num]  || ex: ( raid card slot number )  use to show virtual partition.
EOF
}

parametercheck $1
rpmcheck

function raidcheckall()
{
	/sbin/megacli -PDList  -aALL |egrep "Slot Number|Error"	
	exit
}

function virtualcheck()
{
	/sbin/megacli -cfgdsply -aALL | grep -v Information | grep -E "Virtual|Slot"
}


function automountceph()
{
	disks=`fdisk -l  | grep GPT | awk '{print $1}' | grep -v sda`
	for disk in $disks
	do
		mount $disk /mnt
		if [ -f "/mnt/whoami" ]
		then
			cephnum=`cat /mnt/whoami`
			echo "mount  $disk /var/lib/ceph/osd/ceph-$cephnum"
		fi
		umount /mnt
	done
}

while getopts ":hHaAvVyYmMc:d:o:p:f:i:" OPT;
do
	case $OPT in
		h|H)
			help
		;;
		a|A)
			raidcheckall
		;;
		v|V)
			virtualcheck
		;;
		y|Y)
			verifyceph
		;;
		m|M)
			automountceph
		;;
		c)
			slotnum=$OPTARG		
		;;
		d)
			down=$OPTARG
		;;
		o)
			up=$OPTARG
		;;
		i)
			initial=$OPTARG
		;;
		f)
			prepare=$OPTARG
		;;
		p)
			ready=$OPTARG
		;;
		?)
			help
			exit
		;;
		*)
			help
			exit
		;;
	esac

done

function raidcheck()
{
		if [ ! -z $slotnum ]
		then
			/sbin/megacli -PDList  -aALL  | sed -n /"Slot\ Number:\ $slotnum\$"/,/Media\ Type/p | grep -E "Slot Number|Device Id|Error Count|Failure Count|Raw Size|Firmware state|Inquiry Data" 

	
			virtualnum=`/sbin/megacli -cfgdsply -aALL | grep -v Information | grep -E "Virtual|Slot" | grep -w -B 1 "Slot Number: $slotnum" | grep Virtual  | awk '{print $3}'`
		if [ -z $virtualnum ]
		then
			exit
		fi
			case $virtualnum in 
				0)
					partition=sda1
					disk=sda
				;;
				1)
					partition=sdb1
					disk=sdb
				;;
				2)
					partition=sdc1
					disk=sdc
				;;
				3)
					partition=sdd1
					disk=sdd
				;;
				4)
					partition=sde1
					disk=sde
				;;
				5)
					partition=sdf1
					disk=sdf
				;;
				6)
					partition=sdg1
					disk=sdg
				;;
				7)
					partition=sdh1
					disk=sdh
				;;
				8)
					partition=sdi1
					disk=sdi
				;;
				9)
					partition=sdj1
					disk=sdj
				;;
				10)
					partition=sdk1
					disk=sdk
				;;
				11)
					partition=sdl1
					disk=sdl
				;;
				12)
					partition=sdm1
					disk=sdm
				;;
				13)
					partition=sdn1
					disk=sdn
				;;
			esac
#		if [ -z $partition ]

			cephnum=`df -h | grep /dev/$partition | awk '{print $NF}' | awk -F- '{print $NF}'`
			echo "Ceph number: $cephnum"
			echo "Partition: $partition"

		fi
}


function raidoffline()
{
	if [ ! -z $down ]
	then
		nums=`/sbin/megacli -PDList  -aALL  |  grep "Slot Number" | awk -F: '{print $2}'`
		echo $nums | grep $down > /dev/null
		if [ $? -ne 0 ]
		then
			echo "Slot Number: $down is not exists"
			exit
		else

			/sbin/megacli -PDList  -aALL  | sed -n /"Slot\ Number:\ $down\$"/,/Media\ Type/p | grep "Firmware state" | grep Online | grep Up > /dev/null
			if [ $? -ne 0 ]
			then
				echo "Slot Number: $down status error, please try to use ./$0 -c $down"
			fi

			virtualnum=`/sbin/megacli -cfgdsply -aALL | grep -v Information | grep -E "Virtual|Slot" | grep -w -B 1 "Slot Number: $down" | grep Virtual | awk '{print $3}'`
			case $virtualnum in 
				0)
					partition=sda1
					disk=sda
				;;
				1)
					partition=sdb1
					disk=sdb
				;;
				2)
					partition=sdc1
					disk=sdc
				;;
				3)
					partition=sdd1
					disk=sdd
				;;
				4)
					partition=sde1
					disk=sde
				;;
				5)
					partition=sdf1
					disk=sdf
				;;
				6)
					partition=sdg1
					disk=sdg
				;;
				7)
					partition=sdh1
					disk=sdh
				;;
				8)
					partition=sdi1
					disk=sdi
				;;
				9)
					partition=sdj1
					disk=sdj
				;;
				10)
					partition=sdk1
					disk=sdk
				;;
				11)
					partition=sdl1
					disk=sdl
				;;
				12)
					partition=sdm1
					disk=sdm
				;;
				13)
					partition=sdn1
					disk=sdn
				;;
			esac
			raidnum=`/sbin/megacli -PDList  -aALL | grep 'Enclosure Device ID' | uniq | awk -F[:\ ] '{print $NF}'`
			cephnum=`df -h | grep /dev/$partition | awk '{print $NF}' | awk -F- '{print $NF}'`

# 执行 ceph 维护
			ceph osd set norecover
			ceph osd set noscrub
			ceph osd set nobackfill
			ceph osd set nodeep-scrub

			if [ ! -z $cephnum ]
			then
				/etc/init.d/ceph stop osd.$cephnum
				umount /dev/$partition
				sed -i /ceph-$cephnum/d /etc/fstab
			fi

#  执行 megacli 维护
			/sbin/megacli -PDOffline -PhysDrv [$raidnum:$down] -a0
			/sbin/megacli -PDMarkMissing -PhysDrv [$raidnum:$down] -a0
			/sbin/megacli -PDPrpRmv -PhysDrv [$raidnum:$down] -a0


## 提示
			echo "stop raid slot complete, please use this command to shutdown compute"
			echo "#####################################################" 
			echo "#       /etc/init.d/ceph -a stop osd ; init 0       #"
			echo "#####################################################" 
		fi
	fi
}


##################terrry


function raidonline()
{
######
#  检测没有被挂载的 ceph 目录对应的 number
#


	if [ ! -z $up ]
	then
		/sbin/megacli -PDList  -aALL  | sed -n /"Slot\ Number:\ $up\$"/,/Media\ Type/p | grep "Firmware state" | grep Online | grep Up > /dev/null
		if [ $? -eq 0 ]
		then
			echo "Slot Number: $up  status is up, use $0 -c $up"
			exit
		fi

		raidnum=`/sbin/megacli -PDList  -aALL | grep 'Enclosure Device ID' | uniq | awk -F[:\ ] '{print $NF}'`


#
#    获得 -L 的 num 
		virtualnum=`megacli -cfgdsply -aALL  | grep  -E "Target\ Id:|Slot Number:" | grep -w -B 1 "Slot Number: $up\$" | grep Virtual | awk '{print $3}'`


## megacli 维护
		/sbin/megacli -PDMakeGood -PhysDrv [$raidnum:$up] -force -a0 
		/sbin/megacli -CfgLdAdd -r0 [$raidnum:$up] WT RA DIRECT -a0
		if [ ! -z $virtualnum ]
		then
			/sbin/megacli  -LDInit -start -L$virtualnum -a0
		fi
	fi
}

function initialraid()
{
	if [ ! -z $initial ]
	then
		/sbin/megacli -PDList  -aALL  | sed -n /"Slot\ Number:\ $initial\$"/,/Media\ Type/p | grep "Firmware state" | grep Online | grep Up > /dev/null
		if [ $? -ne 0 ]
		then
			echo "Slot Number: $initial  status is not up, use $0 -c $initial"
			exit
		fi
		virtualnum=`/sbin/megacli -cfgdsply -aALL  | grep  -E "Target\ Id:|Slot Number:" | grep -w -B 1 "Slot Number: $initial\$" | grep Virtual | awk '{print $3}'`
		if [ ! -z $virtualnum ]
		then
			/sbin/megacli  -LDInit -start -L$virtualnum -a0
		fi
	fi
}
#   分区
#
function startupceph()
{
	if [ ! -z $prepare ]
	then

		nums=`/sbin/megacli -PDList  -aALL  |  grep "Slot Number" | awk -F: '{print $2}'`
		echo $nums | grep $prepare > /dev/null
		if [ $? -ne 0 ]
		then
			echo "Slot Number: $prepare is not exists"
			exit
		else
			virtualnum=`/sbin/megacli -cfgdsply -aALL | grep -v Information | grep -E "Virtual|Slot" | grep -w  -B 1 "Slot Number: $prepare" | grep Virtual | awk '{print $3}'`
			case $virtualnum in 
				0)
					partition=sda1
					disk=sda
				;;
				1)
					partition=sdb1
					disk=sdb
				;;
				2)
					partition=sdc1
					disk=sdc
				;;
				3)
					partition=sdd1
					disk=sdd
				;;
				4)
					partition=sde1
					disk=sde
				;;
				5)
					partition=sdf1
					disk=sdf
				;;
				6)
					partition=sdg1
					disk=sdg
				;;
				7)
					partition=sdh1
					disk=sdh
				;;
				8)
					partition=sdi1
					disk=sdi
				;;
				9)
					partition=sdj1
					disk=sdj
				;;
				10)
					partition=sdk1
					disk=sdk
				;;
				11)
					partition=sdl1
					disk=sdl
				;;
				12)
					partition=sdm1
					disk=sdm
				;;
				13)
					partition=sdn1
					disk=sdn
				;;
			esac
#############
		fi

		if [[ ! -b "/dev/$partition" ]] && [[ -b "/dev/$disk" ]]
		then
			disksize=`parted /dev/$disk print|  grep -v Flags | grep ^Disk  | awk -F[:\ ] '{print $4}' | sed s/GB//`
			if [ $disksize -gt 2000 ] 
			then
				parted /dev/"$disk"  mklabel gpt  
			else
				parted /dev/"$disk" mklabel msdos
			fi
			parted /dev/"$disk"  mkpart primary xfs 1 100%
			partprobe
			mkfs -t xfs -i size=512 /dev/"$partition"
		else
			echo "/dev/$partition is exists, are you sure wanna initial? use dd if=/dev/zero of=/dev/$disk bs=1M count=10"
		fi
	fi
}


function initialceph()
{
	if [ ! -z $ready ]
	then
		nums=`/sbin/megacli -PDList  -aALL  |  grep "Slot Number" | awk -F: '{print $2}'`
		echo $nums | grep $ready > /dev/null
		if [ $? -ne 0 ]
		then
			echo "Slot Number: $ready is not exists"
			exit
		else
			virtualnum=`/sbin/megacli -cfgdsply -aALL | grep -v Information | grep -E "Virtual|Slot" | grep -w  -B 1 "Slot Number: $ready" | grep Virtual | awk '{print $3}'`
			case $virtualnum in 
				0)
					partition=sda1
					disk=sda
				;;
				1)
					partition=sdb1
					disk=sdb
				;;
				2)
					partition=sdc1
					disk=sdc
				;;
				3)
					partition=sdd1
					disk=sdd
				;;
				4)
					partition=sde1
					disk=sde
				;;
				5)
					partition=sdf1
					disk=sdf
				;;
				6)
					partition=sdg1
					disk=sdg
				;;
				7)
					partition=sdh1
					disk=sdh
				;;
				8)
					partition=sdi1
					disk=sdi
				;;
				9)
					partition=sdj1
					disk=sdj
				;;
				10)
					partition=sdk1
					disk=sdk
				;;
				11)
					partition=sdl1
					disk=sdl
				;;
				12)
					partition=sdm1
					disk=sdm
				;;
				13)
					partition=sdn1
					disk=sdn
				;;
			esac
#############
		fi
#  检测没有被挂载的 ceph 目录对应的 number
#
		for dir in `ls -d /var/lib/ceph/osd/ceph-*`
		do
			if [ ! -f  "$dir/whoami" ]
			then
				cephnum=`echo $dir | awk -F"-"  '{print $NF}'`
			fi
		done


		mount "/dev/$partition" "/var/lib/ceph/osd/ceph-$cephnum"
		
		uuid=`blkid /dev/$partition | awk -F\" '{print $2}'`
		echo  "UUID=$uuid  /var/lib/ceph/osd/ceph-$cephnum  xfs defaults 0 0" >> /etc/fstab
		cephuuid=`cat /etc/ceph/ceph.conf | grep fsid | awk -F[=\ ] '{print $NF}'`

		/usr/bin/ceph-osd -i $cephnum --mkfs --mkkey --osd-uuid $cephuuid

		authkey=`/usr/bin/ceph auth list 2> /dev/null |  grep -w -A 1 osd."$cephnum"  | grep key | awk -F[:\ ] '{print $NF}'`
		echo -e "[osd.$cephnum]\n\tkey = $authkey"  > /var/lib/ceph/osd/ceph-$cephnum/keyring
		touch /var/lib/ceph/osd/ceph-$cephnum/{sysvinit,done}


		/etc/init.d/ceph start osd.$cephnum
		ceph osd unset norecover
		ceph osd unset noscrub
		ceph osd unset nobackfill
		ceph osd unset nodeep-scrub
	fi
}

raidcheck
raidoffline
raidonline
startupceph
initialceph
initialraid
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

Terry_Tsang

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值