全网最新~使用Shell脚本搭建双机高可用的Lustre集群

1.环境说明:

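| 主机名 | 系统 | 挂载情况 | 管理IP地址(网卡ens33) / Lnet的IP地址 | Lustre集群名 | 内存 |
| --- | --- | --- | --- | --- | --- |
| mds005 | Centos7.9 | (共享磁盘)1个mgs,2个MDT,2个OST | 192.168.10.25 / 192.168.209.25 | global | 2G |
| mds006 | Centos7.9 | (共享磁盘)1个mgs,2个MDT,2个OST | 192.168.10.26 / 192.168.209.26 | global | 2G |
| manager | Centos7.9 | 无 | 192.168.10.5 | 无 | 2G |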

注意:自动化脚本lustre_auto.sh在manager节点上,五块4G共享磁盘(/dev/sdb,/dev/sdc,/dev/sdd,/dev/sde,/dev/sdf),两个网卡(管理IP地址,Lnet通信IP地址)

2.变量定义:

# Management addresses of the two cluster nodes (the IPs bound to ens33).
declare -a host_address=(192.168.10.25 192.168.10.26)
# LNet addresses of the two nodes (the IPs on ens38; another NIC works too as
# long as both nodes use the same interface name).
declare -a lnet_address=(192.168.209.25 192.168.209.26)
# Hostnames of the nodes, listed in the same order as host_address.
declare -a host_hostname=(mds005 mds006)
# Password used for the ssh logins and for the cluster authentication.
host_passwd=110119

3.安装expect命令:

# Make sure the expect command is available; later steps feed scripts to
# /usr/bin/expect to answer interactive prompts.
if ! expect -v &> /dev/null; then
    echo "没有expect,安装expect命令"
    # Stop here when the installation fails instead of continuing without
    # expect and failing later in a less obvious place.
    if ! yum install -y expect; then
        echo "expect安装失败,请排错!" >&2
        exit 1
    fi
fi

4.配置免密登录:

# Create the local ssh key pair when no public key exists yet.
echo "########################## 本地开始配置ssh ##########################"
if [[ -e "${HOME}/.ssh/id_rsa.pub" ]]; then
    echo "ssh公钥已创建"
else
    echo "ssh公钥未创建,开始创建"
    # Non-interactive key generation: -N '' sets an empty passphrase and -f
    # fixes the key path, so no prompts need to be scripted through expect.
    # The "y" on stdin answers the overwrite question that ssh-keygen asks
    # when a private key exists without its .pub file.  A 2048-bit key is
    # generated instead of the 1024-bit key the step used before.
    mkdir -p "${HOME}/.ssh" && chmod 700 "${HOME}/.ssh"
    if ! ssh-keygen -q -t rsa -b 2048 -N '' -f "${HOME}/.ssh/id_rsa" <<< y; then
        echo "ssh密钥创建失败,请排错!" >&2
        exit 1
    fi
fi



# Install the local public key on both managed hosts.  expect answers the
# host-key confirmation and the root password prompt of ssh-copy-id.
j=0
while (( j < 2 )); do
    printf '%s\n' "########################## ${host_address[j]}正在被添加公钥 ##########################"
/usr/bin/expect << EXPECT_SCRIPT
# Seconds to wait for each expected prompt
set timeout 10

spawn ssh-copy-id -i /root/.ssh/id_rsa.pub root@${host_address[j]}

# Keep answering prompts until the spawned command ends
expect {
        "connecting (yes/no)?" { send "yes\n";  exp_continue }
        "s password:"          { send "${host_passwd}\n"; exp_continue }
}
EXPECT_SCRIPT
    printf '%s\n' "############# ${host_address[j]}配置完毕 #############"
    j=$(( j + 1 ))
done




# Create an ssh key pair on every managed host that does not have one yet.
# This relies on passwordless root login from this machine, which the
# ssh-copy-id step above sets up.
for ((j = 0; j < ${#host_address[@]}; j++)); do
    echo "########################## ${host_address[j]}开始创建密钥 ##########################"
    # Use the exit status of the remote test directly.  Comparing an echoed
    # status inside [ ] breaks when ssh fails and prints nothing.
    if ssh "root@${host_address[j]}" 'test -e ~/.ssh/id_rsa.pub'; then
        echo "ssh公钥已创建"
    else
        echo "ssh公钥未创建,开始创建"
        # Non-interactive generation (-N '' empty passphrase, -f fixed path);
        # the "y" on stdin answers a possible overwrite question.  A 2048-bit
        # key replaces the 1024-bit key used before.
        if ! ssh "root@${host_address[j]}" \
            "ssh-keygen -q -t rsa -b 2048 -N '' -f ~/.ssh/id_rsa" <<< y; then
            echo "${host_address[j]} ssh密钥创建失败,请排错!" >&2
            exit 1
        fi
    fi
    echo "############# ${host_address[j]}配置完毕 #############"
done



# Exchange keys between the managed hosts: every host installs its own public
# key on every host (itself included) so the nodes can log in to each other
# without a password.
#
# Only one process is spawned per pair.  expect talks to the most recently
# spawned process, so the additional interactive "ssh root@host" spawn that
# used to precede ssh-copy-id was never answered and has been removed.
# -t allocates a terminal on the remote side so that the nested ssh-copy-id
# can show its host-key and password prompts.
for ((j = 0; j < ${#host_address[@]}; j++)); do
    for ((k = 0; k < ${#host_address[@]}; k++)); do
        echo "########################## ${host_address[j]}开始分配公钥给${host_address[k]} ##########################"
/usr/bin/expect << eof
# Seconds to wait for each expected prompt
set timeout 10

spawn ssh -t root@${host_address[j]} "ssh-copy-id -i /root/.ssh/id_rsa.pub root@${host_address[k]}"

# Keep answering prompts until the spawned command ends
expect {
        "connecting (yes/no)?" { send "yes\n";  exp_continue }
        "s password:"          { send "${host_passwd}\n"; exp_continue }
}
eof
        echo "############# ${host_address[j]}配置完毕 #############"
    done
done

5.主机名域名映射:

# Optionally set the hostname of every node and rewrite its /etc/hosts with
# the localhost entries plus one "address hostname" line per node.
# Answer Y to run the step, n to skip it; after 100 other answers the script
# exits.
for ((i = 0; i < 100; i++)); do
    read -r -p "修改主机名和配置域名映射?(Y/n): " flag
    case "${flag}" in
        Y)
            sleep 3
            echo "########################## 开始配置主机名和域名映射 ##########################"
            for ((j = 0; j < ${#host_address[@]}; j++)); do
                # Quoted comparison: an empty answer (for example when ssh
                # fails) must not turn the test into a syntax error.
                if [[ "$(ssh "root@${host_address[j]}" hostname)" != "${host_hostname[j]}" ]]; then
                    ssh "root@${host_address[j]}" "hostnamectl set-hostname ${host_hostname[j]}"
                fi
                # Build the whole file locally and write it with a single ssh
                # call, so /etc/hosts is never left half written between
                # several remote appends.
                {
                    echo "127.0.0.1   localhost localhost.localdomain localhost4 localhost4.localdomain4"
                    echo "::1         localhost localhost.localdomain localhost6 localhost6.localdomain6"
                    for ((k = 0; k < ${#host_address[@]}; k++)); do
                        echo "${host_address[k]} ${host_hostname[k]}"
                    done
                } | ssh "root@${host_address[j]}" "cat > /etc/hosts"
            done
            echo "############# 配置完毕 #############"
            break
            ;;
        n)
            echo "############# 已跳过步骤-修改主机名和配置域名映射 #############"
            break
            ;;
        *)
            if (( i == 99 )); then
                echo "############# 已退出 #############"
                exit 1
            fi
            ;;
    esac
done

# Verify that every managed host can log in to every host without a password.
#
# expect itself exits with 0 no matter how the spawned ssh ended, so the
# script below hands the exit status of ssh back to the shell and treats a
# password prompt or a timeout as a failure.
for ((j = 0; j < ${#host_address[@]}; j++)); do
    echo "########################## ${host_address[j]}开始测试 ##########################"
    for ((k = 0; k < ${#host_address[@]}; k++)); do
        if ! /usr/bin/expect << EXPECT_EOF
# Seconds to wait for each expected prompt
set timeout 10

spawn ssh -t root@${host_address[j]} "ssh root@${host_address[k]} 'exit'"

expect {
        "connecting (yes/no)?" { send "yes\n";  exp_continue }
        "s password:"          { exit 1 }
        timeout                { exit 1 }
        eof
}
# Hand the exit status of the spawned ssh back to the calling shell
set result [wait]
exit [lindex \$result 3]
EXPECT_EOF
        then
            echo "${host_hostname[j]}主机无法免密登录${host_hostname[k]}"
            exit 1
        fi
    done
    echo "############# ${host_address[j]}测试完毕 #############"
done

6.配置防火墙和selinux:

# Optionally stop and disable firewalld and set SELINUX=disabled in
# /etc/selinux/config on both nodes.  Answer Y to run the step, n to skip it;
# after 100 other answers the script exits.
for ((i=0; i<100; i++)); do
    read -p "配置防火墙和selinux?(Y/n): " flag
    case "${flag}" in
        Y)
            sleep 3
            echo "########################## 开始配置防火墙和selinux ##########################"
            for ((j=0; j<2; j++)); do
                ssh root@${host_address[j]} "systemctl stop firewalld;systemctl disable firewalld"
                ssh root@${host_address[j]} "sed -i 's/SELINUX=.*/SELINUX=disabled/' /etc/selinux/config"
            done
            echo "############# 配置完毕 #############"
            break
            ;;
        n)
            echo "############# 已跳过步骤-配置防火墙和selinux #############"
            break
            ;;
        *)
            if (( i == 99 )); then
                echo "############# 已退出 #############"
                exit
            fi
            ;;
    esac
done

7.配置yum源:

# Optionally configure the yum repositories on every node: a local repository
# on the mounted CD-ROM, the EPEL release package and the Aliyun CentOS 7 base
# repository.  Answer Y to run the step, n to skip it; after 100 other answers
# the script exits.
for ((i = 0; i < 100; i++)); do
    read -r -p "配置yum源?(Y/n): " flag
    case "${flag}" in
        Y)
            sleep 3
            echo "########################## 开始配置yum源 ##########################"
            for ((j = 0; j < ${#host_address[@]}; j++)); do
                node="root@${host_address[j]}"

                echo "########################## 配置${host_address[j]}的本地yum源 ##########################"
                ssh "${node}" "mkdir /mnt/cdrom &> /dev/null;mount /dev/cdrom /mnt/cdrom"
                # Add the fstab entry only once.
                if ! ssh "${node}" "grep -q '^/dev/cdrom' /etc/fstab"; then
                    ssh "${node}" "echo '/dev/cdrom /mnt/cdrom iso9660 defaults  0  0' >> /etc/fstab"
                fi
                ssh "${node}" "cat > /etc/yum.repos.d/centos-local.repo" << 'REPO_EOF'
[centos7.9]
name=centos7.9
baseurl=file:///mnt/cdrom
enabled=1
gpgcheck=0
REPO_EOF
                echo "############# ${host_address[j]}配置完毕 #############"

                echo "########################## 配置${host_address[j]}的扩展源 ##########################"
                ssh "${node}" "yum install epel-release -y"
                echo "############# ${host_address[j]}配置完毕 #############"

                echo "########################## 配置${host_address[j]}的阿里yum源 ##########################"
                ssh "${node}" "yum install -y wget"
                # NOTE(review): the base repo is only replaced when the file
                # already exists, as in the original step - confirm this is
                # the intended condition.
                if ssh "${node}" 'test -e /etc/yum.repos.d/CentOS-Base.repo'; then
                    ssh "${node}" "wget -O /etc/yum.repos.d/CentOS-Base.repo http://mirrors.aliyun.com/repo/Centos-7.repo"
                fi
                # Judge the repository setup by the exit status of yum itself.
                # The former check ran a separate ssh "echo $?" whose $? was
                # expanded by the local shell, so it never saw a yum failure.
                if ! ssh "${node}" "yum clean all && yum repolist"; then
                    echo "yum源配置有误,退出执行脚本"
                    exit 1
                fi
                echo "############# ${host_address[j]}配置完毕 #############"
            done
            break
            ;;
        n)
            echo "############# 已跳过步骤-配置yum源 #############"
            break
            ;;
        *)
            if (( i == 99 )); then
                echo "############# 已退出 #############"
                exit 1
            fi
            ;;
    esac
done

8.配置chrony时间服务器:

# Optionally configure chrony on every node.  All nodes get the first node as
# their time server; the first node additionally enables "allow" and
# "local stratum 10" in its own chrony.conf.
# Answer Y to run the step, n to skip it; after 100 other answers the script
# exits.
for ((i = 0; i < 100; i++)); do
    read -r -p "配置chrony时间服务器?(Y/n): " flag
    case "${flag}" in
        Y)
            sleep 3
            echo "########################## 开始配置chrony ##########################"
            for ((j = 0; j < ${#host_address[@]}; j++)); do
                node="root@${host_address[j]}"
                # Decide by the exit status of the remote restart.  The former
                # check echoed $? inside double quotes, which the local shell
                # expanded before ssh ran, so chrony was never installed.
                if ! ssh "${node}" "systemctl restart chronyd"; then
                    echo "${host_address[j]} 安装chrony"
                    if ! ssh "${node}" "yum install -y chrony && systemctl restart chronyd"; then
                        echo "安装失败,请排错!"
                        exit 1
                    fi
                fi
                echo "${host_address[j]}配置chrony"
                # Drop the existing server lines, then point the node at the
                # first cluster node.
                ssh "${node}" "sed -i '/^server/d' /etc/chrony.conf"
                ssh "${node}" "sed -i '2a\server '"${host_address[0]}"' iburst\' /etc/chrony.conf"
                if [[ "${host_address[j]}" == "${host_address[0]}" ]]; then
                    # Only the first node enables the allow and local stratum
                    # lines in its configuration.
                    ssh "${node}" "sed -i 's/#allow 192.168.0.0\/16/allow 192.168.10.0\/16/' /etc/chrony.conf"
                    ssh "${node}" "sed -i 's/#local stratum 10/local stratum 10/' /etc/chrony.conf"
                    sleep 2
                fi
                ssh "${node}" "systemctl restart chronyd && systemctl enable chronyd &> /dev/null"
                sleep 5
                ssh "${node}" "timedatectl set-ntp true && chronyc sources -v | sed -n '/^\^\*/p'"
                # A source line starting with ^* marks the selected source;
                # without one the node is treated as misconfigured.
                if [[ -z "$(ssh "${node}" "chronyc sources -v | sed -n '/^\^\*/p'")" ]]; then
                    echo -e "\e[31m此节点${host_address[j]}的chrony配置有误,请手动调试\e[0m"
                    exit 1
                fi
                echo "############# ${host_address[j]}配置完毕 #############"
            done
            break
            ;;
        n)
            echo "############# 已跳过步骤-配置chrony时间服务器 #############"
            break
            ;;
        *)
            if (( i == 99 )); then
                echo "############# 已退出 #############"
                exit 1
            fi
            ;;
    esac
done

9.安装e2fsprogs:

# Optionally download the e2fsprogs 1.44.5.wc1 packages from whamcloud into
# ~/e2fsprogs1.44.5 on both nodes and install them with rpm.
# Answer Y to run the step, n to skip it; after 100 other answers the script
# exits.
for ((i=0; i<100; i++)); do
    read -p "安装e2fsprogs?(Y/n): " flag
    case "${flag}" in
        Y)
            sleep 3
            echo "########################## 开始安装e2fsprogs ##########################"
            for ((j=0; j<2; j++)); do

                echo "########################## ${host_address[j]}开始安装e2fsprogs ##########################"
                ssh root@${host_address[j]} "rm -rf ~/e2fsprogs1.44.5 && mkdir ~/e2fsprogs1.44.5"
                ssh root@${host_address[j]} "wget -c -r -nd https://downloads.whamcloud.com/public/e2fsprogs/1.44.5.wc1/el7/RPMS/x86_64/ -P ~/e2fsprogs1.44.5"
                # Remove the index pages and other non-package files fetched
                # by the recursive download.
                ssh root@${host_address[j]} "rm -rf ~/e2fsprogs1.44.5/index.html* ~/e2fsprogs1.44.5/unknown.gif ~/e2fsprogs1.44.5/*.gif ~/e2fsprogs1.44.5/sha256sum"
                ssh root@${host_address[j]} "rpm -Uvh ~/e2fsprogs1.44.5/* --force"
                # NOTE(review): this check passes for any installed package
                # whose name contains e2fsprogs, whatever its version.
                if ! ssh root@${host_address[j]} "rpm -qa | grep e2fsprogs"; then
                    echo "安装失败,请排错!"
                    exit
                fi
                echo "############# ${host_address[j]}配置完毕 #############"
            done
            break
            ;;
        n)
            echo "############# 已跳过步骤-安装e2fsprogs #############"
            break
            ;;
        *)
            if (( i == 99 )); then
                echo "############# 已退出 #############"
                exit
            fi
            ;;
    esac
done

10.安装lustre软件:

# Optionally install the Lustre 2.12.1 server packages on both nodes: install
# the prerequisites with yum, download the kernel and Lustre RPMs from
# whamcloud into ~/lustre2.12.1 and install them with rpm.
# Answer Y to run the step, n to skip it; after 100 other answers the script
# exits.
for ((i=0; i<100; i++)); do
    read -p "安装Lustre软件?(Y/n): " flag
    case "${flag}" in
        Y)
            sleep 3
            echo "########################## 开始安装lustre ##########################"
            lustre_rpm_url="https://downloads.whamcloud.com/public/lustre/lustre-2.12.1/el7/server/RPMS/x86_64"
            for ((j=0; j<2; j++)); do
                echo "########################## ${host_address[j]}开始安装lustre ##########################"
                ssh root@${host_address[j]} "yum install -y linux-firmware dracut selinux-policy-targeted kexec-tools libyaml perl"
                ssh root@${host_address[j]} "rm -rf ~/lustre2.12.1 && mkdir ~/lustre2.12.1"
                # One download per package, in the same order as before.
                for lustre_rpm in \
                    kernel-3.10.0-957.10.1.el7_lustre.x86_64.rpm \
                    kmod-lustre-2.12.1-1.el7.x86_64.rpm \
                    kmod-lustre-osd-ldiskfs-2.12.1-1.el7.x86_64.rpm \
                    lustre-2.12.1-1.el7.x86_64.rpm \
                    lustre-osd-ldiskfs-mount-2.12.1-1.el7.x86_64.rpm; do
                    ssh root@${host_address[j]} "wget ${lustre_rpm_url}/${lustre_rpm} -P ~/lustre2.12.1"
                done
                ssh root@${host_address[j]} "rpm -Uvh ~/lustre2.12.1/*.rpm  --force"
                if ! ssh root@${host_address[j]} "rpm -qa | grep lustre"; then
                    echo "安装失败,请排错!"
                    exit
                fi
                echo "############# ${host_address[j]}配置完毕 #############"
            done
            break
            ;;
        n)
            echo "############# 已跳过步骤-安装lustre #############"
            break
            ;;
        *)
            if (( i == 99 )); then
                echo "############# 已退出 #############"
                exit
            fi
            ;;
    esac
done

11.控制重启和检测重启:

# Optionally reboot all nodes (the newly installed kernel only takes effect
# after a reboot) and wait until every node accepts ssh connections again.
# Answer Y to run the step, n to skip it; after 100 other answers the script
# exits.
for ((i = 0; i < 100; i++)); do
    read -r -p "是否重启集群主机(只有重启kernel内核才能更换生效)?(Y/n): " flag
    case "${flag}" in
        Y)
            sleep 3
            for ((j = 0; j < ${#host_address[@]}; j++)); do
                ssh "root@${host_address[j]}" "reboot"
            done

            # Give the nodes time to go down first; polling right away can
            # still reach the old, not yet stopped sshd and report success.
            sleep 10
            for ((k = 0; k < 100; k++)); do
                all_up=1
                for ((j = 0; j < ${#host_address[@]}; j++)); do
                    # BatchMode keeps ssh from waiting on a password prompt.
                    if ! ssh -o ConnectTimeout=5 -o BatchMode=yes "root@${host_address[j]}" "exit"; then
                        all_up=0
                        break
                    fi
                done
                if (( all_up )); then
                    echo "############# 设备已重启 #############"
                    break
                fi
                echo "############# 设备正在重启 #############"
                # Pause between attempts so refused connections do not use up
                # all attempts within a few seconds.
                sleep 5
            done
            if (( k == 100 )); then
                echo "############# 设备连接超时.... #############"
                exit 1
            fi
            break
            ;;
        n)
            echo "############# 已跳过步骤-重启 #############"
            break
            ;;
        *)
            if (( i == 99 )); then
                echo "############# 已退出 #############"
                exit 1
            fi
            ;;
    esac
done

12.加载模块:

# Load the lustre kernel module on both nodes, list the loaded lustre modules
# and print the module information.
i=0
while (( i < 2 )); do
    printf '%s\n' "########################## ${host_address[i]}加载Lustre模块,查看Lustre版本 ##########################"
    ssh root@${host_address[i]} "modprobe lustre && lsmod | grep lustre"
    ssh root@${host_address[i]} "modinfo lustre"
    printf '%s\n' "############# ${host_address[i]}配置完毕 #############"
    i=$(( i + 1 ))
done

13.配置Lnet网卡的IP地址和Lustre网络:

# Optionally configure the LNet interface on every node: assign the LNet
# address with nmcli, write the lnet module options and reload the lustre
# modules.  Answer Y to run the step, n to skip it; after 100 other answers
# the script exits.
for ((i = 0; i < 100; i++)); do
    read -r -p "是否继续配置Lnet网卡的IP地址?(Y/n): " flag
    case "${flag}" in
        Y)
            sleep 3
            for ((j = 0; j < ${#host_address[@]}; j++)); do
                node="root@${host_address[j]}"
                echo "########################## ${host_address[j]}配置Lnet网卡的IP地址 ##########################"
                # Show the interfaces of the node first: the prompt below asks
                # the user to choose from "the output above".
                ssh "${node}" "ip addr"
                read -r -p "请根据以上输出显示中,输入你要配置的LnNet网卡名称:" network_card
                # Look the interface up by its exact name.  The former
                # "grep -o" check also accepted partial names and misbehaved
                # on empty input.
                if [[ -z "${network_card}" ]] \
                    || ! ssh "${node}" "ip link show dev '${network_card}'" &> /dev/null; then
                    echo "网卡不存在,请重试"
                    exit 1
                fi
                # Recreate the connection profile with the static LNet address.
                ssh "${node}" "nmcli connection delete ${network_card} &> /dev/null"
                ssh "${node}" "nmcli connection add type ethernet con-name ${network_card} ifname ${network_card} ipv4.method manual ipv4.addresses '${lnet_address[j]}/24' autoconnect yes"
                ssh "${node}" "nmcli connection up ${network_card}"
                echo "############# ${host_address[j]}配置完毕 #############"
                echo "########################## ${host_address[j]}配置Lnet网络 ##########################"
                # ens33 is fixed as network tcp; the chosen interface becomes
                # tcp2, the network used by the mkfs.lustre step.
                ssh "${node}" "echo options lnet networks='tcp(ens33),tcp2(${network_card})' > /etc/modprobe.d/lustre.conf"
                ssh "${node}" "lustre_rmmod && modprobe -v lustre"
                echo "############# ${host_address[j]}配置完毕 #############"
                echo "############# ${host_address[j]}查看Lnet网络 #############"
                ssh "${node}" "lctl list_nids"
                echo "##########################################################"
            done
            break
            ;;
        n)
            echo "############# 已跳过步骤-配置Lnet网卡的IP地址 #############"
            break
            ;;
        *)
            if (( i == 99 )); then
                echo "############# 已退出 #############"
                exit 1
            fi
            ;;
    esac
done

14.格式化lustre:

# Optionally format the shared disks as Lustre targets from the first node
# (MGS on /dev/sdb, MDT 0/1 on /dev/sdc and /dev/sdd, OST 0/1 on /dev/sde and
# /dev/sdf), create the mount point directories on both nodes and list the
# formatted devices from the second node.
# Answer Y to run the step, n to skip it; after 100 other answers the script
# exits.
for ((i=0; i<100; i++)); do
    read -p "是否继续格式化lustre?(Y/n): " flag
    case "${flag}" in
        Y)
            sleep 3
            echo "########################## 开始格式化lustre ##########################"
            ssh root@${host_address[0]} "lsblk"
            # Both nodes are service nodes and MGS nodes of every target.
            service_nodes="--servicenode=${lnet_address[0]}@tcp2 --servicenode=${lnet_address[1]}@tcp2"
            mgs_nodes="--mgsnode=${lnet_address[0]}@tcp2 --mgsnode=${lnet_address[1]}@tcp2"
            ssh root@${host_address[0]} "mkfs.lustre --mgs ${service_nodes} --backfstype=ldiskfs --reformat /dev/sdb"
            # type:index:device for the metadata and object storage targets
            for target_spec in mdt:0:/dev/sdc mdt:1:/dev/sdd ost:0:/dev/sde ost:1:/dev/sdf; do
                IFS=: read -r target_type target_index target_device <<< "${target_spec}"
                ssh root@${host_address[0]} "mkfs.lustre --fsname global --${target_type} --index=${target_index} ${service_nodes} ${mgs_nodes} --backfstype=ldiskfs --reformat ${target_device}"
            done
            echo "############# 格式化完毕 #############"
            echo "########################## 创建挂载点目录 ##########################"
            for ((j=0; j<2; j++)); do
                ssh root@${host_address[j]} "mkdir /mnt/mgs &> /dev/null;mkdir /mnt/mdt1 &> /dev/null;mkdir /mnt/mdt2 &> /dev/null;mkdir /mnt/ost1 &> /dev/null;mkdir /mnt/ost2 &> /dev/null"
            done
            echo "############# 创建完毕 #############"
            echo "############# ${host_address[1]}查看格式化 #############"
            ssh root@${host_address[1]} "blkid /dev/sdb /dev/sdc /dev/sdd /dev/sde /dev/sdf"
            echo "##########################################################"
            break
            ;;
        n)
            echo "############# 已跳过步骤-格式化lustre #############"
            break
            ;;
        *)
            if (( i == 99 )); then
                echo "############# 已退出 #############"
                exit
            fi
            ;;
    esac
done

15.测试lustre挂载:

# Optionally test-mount all five Lustre targets on each node in turn.  Targets
# are mounted in the order mgs, mdt1, mdt2, ost1, ost2; when a mount fails the
# targets mounted so far are unmounted in reverse order and the script exits.
# After a successful pass all targets are unmounted again.
# Answer Y to run the step, n to skip it; after 100 other answers the script
# exits.
for ((i=0; i<100; i++)); do
    read -p "是否继续测试lustre的挂载?(Y/n): " flag
    case "${flag}" in
        Y)
            sleep 3
            mount_names=(mgs mdt1 mdt2 ost1 ost2)
            mount_devices=(/dev/sdb /dev/sdc /dev/sdd /dev/sde /dev/sdf)
            for ((j=0; j<2; j++)); do
                echo "########################## ${host_address[j]}测试挂载 ##########################"
                # Unmount commands for the targets mounted so far, newest first.
                undo_mounts=""
                for t in 0 1 2 3 4; do
                    mount_name=${mount_names[t]}
                    echo "########################## ${mount_name}测试挂载 ##########################"
                    ssh root@${host_address[j]} "umount /mnt/${mount_name} &> /dev/null"
                    if ! ssh root@${host_address[j]} "mount -t lustre ${mount_devices[t]} /mnt/${mount_name}"; then
                        echo "############# ${mount_name}测试有误,请手动查看! #############"
                        if [[ -n "${undo_mounts}" ]]; then
                            ssh root@${host_address[j]} "${undo_mounts}"
                        fi
                        exit
                    fi
                    undo_mounts="umount /mnt/${mount_name}${undo_mounts:+;${undo_mounts}}"
                done
                ssh root@${host_address[j]} "${undo_mounts}"
            done
            echo "############# 测试完毕 #############"
            break
            ;;
        n)
            echo "############# 已跳过步骤-测试挂载 #############"
            break
            ;;
        *)
            if (( i == 99 )); then
                echo "############# 已退出 #############"
                exit
            fi
            ;;
    esac
done

16.安装高可用和创建集群:

​
# Optionally install pacemaker and pcs on every node, set the hacluster
# password, authenticate the nodes, create the cluster "mylustre" and start
# it on all nodes.  Installation and setup are skipped when the cluster
# already exists.  Answer Y to run the step, n to skip it; after 100 other
# answers the script exits.
for ((i = 0; i < 100; i++)); do
    read -r -p "是否继续安装packemaker和corosync软件和创建集群?(Y/n): " flag
    case "${flag}" in
        Y)
            sleep 3
            # The cluster is created below under the name "mylustre"; the
            # former check searched for "mycluster" and so never found it.
            if ! ssh "root@${host_address[0]}" 'pcs status' | grep -q mylustre; then
                for ((j = 0; j < ${#host_address[@]}; j++)); do
                    echo "########################## ${host_address[j]}开始安装 ##########################"
                    ssh "root@${host_address[j]}" "yum install pacemaker pcs policycoreutils-python -y"
                    echo "############# ${host_address[j]}安装完毕 #############"
                    echo "########################## ${host_address[j]}开始配置 ##########################"
                    ssh "root@${host_address[j]}" "systemctl enable pcsd;systemctl restart pcsd"
                    ssh "root@${host_address[j]}" "echo '${host_passwd}' |passwd --stdin hacluster"
                    echo "############# ${host_address[j]}配置完毕 #############"
                done
                # The expect script must only contain plain blank lines
                # between commands: a line holding an invisible zero-width
                # space is executed as a command and aborts the script.
/usr/bin/expect << eof
# Seconds to wait for each expected prompt
set timeout 10

spawn ssh root@${host_address[0]} "pcs cluster auth ${host_hostname[*]}"

# Keep answering prompts until the spawned command ends
expect  {
        "Username:"          { send "hacluster\n";  exp_continue }
        "Password:"          { send "${host_passwd}\n"; exp_continue }
}
eof
                echo "########################## 开始创建集群 ##########################"
                ssh "root@${host_address[0]}" "pcs cluster setup --name mylustre ${host_hostname[*]}"
                echo "############# 创建完毕 #############"
            fi
            echo "########################## 启动集群 ##########################"
            ssh "root@${host_address[0]}" "pcs cluster start --all"
            echo "############# 启动完毕 #############"
            break
            ;;
        n)
            echo "############# 已跳过步骤-安装packemaker和corosync软件和配置 #############"
            break
            ;;
        *)
            if (( i == 99 )); then
                echo "############# 已退出 #############"
                exit 1
            fi
            ;;
    esac
done

​

17.配置资源防护:

# Optionally configure fencing: install fence-agents-all on every node, enable
# stonith and create one fence_heuristics_ping stonith resource per node.
# When the resources already exist and are all started, nothing is changed.
# Answer Y to run the step, n to skip it; after 100 other answers the script
# exits.
for ((i = 0; i < 100; i++)); do
    read -r -p "是否继续配置资源防护?(Y/n): " flag
    case "${flag}" in
        Y)
            sleep 3
            echo "########################## 开始配置资源防护 ##########################"
            for ((j = 0; j < ${#host_address[@]}; j++)); do
                ssh "root@${host_address[j]}" "yum install -y fence-agents-all"
            done
            ssh "root@${host_address[0]}" "pcs property set stonith-enabled=true"

            # Query the first node explicitly.  The loop counter is already
            # past the end of host_address here, so indexing with it yields
            # an empty host name.
            started_count=$(ssh "root@${host_address[0]}" "pcs status" \
                | grep "stonith:fence_heuristics_ping" \
                | grep -c "Started")
            if (( started_count == ${#host_hostname[@]} )); then
                echo "############# stonith已创建,并且正常运行,跳过配置stonith #############"
                break
            fi

            # Remove leftovers of every node (not the first node twice) so the
            # create commands below cannot collide with existing resources.
            for ((j = 0; j < ${#host_hostname[@]}; j++)); do
                ssh "root@${host_address[0]}" "pcs stonith delete stonith-ping-${host_hostname[j]} &> /dev/null"
            done
            for ((j = 0; j < ${#host_hostname[@]}; j++)); do
                ssh "root@${host_address[0]}" "pcs stonith create stonith-ping-${host_hostname[j]} fence_heuristics_ping ping_targets=${host_address[j]}"
            done
            ssh "root@${host_address[0]}" "pcs status"
            echo "############# 配置完毕 #############"
            break
            ;;
        n)
            echo "############# 已跳过步骤-配置资源防护 #############"
            break
            ;;
        *)
            if (( i == 99 )); then
                echo "############# 已退出 #############"
                exit 1
            fi
            ;;
    esac
done

18.创建lustre资源:

# Optionally (re)create the pacemaker resources for Lustre: install the
# lustre-resource-agents package where it is missing, delete existing
# resources, create one ocf:lustre:Lustre resource per target, add location
# and ordering constraints, and create the LNet and Lustre health monitors.
# Answer Y to run the step, n to skip it; after 100 other answers the script
# exits.
for ((i = 0; i < 100; i++)); do
    read -r -p "是否继续创建lustre资源(可删除后再创建)?(Y/n): " flag
    case "${flag}" in
        Y)
            sleep 3
            first_node="root@${host_address[0]}"
            for ((j = 0; j < ${#host_address[@]}; j++)); do
                # grep -q gives a plain yes/no answer; the former unquoted
                # [ -z ... ] test only worked by accident on empty output.
                if ! ssh "root@${host_address[j]}" 'rpm -qa | grep -q lustre-resource-agents-2.12.1-1'; then
                    echo "########################## ${host_address[j]}安装ocf:lustre:Lustre包 ##########################"
                    ssh "root@${host_address[j]}" "wget https://downloads.whamcloud.com/public/lustre/lustre-2.12.1/el7/server/RPMS/x86_64/lustre-resource-agents-2.12.1-1.el7.x86_64.rpm"
                    ssh "root@${host_address[j]}" "rpm -ivh lustre-resource-agents-2.12.1-1.el7.x86_64.rpm"
                    echo "############# 安装完毕 #############"
                fi
            done

            echo "########################## 开始删除lustre资源 ##########################"
            ssh "${first_node}" "pcs resource delete global-ost2 &> /dev/null;pcs resource delete global-ost1 &> /dev/null"
            ssh "${first_node}" "pcs resource delete global-mdt1 &> /dev/null;pcs resource delete global-mdt2 &> /dev/null"
            ssh "${first_node}" "pcs resource delete global-mgs &> /dev/null"
            echo "############# 删除完毕 #############"

            echo "########################## 开始创建lustre资源 ##########################"
            # Target name, backing device and index of the preferred node
            # (location constraint score 10) for every resource.
            res_names=(mgs mdt1 mdt2 ost1 ost2)
            res_devices=(/dev/sdb /dev/sdc /dev/sdd /dev/sde /dev/sdf)
            res_nodes=(0 0 1 0 1)
            for ((r = 0; r < ${#res_names[@]}; r++)); do
                if ! ssh "${first_node}" "pcs resource create global-${res_names[r]} ocf:lustre:Lustre target=${res_devices[r]} mountpoint=/mnt/${res_names[r]}"; then
                    echo "############# ${res_names[r]}资源创建有误,请手动查看! #############"
                    exit 1
                fi
            done

            # Check every constraint command, not only the last one.
            for ((r = 0; r < ${#res_names[@]}; r++)); do
                if ! ssh "${first_node}" "pcs constraint location add global-constraint-${res_names[r]} global-${res_names[r]} ${host_hostname[${res_nodes[r]}]} 10"; then
                    echo "############# 资源约束创建有误,请手动查看! #############"
                    exit 1
                fi
            done
            # The MGS has to be started before every MDT and OST.
            for ((r = 1; r < ${#res_names[@]}; r++)); do
                if ! ssh "${first_node}" "pcs constraint order start global-mgs then start global-${res_names[r]}"; then
                    echo "############# 资源约束创建有误,请手动查看! #############"
                    exit 1
                fi
            done

            # Read the interface assigned to network tcp2 from the lnet module
            # options.  The file is parsed locally, so no awk field reference
            # can be expanded by the wrong shell.
            lnet_name1=$(ssh "${first_node}" "cat /etc/modprobe.d/lustre.conf" \
                | sed -n 's/.*tcp2(\([^)]*\)).*/\1/p')
            # Also collected for the second node; not used within this step.
            lnet_name2=$(ssh "root@${host_address[1]}" "cat /etc/modprobe.d/lustre.conf" \
                | sed -n 's/.*tcp2(\([^)]*\)).*/\1/p')

            ssh "${first_node}" "pcs resource delete ping-lnet &>/dev/null;pcs resource delete global-healthLUSTRE &> /dev/null"
            if ! ssh "${first_node}" "pcs resource create ping-lnet ocf:lustre:healthLNET lctl=true multiplier=1001 device=${lnet_name1} host_list='${lnet_address[0]}@tcp2 ${lnet_address[1]}@tcp2' --clone"; then
                echo "############# 资源监听创建有误,请手动查看! #############"
                exit 1
            fi
            if ! ssh "${first_node}" "pcs resource create global-healthLUSTRE ocf:lustre:healthLUSTRE --clone"; then
                echo "############# 资源监听创建有误,请手动查看! #############"
                exit 1
            fi
            echo "############# 创建完毕 #############"
            echo "##########################    查看集群状态    ##########################"
            echo "########################################################################"
            echo "########################################################################"
            ssh "${first_node}" "pcs status"
            echo "########################################################################"
            echo "########################################################################"
            echo "################################################## 脚本到此全部执行完毕 ##################################################"
            break
            ;;
        n)
            echo "############# 已跳过步骤-创建lustre资源 #############"
            break
            ;;
        *)
            if (( i == 99 )); then
                echo "############# 已退出 #############"
                exit 1
            fi
            ;;
    esac
done

19.完整脚本:

# Automated deployment script for a two-node high-availability Lustre cluster.
# Requirements: the shared disks /dev/sdb, /dev/sdc, /dev/sdd, /dev/sde and
# /dev/sdf are attached to both nodes, and each node has two NICs, one of
# which already has its IP address configured.
#
# host_address  - management addresses of the nodes
# lnet_address  - LNet addresses of the nodes
# host_hostname - hostnames of the nodes, in the same order as host_address
# host_passwd   - password for the ssh logins and the cluster authentication
host_address=(192.168.10.25 192.168.10.26)
lnet_address=(192.168.209.25 192.168.209.26)
host_hostname=(mds005 mds006)
host_passwd=110119


# Make sure the expect command is available; later steps feed scripts to
# /usr/bin/expect to answer interactive prompts.
if ! expect -v &> /dev/null; then
    echo "没有expect,安装expect命令"
    # Stop here when the installation fails instead of continuing without
    # expect and failing later in a less obvious place.
    if ! yum install -y expect; then
        echo "expect安装失败,请排错!" >&2
        exit 1
    fi
fi

# Create the local ssh key pair when no public key exists yet.
echo "########################## 本地开始配置ssh ##########################"
if [[ -e "${HOME}/.ssh/id_rsa.pub" ]]; then
    echo "ssh公钥已创建"
else
    echo "ssh公钥未创建,开始创建"
    # Non-interactive key generation: -N '' sets an empty passphrase and -f
    # fixes the key path, so no prompts need to be scripted through expect
    # (the former expect script also contained invisible zero-width-space
    # lines, which expect runs as commands).  The "y" on stdin answers the
    # overwrite question that ssh-keygen asks when a private key exists
    # without its .pub file.  A 2048-bit key is generated instead of the
    # 1024-bit key the step used before.
    mkdir -p "${HOME}/.ssh" && chmod 700 "${HOME}/.ssh"
    if ! ssh-keygen -q -t rsa -b 2048 -N '' -f "${HOME}/.ssh/id_rsa" <<< y; then
        echo "ssh密钥创建失败,请排错!" >&2
        exit 1
    fi
fi



# Install the local public key on every managed host.  expect answers the
# host-key confirmation and the root password prompt of ssh-copy-id.
# The expect script must only contain plain blank lines between commands: a
# line holding an invisible zero-width space is executed as a command and
# aborts the script before ssh-copy-id is spawned.
for ((j = 0; j < ${#host_address[@]}; j++)); do
    echo "########################## ${host_address[j]}正在被添加公钥 ##########################"
/usr/bin/expect << eof
# Seconds to wait for each expected prompt
set timeout 10

spawn ssh-copy-id -i /root/.ssh/id_rsa.pub root@${host_address[j]}

# Keep answering prompts until the spawned command ends
expect  {
        "connecting (yes/no)?" { send "yes\n";  exp_continue }
        "s password:"          { send "${host_passwd}\n"; exp_continue }
}
eof
    echo "############# ${host_address[j]}配置完毕 #############"
done



# Generate an RSA key pair on each managed host unless it already has one.
for ((j=0;j<2;j++));do
    echo "########################## ${host_address[j]}开始创建密钥 ##########################"
    # The remote test's exit status is used directly; an ssh failure falls
    # through to the creation branch instead of breaking the [ ] expression.
    if ssh root@${host_address[j]} 'test -e ~/.ssh/id_rsa.pub';then
        echo "ssh公钥已创建"
    else
        echo "ssh公钥未创建,开始创建"
/usr/bin/expect << eof
# Seconds to wait for each expected prompt before giving up
set timeout 10
spawn ssh root@${host_address[j]} "ssh-keygen -t rsa -b 1024"
# Answer every prompt that may appear: host key confirmation, password,
# default key path, overwrite confirmation and an empty passphrase (twice)
expect  {
        "connecting (yes/no)?" { send "yes\n";  exp_continue }
        "s password:"          { send "${host_passwd}\n"; exp_continue }
        ".ssh/id_rsa)"         { send "\n";  exp_continue }
        "Overwrite (y/n)?"     { send "y\n"; exp_continue }
        "no passphrase):"      { send "\n";  exp_continue }
        "passphrase again:"    { send "\n";  exp_continue }
}
eof
    fi
    echo "############# ${host_address[j]}配置完毕 #############"
done

# Let every managed host copy its own public key to every managed host
# (itself included), so the nodes can ssh to each other without a password.
for ((j=0;j<2;j++));do
    echo "########################## ${host_address[j]}开始分配公钥 ##########################"
    for ((k=0;k<2;k++));do
/usr/bin/expect << eof
# Seconds to wait for each expected prompt before giving up
set timeout 10
spawn ssh root@${host_address[j]} "ssh-copy-id -i /root/.ssh/id_rsa.pub root@${host_address[k]}"
# Accept the host key and type the root password when asked
expect  {
        "connecting (yes/no)?" { send "yes\n";  exp_continue }
        "s password:"          { send "${host_passwd}\n"; exp_continue }
}
eof
    done
    echo "############# ${host_address[j]}分配完毕 #############"
done

# Interactive step: set each node's hostname and rebuild its /etc/hosts.
# "Y" runs the step, "n" skips it; after 100 other answers the script exits.
for ((attempt=0;attempt<100;attempt++));do
    read -p "修改主机名和配置域名映射?(Y/n): " answer
    case "${answer}" in
        Y)
            sleep 3
            echo "########################## 开始配置主机名和域名映射 ##########################"
            for ((n=0;n<2;n++));do
                node="root@${host_address[n]}"
                # Rename the node only when its current hostname differs
                if [ $(ssh ${node} "hostname") != "${host_hostname[n]}" ];then
                    ssh ${node} "hostnamectl set-hostname ${host_hostname[n]}"
                fi
                # Reset /etc/hosts to the two localhost lines ...
                ssh ${node} "cat << eof > /etc/hosts
127.0.0.1   localhost localhost.localdomain localhost4 localhost4.localdomain4
::1         localhost localhost.localdomain localhost6 localhost6.localdomain6
eof"
                # ... then append one "address hostname" line per cluster node
                for ((m=0;m<2;m++));do
                    ssh ${node} "echo '${host_address[m]} ${host_hostname[m]}' >> /etc/hosts"
                done
            done
            echo "############# 配置完毕 #############"
            break
            ;;
        n)
            echo "############# 已跳过步骤-修改主机名和配置域名映射 #############"
            break
            ;;
        *)
            if [ ${attempt} -eq 99 ];then
                echo "############# 已退出 #############"
                exit
            fi
            ;;
    esac
done
​
​
​
​
​
​
# Interactive step: stop and disable firewalld and set SELINUX=disabled in
# /etc/selinux/config on both nodes.
# "Y" runs the step, "n" skips it; after 100 other answers the script exits.
for ((attempt=0;attempt<100;attempt++));do
    read -p "配置防火墙和selinux?(Y/n): " answer
    case "${answer}" in
        Y)
            sleep 3
            echo "########################## 开始配置防火墙和selinux ##########################"
            for ((n=0;n<2;n++));do
                node="root@${host_address[n]}"
                ssh ${node} "systemctl stop firewalld;systemctl disable firewalld"
                ssh ${node} "sed -i 's/SELINUX=.*/SELINUX=disabled/' /etc/selinux/config"
            done
            echo "############# 配置完毕 #############"
            break
            ;;
        n)
            echo "############# 已跳过步骤-配置防火墙和selinux #############"
            break
            ;;
        *)
            if [ ${attempt} -eq 99 ];then
                echo "############# 已退出 #############"
                exit
            fi
            ;;
    esac
done
​
​
​
# Interactive step: configure the yum repositories on both nodes (a local
# repository on the mounted CD-ROM, the EPEL release package and the Aliyun
# CentOS 7 repo file), then verify that yum can list its repositories.
# "Y" runs the step, "n" skips it; after 100 other answers the script exits.
for ((i=0;i<100;i++));do
    read -p "配置yum源?(Y/n): " flag
    if [ "${flag}" == "Y" ];then
        sleep 3
        echo "########################## 开始配置yum源 ##########################"
        for ((j=0;j<2;j++));do
            echo "########################## 配置${host_address[j]}的本地yum源 ##########################"
            ssh root@${host_address[j]} "mkdir /mnt/cdrom &> /dev/null;mount /dev/cdrom /mnt/cdrom"
            # Add the fstab entry only once
            if ! ssh root@${host_address[j]} "grep -q '^/dev/cdrom' /etc/fstab";then
                ssh root@${host_address[j]} "cat << eof >> /etc/fstab
/dev/cdrom /mnt/cdrom iso9660 defaults  0  0
eof"
            fi
            ssh root@${host_address[j]} "cat << eof > /etc/yum.repos.d/centos-local.repo
[centos7.9]
name=centos7.9
baseurl=file:///mnt/cdrom
enabled=1
gpgcheck=0
eof"
            echo "############# ${host_address[j]}配置完毕 #############"

            echo "########################## 配置${host_address[j]}的扩展源 ##########################"
            ssh root@${host_address[j]} "yum install epel-release -y"
            echo "############# ${host_address[j]}配置完毕 #############"

            echo "########################## 配置${host_address[j]}的阿里yum源 ##########################"
            ssh root@${host_address[j]} "yum install -y wget"
            # As before, the Aliyun file only replaces an existing CentOS-Base.repo
            if ssh root@${host_address[j]} 'test -e /etc/yum.repos.d/CentOS-Base.repo';then
                ssh root@${host_address[j]} "wget -O /etc/yum.repos.d/CentOS-Base.repo http://mirrors.aliyun.com/repo/Centos-7.repo"
            fi
            # Check the remote exit status itself. The previous check ran
            # "echo $?" with $? expanded locally, so it could never fail.
            if ! ssh root@${host_address[j]} "yum clean all && yum repolist";then
                echo "yum源配置有误,退出执行脚本"
                exit 1
            fi
            echo "############# ${host_address[j]}配置完毕 #############"
        done
        break
    elif [ "${flag}" == "n" ];then
        echo "############# 已跳过步骤-配置yum源 #############"
        break
    elif [ ${i} -eq 99 ];then
        echo "############# 已退出 #############"
        exit
    else continue;fi
done

# Interactive step: configure chrony on both nodes. Every node gets the first
# node as its only "server" entry; the first node additionally enables the
# "allow" and "local stratum 10" lines so it can serve time itself.
# "Y" runs the step, "n" skips it; after 100 other answers the script exits.
for ((i=0;i<100;i++));do
    read -p "配置chrony时间服务器?(Y/n): " flag
    if [ "${flag}" == "Y" ];then
        sleep 3
        echo "########################## 开始配置chrony ##########################"
        for ((j=0;j<2;j++));do
            # Use the remote exit status directly. The previous form embedded
            # $? in a double-quoted string, so it was expanded locally and the
            # install fallback below never ran.
            if ! ssh root@${host_address[j]} "systemctl restart chronyd";then
                echo "${host_address[j]} 安装chrony"
                if ! ssh root@${host_address[j]} "yum install -y chrony && systemctl restart chronyd";then
                    echo "安装失败,请排错!"
                    exit 1
                fi
            fi
            echo "${host_address[j]}配置chrony"
            # Drop the existing server lines and insert ours after line 2
            ssh root@${host_address[j]} "sed -i '/^server/d' /etc/chrony.conf"
            ssh root@${host_address[j]} "sed -i '2a server ${host_address[0]} iburst' /etc/chrony.conf"
            if [ "${host_address[j]}" == "${host_address[0]}" ];then
                ssh root@${host_address[j]} "sed -i 's/#allow 192.168.0.0\/16/allow 192.168.10.0\/16/' /etc/chrony.conf"
                ssh root@${host_address[j]} "sed -i 's/#local stratum 10/local stratum 10/' /etc/chrony.conf"
                sleep 2
            fi
            ssh root@${host_address[j]} "systemctl restart chronyd && systemctl enable chronyd &> /dev/null"
            sleep 5
            ssh root@${host_address[j]} "timedatectl set-ntp true && chronyc sources -v | sed -n '/^\^\*/p'"
            # A source line starting with "^*" is required; otherwise abort
            if [ -z "$(ssh root@${host_address[j]} "chronyc sources -v | sed -n '/^\^\*/p'")" ];then
                echo -e "\e[31m此节点${host_address[j]}的chrony配置有误,请手动调试\e[0m"
                exit 1
            fi
            echo "############# ${host_address[j]}配置完毕 #############"
        done
        break
    elif [ "${flag}" == "n" ];then
        echo "############# 已跳过步骤-配置chrony时间服务器 #############"
        break
    elif [ ${i} -eq 99 ];then
        echo "############# 已退出 #############"
        exit
    else continue;fi
done

# Interactive step: download the e2fsprogs 1.44.5.wc1 packages from the
# Whamcloud server onto each node and install them with rpm.
# "Y" runs the step, "n" skips it; after 100 other answers the script exits.
for ((attempt=0;attempt<100;attempt++));do
    read -p "安装e2fsprogs?(Y/n): " answer
    case "${answer}" in
        Y)
            sleep 3
            echo "########################## 开始安装e2fsprogs ##########################"
            pkg_dir='~/e2fsprogs1.44.5'
            for ((n=0;n<2;n++));do
                node="root@${host_address[n]}"
                echo "########################## ${host_address[n]}开始安装e2fsprogs ##########################"
                # Start from an empty download directory
                ssh ${node} "rm -rf ${pkg_dir} && mkdir ${pkg_dir}"
                ssh ${node} "wget -c -r -nd https://downloads.whamcloud.com/public/e2fsprogs/1.44.5.wc1/el7/RPMS/x86_64/ -P ${pkg_dir}"
                # Remove the non-package files fetched by the recursive download
                ssh ${node} "rm -rf ${pkg_dir}/index.html* ${pkg_dir}/unknown.gif ${pkg_dir}/*.gif ${pkg_dir}/sha256sum"
                ssh ${node} "rpm -Uvh ${pkg_dir}/* --force"
                if ! ssh ${node} "rpm -qa | grep e2fsprogs";then
                    echo "安装失败,请排错!"
                    exit
                fi
                echo "############# ${host_address[n]}配置完毕 #############"
            done
            break
            ;;
        n)
            echo "############# 已跳过步骤-安装e2fsprogs #############"
            break
            ;;
        *)
            if [ ${attempt} -eq 99 ];then
                echo "############# 已退出 #############"
                exit
            fi
            ;;
    esac
done
​
​
​
​
# Interactive step: install the Lustre 2.12.1 server packages (patched kernel,
# kernel modules and userspace tools) on both nodes.
# "Y" runs the step, "n" skips it; after 100 other answers the script exits.
for ((attempt=0;attempt<100;attempt++));do
    read -p "安装Lustre软件?(Y/n): " answer
    case "${answer}" in
        Y)
            sleep 3
            echo "########################## 开始安装lustre ##########################"
            rpm_url=https://downloads.whamcloud.com/public/lustre/lustre-2.12.1/el7/server/RPMS/x86_64
            rpm_dir='~/lustre2.12.1'
            for ((n=0;n<2;n++));do
                node="root@${host_address[n]}"
                echo "########################## ${host_address[n]}开始安装lustre ##########################"
                ssh ${node} "yum install -y linux-firmware dracut selinux-policy-targeted kexec-tools libyaml perl"
                # Start from an empty download directory
                ssh ${node} "rm -rf ${rpm_dir} && mkdir ${rpm_dir}"
                for pkg in \
                    kernel-3.10.0-957.10.1.el7_lustre.x86_64.rpm \
                    kmod-lustre-2.12.1-1.el7.x86_64.rpm \
                    kmod-lustre-osd-ldiskfs-2.12.1-1.el7.x86_64.rpm \
                    lustre-2.12.1-1.el7.x86_64.rpm \
                    lustre-osd-ldiskfs-mount-2.12.1-1.el7.x86_64.rpm;do
                    ssh ${node} "wget ${rpm_url}/${pkg} -P ${rpm_dir}"
                done
                ssh ${node} "rpm -Uvh ${rpm_dir}/*.rpm  --force"
                if ! ssh ${node} "rpm -qa | grep lustre";then
                    echo "安装失败,请排错!"
                    exit
                fi
                echo "############# ${host_address[n]}配置完毕 #############"
            done
            break
            ;;
        n)
            echo "############# 已跳过步骤-安装lustre #############"
            break
            ;;
        *)
            if [ ${attempt} -eq 99 ];then
                echo "############# 已退出 #############"
                exit
            fi
            ;;
    esac
done
​
​
​
# Interactive step: reboot both nodes (the newly installed kernel only takes
# effect after a reboot) and wait until both accept ssh connections again.
# "Y" runs the step, "n" skips it; after 100 other answers the script exits.
for ((i=0;i<100;i++));do
    read -p "是否重启集群主机(只有重启kernel内核才能更换生效)?(Y/n): " flag
    if [ "${flag}" == "Y" ];then
        sleep 3
        for((j=0;j<2;j++));do
            ssh root@${host_address[j]} "reboot"
        done

        # Give the nodes time to actually go down; polling immediately could
        # reach sshd before shutdown and report the reboot as finished.
        sleep 15
        for ((k=0;k<100;k++));do
            if [ ${k} -eq 99 ];then
                echo "############# 设备连接超时.... #############"
                exit 1
            fi
            if ssh -o ConnectTimeout=5 root@${host_address[0]} "exit" \
                && ssh -o ConnectTimeout=5 root@${host_address[1]} "exit";then
                echo "############# 设备已重启 #############"
                break
            else
                echo "############# 设备正在重启 #############"
                # A refused connection fails instantly; pause so the retry
                # budget is not exhausted within a few seconds.
                sleep 5
            fi
        done
        break
    elif [ "${flag}" == "n" ];then
        echo "############# 已跳过步骤-重启 #############"
        break
    elif [ ${i} -eq 99 ];then
        echo "############# 已退出 #############"
        exit
    else continue;fi
done

sleep 5

# Load the lustre kernel module on both nodes and print its module details
for node_ip in "${host_address[@]:0:2}";do
    echo "########################## ${node_ip}加载Lustre模块,查看Lustre版本 ##########################"
    ssh root@${node_ip} "modprobe lustre && lsmod | grep lustre"
    ssh root@${node_ip} "modinfo lustre"
    echo "############# ${node_ip}配置完毕 #############"
done
​
​
​
# Interactive step: on each node, assign the LNet IP address to an
# operator-chosen NIC through NetworkManager, write the lnet module options
# and reload the Lustre modules.
# "Y" runs the step, "n" skips it; after 100 other answers the script exits.
for ((i=0;i<100;i++));do
    read -p "是否继续配置Lnet网卡的IP地址?(Y/n): " flag
    if [ "${flag}" == "Y" ];then
        sleep 3
        for((j=0;j<2;j++));do
            echo "########################## ${host_address[j]}配置Lnet网卡的IP地址 ##########################"
            # The prompt below refers to "the output above", so show the
            # node's interfaces first.
            ssh root@${host_address[j]} "ip addr"
            read -r -p "请根据以上输出显示中,输入你要配置的LnNet网卡名称:" network_card
            # Require an exact interface name; the former "grep -o" check
            # also accepted any substring of an existing name.
            if [ -z "${network_card}" ] || ! ssh root@${host_address[j]} "ip link show dev '${network_card}'" &> /dev/null;then
                echo "网卡不存在,请重试"
                exit 1
            fi
            # Recreate the connection profile from scratch with the static LNet address
            ssh root@${host_address[j]} "nmcli connection delete ${network_card} &> /dev/null"
            ssh root@${host_address[j]} "nmcli connection add type ethernet con-name ${network_card} ifname ${network_card} ipv4.method manual ipv4.addresses '${lnet_address[j]}/24' autoconnect yes"
            ssh root@${host_address[j]} "nmcli connection up ${network_card}"
            echo "############# ${host_address[j]}配置完毕 #############"
            echo "########################## ${host_address[j]}配置Lnet网络 ##########################"
            # tcp is bound to ens33 (hard-coded), tcp2 to the chosen NIC
            ssh root@${host_address[j]} "echo options lnet networks='tcp(ens33),tcp2(${network_card})' > /etc/modprobe.d/lustre.conf"
            ssh root@${host_address[j]} "lustre_rmmod && modprobe -v lustre"
            echo "############# ${host_address[j]}配置完毕 #############"
            echo "############# ${host_address[j]}查看Lnet网络 #############"
            ssh root@${host_address[j]} "lctl list_nids"
            echo "##########################################################"
        done
        break
    elif [ "${flag}" == "n" ];then
        echo "############# 已跳过步骤-配置Lnet网卡的IP地址 #############"
        break
    elif [ ${i} -eq 99 ];then
        echo "############# 已退出 #############"
        exit
    else continue;fi
done

# Interactive step: format the five shared disks from the first node as
# MGS (/dev/sdb), MDT0/MDT1 (/dev/sdc, /dev/sdd) and OST0/OST1 (/dev/sde,
# /dev/sdf) of file system "global", then create the mount points on both nodes.
# "Y" runs the step, "n" skips it; after 100 other answers the script exits.
for ((attempt=0;attempt<100;attempt++));do
    read -p "是否继续格式化lustre?(Y/n): " answer
    case "${answer}" in
        Y)
            sleep 3
            echo "########################## 开始格式化lustre ##########################"
            primary="root@${host_address[0]}"
            # Both nodes are listed as service nodes and as MGS nodes on tcp2
            service_opts="--servicenode=${lnet_address[0]}@tcp2 --servicenode=${lnet_address[1]}@tcp2"
            mgs_opts="--mgsnode=${lnet_address[0]}@tcp2 --mgsnode=${lnet_address[1]}@tcp2"
            ssh ${primary} "lsblk"
            ssh ${primary} "mkfs.lustre --mgs ${service_opts} --backfstype=ldiskfs --reformat /dev/sdb"
            # Each entry: "<target type> <index> <device>"
            for spec in "mdt 0 /dev/sdc" "mdt 1 /dev/sdd" "ost 0 /dev/sde" "ost 1 /dev/sdf";do
                read -r tgt_type tgt_index tgt_dev <<< "${spec}"
                ssh ${primary} "mkfs.lustre --fsname global --${tgt_type} --index=${tgt_index} ${service_opts} ${mgs_opts} --backfstype=ldiskfs --reformat ${tgt_dev}"
            done
            echo "############# 格式化完毕 #############"
            echo "########################## 创建挂载点目录 ##########################"
            for ((n=0;n<2;n++));do
                ssh root@${host_address[n]} "mkdir /mnt/mgs &> /dev/null;mkdir /mnt/mdt1 &> /dev/null;mkdir /mnt/mdt2 &> /dev/null;mkdir /mnt/ost1 &> /dev/null;mkdir /mnt/ost2 &> /dev/null"
            done
            echo "############# 创建完毕 #############"
            echo "############# ${host_address[1]}查看格式化 #############"
            # Inspect the disks from the second node
            ssh root@${host_address[1]} "blkid /dev/sdb /dev/sdc /dev/sdd /dev/sde /dev/sdf"
            echo "##########################################################"
            break
            ;;
        n)
            echo "############# 已跳过步骤-格式化lustre #############"
            break
            ;;
        *)
            if [ ${attempt} -eq 99 ];then
                echo "############# 已退出 #############"
                exit
            fi
            ;;
    esac
done
​
​
​
# Interactive step: on each node in turn, mount the five Lustre targets in
# order (mgs, mdt1, mdt2, ost1, ost2) and unmount them again. On the first
# mount failure the targets mounted so far are unmounted and the script exits.
# "Y" runs the step, "n" skips it; after 100 other answers the script exits.
for ((attempt=0;attempt<100;attempt++));do
    read -p "是否继续测试lustre的挂载?(Y/n): " answer
    case "${answer}" in
        Y)
            sleep 3
            tgt_names=(mgs mdt1 mdt2 ost1 ost2)
            tgt_devs=(/dev/sdb /dev/sdc /dev/sdd /dev/sde /dev/sdf)
            for ((n=0;n<2;n++));do
                node="root@${host_address[n]}"
                echo "########################## ${host_address[n]}测试挂载 ##########################"
                # Unmount command for everything mounted so far, newest first
                undo=""
                for ((t=0;t<${#tgt_names[@]};t++));do
                    tgt=${tgt_names[t]}
                    echo "########################## ${tgt}测试挂载 ##########################"
                    ssh ${node} "umount /mnt/${tgt} &> /dev/null"
                    if ! ssh ${node} "mount -t lustre ${tgt_devs[t]} /mnt/${tgt}";then
                        echo "############# ${tgt}测试有误,请手动查看! #############"
                        if [ -n "${undo}" ];then
                            ssh ${node} "${undo}"
                        fi
                        exit
                    fi
                    undo="umount /mnt/${tgt}${undo:+;${undo}}"
                done
                # All five mounted: release them in reverse order
                ssh ${node} "${undo}"
            done
            echo "############# 测试完毕 #############"
            break
            ;;
        n)
            echo "############# 已跳过步骤-测试挂载 #############"
            break
            ;;
        *)
            if [ ${attempt} -eq 99 ];then
                echo "############# 已退出 #############"
                exit
            fi
            ;;
    esac
done
​
​
​
​
# Interactive step: install pacemaker/pcs on both nodes, authenticate the
# nodes as user hacluster, create the cluster if "pcs status" does not show it
# yet, and start the cluster on all nodes.
# "Y" runs the step, "n" skips it; after 100 other answers the script exits.
for ((i=0;i<100;i++));do
    read -p "是否继续安装packemaker和corosync软件和创建集群?(Y/n): " flag
    if [ "${flag}" == "Y" ];then
        sleep 3
        # One name for both the existence check and "pcs cluster setup". The
        # check used to look for "mycluster" while the cluster was created as
        # "mylustre", so it never matched.
        cluster_name=mylustre
        if ! ssh root@${host_address[0]} 'pcs status' | grep -q "${cluster_name}";then
            for((j=0;j<2;j++));do
                echo "########################## ${host_address[j]}开始安装 ##########################"
                ssh root@${host_address[j]} "yum install pacemaker pcs policycoreutils-python -y"
                echo "############# ${host_address[j]}安装完毕 #############"
                echo "########################## ${host_address[j]}开始配置 ##########################"
                ssh root@${host_address[j]} "systemctl enable pcsd;systemctl restart pcsd"
                ssh root@${host_address[j]} "echo '${host_passwd}' |passwd --stdin hacluster"
                echo "############# ${host_address[j]}配置完毕 #############"
            done
/usr/bin/expect << eof
# Seconds to wait for each expected prompt before giving up
set timeout 10
spawn ssh root@${host_address[0]} "pcs cluster auth ${host_hostname[*]}"
# Log in as hacluster with the shared password
expect  {
        "Username:"          { send "hacluster\n";  exp_continue }
        "Password:"          { send "${host_passwd}\n"; exp_continue }
}
eof
            echo "########################## 开始创建集群 ##########################"
            ssh root@${host_address[0]} "pcs cluster setup --name ${cluster_name} ${host_hostname[*]}"
            echo "############# 创建完毕 #############"
        fi
        echo "########################## 启动集群 ##########################"
        ssh root@${host_address[0]} "pcs cluster start --all"
        echo "############# 启动完毕 #############"
        break
    elif [ "${flag}" == "n" ];then
        echo "############# 已跳过步骤-安装packemaker和corosync软件和配置 #############"
        break
    elif [ ${i} -eq 99 ];then
        echo "############# 已退出 #############"
        exit
    else continue;fi
done

# Interactive step: install the fence agents, enable stonith and create one
# fence_heuristics_ping stonith device per node, unless both devices already
# exist and are started.
# "Y" runs the step, "n" skips it; after 100 other answers the script exits.
for ((i=0;i<100;i++));do
    read -p "是否继续配置资源防护?(Y/n): " flag
    if [ "${flag}" == "Y" ];then
        sleep 3
        echo "########################## 开始配置资源防护 ##########################"
        for((j=0;j<2;j++));do
            ssh root@${host_address[j]} "yum install -y fence-agents-all"
        done
        ssh root@${host_address[0]} "pcs property set stonith-enabled=true"
        # Query the first node. The old code indexed host_address with the
        # finished loop counter (j == 2), i.e. an empty host.
        stonith_lines=$(ssh root@${host_address[0]} "pcs status" | grep "stonith:fence_heuristics_ping")
        if [ "$(grep -c "Started" <<< "${stonith_lines}")" -eq 2 ];then
            echo "############# stonith已创建,并且正常运行,跳过配置stonith #############"
            break
        fi
        # Devices exist but are not both running: remove every one of them
        # (the old code deleted the first node's device twice) and recreate.
        if [ -n "${stonith_lines}" ];then
            for((j=0;j<2;j++));do
                ssh root@${host_address[0]} "pcs stonith delete stonith-ping-${host_hostname[j]}"
            done
        fi

        ssh root@${host_address[0]} "pcs stonith create stonith-ping-${host_hostname[0]} fence_heuristics_ping ping_targets=${host_address[0]}"
        ssh root@${host_address[0]} "pcs stonith create stonith-ping-${host_hostname[1]} fence_heuristics_ping ping_targets=${host_address[1]}"
        ssh root@${host_address[0]} "pcs status"
        echo "############# 配置完毕 #############"
        break
    elif [ "${flag}" == "n" ];then
        echo "############# 已跳过步骤-配置资源防护 #############"
        break
    elif [ ${i} -eq 99 ];then
        echo "############# 已退出 #############"
        exit
    else continue;fi
done

# Interactive step: (re)create the Pacemaker resources for the five Lustre
# targets, their location and ordering constraints, and the cloned LNet and
# Lustre health resources. Existing resources of the same names are deleted
# first.
# "Y" runs the step, "n" skips it; after 100 other answers the script exits.
for ((i=0;i<100;i++));do
    read -p "是否继续创建lustre资源(可删除后再创建)?(Y/n): " flag
    if [ "${flag}" == "Y" ];then
        sleep 3
        for((j=0;j<2;j++));do
            # Install the resource agents only where rpm does not list them yet
            if [ -z "$(ssh root@${host_address[j]} 'rpm -qa | grep lustre-resource-agents-2.12.1-1')" ];then
                echo "########################## ${host_address[j]}安装ocf:lustre:Lustre包 ##########################"
                ssh root@${host_address[j]} "wget https://downloads.whamcloud.com/public/lustre/lustre-2.12.1/el7/server/RPMS/x86_64/lustre-resource-agents-2.12.1-1.el7.x86_64.rpm"
                ssh root@${host_address[j]} "rpm -ivh lustre-resource-agents-2.12.1-1.el7.x86_64.rpm"
                echo "############# 安装完毕 #############"
            fi
        done
        echo "########################## 开始删除lustre资源 ##########################"
        ssh root@${host_address[0]} "pcs resource delete global-ost2 &> /dev/null;pcs resource delete global-ost1 &> /dev/null"
        ssh root@${host_address[0]} "pcs resource delete global-mdt1 &> /dev/null;pcs resource delete global-mdt2 &> /dev/null"
        ssh root@${host_address[0]} "pcs resource delete global-mgs &> /dev/null"
        echo "############# 删除完毕 #############"
        echo "########################## 开始创建lustre资源 ##########################"
        # Each entry: "<target name> <device> <index of the preferred node>"
        lustre_targets=("mgs /dev/sdb 0" "mdt1 /dev/sdc 0" "mdt2 /dev/sdd 1" "ost1 /dev/sde 0" "ost2 /dev/sdf 1")
        for target in "${lustre_targets[@]}";do
            read -r res_name res_dev res_node <<< "${target}"
            if ! ssh root@${host_address[0]} "pcs resource create global-${res_name} ocf:lustre:Lustre target=${res_dev} mountpoint=/mnt/${res_name}";then
                echo "############# ${res_name}资源创建有误,请手动查看! #############"
                exit 1
            fi
        done
        # Record a failure of any constraint, not only of the last one
        constraint_failed=0
        for target in "${lustre_targets[@]}";do
            read -r res_name res_dev res_node <<< "${target}"
            ssh root@${host_address[0]} "pcs constraint location add global-constraint-${res_name} global-${res_name} ${host_hostname[res_node]} 10" || constraint_failed=1
        done
        # Every MDT/OST resource must start after the MGS resource
        for target in "${lustre_targets[@]}";do
            read -r res_name res_dev res_node <<< "${target}"
            if [ "${res_name}" == "mgs" ];then
                continue
            fi
            ssh root@${host_address[0]} "pcs constraint order start global-mgs then start global-${res_name}" || constraint_failed=1
        done
        if [ ${constraint_failed} -ne 0 ];then
            echo "############# 资源约束创建有误,请手动查看! #############"
            exit 1
        fi
        # Read the NIC bound to tcp2 from the first node's lnet module options.
        # (The old pipeline put awk's $3 inside a double-quoted ssh command,
        # where the local shell expanded it, and only worked by accident.)
        lnet_name1=$(ssh root@${host_address[0]} "cat /etc/modprobe.d/lustre.conf" | sed -n 's/.*tcp2(\([^)]*\)).*/\1/p' | head -n 1)
        ssh root@${host_address[0]} "pcs resource delete ping-lnet &>/dev/null;pcs resource delete global-healthLUSTRE &> /dev/null"
        monitor_failed=0
        ssh root@${host_address[0]} "pcs resource create ping-lnet ocf:lustre:healthLNET lctl=true multiplier=1001 device=${lnet_name1} host_list='${lnet_address[0]}@tcp2 ${lnet_address[1]}@tcp2' --clone" || monitor_failed=1
        ssh root@${host_address[0]} "pcs resource create global-healthLUSTRE ocf:lustre:healthLUSTRE --clone" || monitor_failed=1
        if [ ${monitor_failed} -ne 0 ];then
            echo "############# 资源监听创建有误,请手动查看! #############"
            exit 1
        fi
        echo "############# 创建完毕 #############"
        echo "##########################    查看集群状态    ##########################"
        echo "########################################################################"
        echo "########################################################################"
        ssh root@${host_address[0]} "pcs status"
        echo "########################################################################"
        echo "########################################################################"
        echo "################################################## 脚本到此全部执行完毕 ##################################################"
        break
    elif [ "${flag}" == "n" ];then
        echo "############# 已跳过步骤-创建lustre资源 #############"
        break
    elif [ ${i} -eq 99 ];then
        echo "############# 已退出 #############"
        exit
    else continue;fi
done

  • 2
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值