1.环境说明:
主机名 | 系统 | 挂载情况 | 网卡ens33,Lnet的IP地址 | Lustre集群名 | 内存 |
---|---|---|---|---|---|
mds005 | Centos7.9 | (共享磁盘)1个mgs,2个MDT,2个OST | 192.168.10.25/209.25 | global | 2G |
mds006 | Centos7.9 | (共享磁盘)1个mgs,2个MDT,2个OST | 192.168.10.26/209.26 | global | 2G |
manager | Centos7.9 | 无 | 192.168.10.5 | 无 | 2G |
注意:自动化脚本lustre_auto.sh在manager节点上,五块4G共享磁盘(/dev/sdb,/dev/sdc,/dev/sdd,/dev/sde,/dev/sdf),两个网卡(管理IP地址,Lnet通信IP地址)
2.变量定义:
# Management addresses of the cluster nodes (the IPs configured on ens33).
declare -a host_address=(
  192.168.10.25
  192.168.10.26
)
# LNet communication addresses (on ens38; another NIC works too as long as
# both nodes use the same interface name).
declare -a lnet_address=(
  192.168.209.25
  192.168.209.26
)
# Host names of the cluster nodes, index-aligned with host_address.
declare -a host_hostname=(
  mds005
  mds006
)
# Password used for the SSH logins and for cluster authentication.
host_passwd='110119'
3.安装expect命令:
# Make sure expect is installed; the SSH bootstrap steps of this script drive
# their password prompts through /usr/bin/expect.
if ! command -v expect &> /dev/null; then
  echo "没有expect,安装expect命令"
  # Stop here when the installation fails: nothing after this can work.
  yum install -y expect || exit 1
fi
4.配置免密登录:
# Set up passwordless login: create the local SSH key pair when it is missing.
echo "########################## 本地开始配置ssh ##########################"
if [[ -e ~/.ssh/id_rsa.pub ]]; then
  echo "ssh公钥已创建"
else
  echo "ssh公钥未创建,开始创建"
  mkdir -p -m 700 ~/.ssh
  # Generate the key non-interactively (default path, empty passphrase) rather
  # than answering ssh-keygen's prompts through expect. 2048 bits is used
  # because 1024-bit RSA keys are considered too weak.
  # The "y" on stdin confirms the overwrite question that appears when a
  # private key exists without its .pub file.
  ssh-keygen -q -t rsa -b 2048 -N '' -f ~/.ssh/id_rsa <<< y
fi
# Install the local public key on every managed node. The here-document is the
# expect script; it answers the host-key question and the password prompt.
for node in "${host_address[@]}"; do
  echo "########################## ${node}正在被添加公钥 ##########################"
  /usr/bin/expect << eof
# 设置捕获字符串后,期待回复的超时时间
set timeout 10
spawn ssh-copy-id -i /root/.ssh/id_rsa.pub root@${node}
## 开始进连续捕获
expect {
"connecting (yes/no)?" { send "yes\n"; exp_continue }
"s password:" { send "${host_passwd}\n"; exp_continue }
}
eof
  echo "############# ${node}配置完毕 #############"
done
# Create an SSH key pair on every managed node that does not have one yet.
# Key-based login from this machine to the nodes is already in place here, so
# plain ssh is used instead of expect.
for node in "${host_address[@]}"; do
  echo "########################## ${node}开始创建密钥 ##########################"
  if ssh "root@${node}" 'test -e ~/.ssh/id_rsa.pub'; then
    echo "ssh公钥已创建"
  else
    echo "ssh公钥未创建,开始创建"
    # Non-interactive generation (default path, empty passphrase, 2048 bits
    # instead of the weak 1024). "echo y" confirms the overwrite question that
    # appears when a private key exists without its .pub file.
    ssh "root@${node}" "mkdir -p -m 700 ~/.ssh && echo y | ssh-keygen -q -t rsa -b 2048 -N '' -f ~/.ssh/id_rsa"
  fi
  echo "############# ${node}配置完毕 #############"
done
# Let every managed node install its public key on every node (itself
# included), so the nodes can log in to each other without a password.
for ((j = 0; j < 2; j++)); do
  for ((k = 0; k < 2; k++)); do
    echo "########################## ${host_address[j]}开始分配公钥给${host_address[k]} ##########################"
    # Exactly one process is spawned per expect run; -t gives the nested
    # ssh-copy-id a terminal so its password prompt can be answered.
    /usr/bin/expect << eof
# 设置捕获字符串后,期待回复的超时时间
set timeout 10
spawn ssh -t root@${host_address[j]} "ssh-copy-id -i /root/.ssh/id_rsa.pub root@${host_address[k]}"
## 开始进连续捕获
expect {
"connecting (yes/no)?" { send "yes\n"; exp_continue }
"s password:" { send "${host_passwd}\n"; exp_continue }
}
eof
    echo "############# ${host_address[j]}配置完毕 #############"
  done
done
5.主机名域名映射:
# Optional step: set each node's host name and rewrite /etc/hosts on every
# node. The question is repeated until "Y" or "n" is entered; after 100
# invalid answers the script exits.
for ((i = 0; i < 100; i++)); do
  read -r -p "修改主机名和配置域名映射?(Y/n): " flag
  if [[ "${flag}" == "Y" ]]; then
    sleep 3
    echo "########################## 开始配置主机名和域名映射 ##########################"
    for ((j = 0; j < 2; j++)); do
      # The substitution is quoted so an empty answer (ssh failure) cannot
      # turn the comparison into a syntax error.
      if [[ "$(ssh "root@${host_address[j]}" "hostname")" != "${host_hostname[j]}" ]]; then
        ssh "root@${host_address[j]}" "hostnamectl set-hostname ${host_hostname[j]}"
      fi
      # Reset /etc/hosts to the loopback entries, then append one line per node.
      ssh "root@${host_address[j]}" "cat << eof > /etc/hosts
127.0.0.1 localhost localhost.localdomain localhost4 localhost4.localdomain4
::1 localhost localhost.localdomain localhost6 localhost6.localdomain6
eof"
      for ((k = 0; k < 2; k++)); do
        ssh "root@${host_address[j]}" "echo '${host_address[k]} ${host_hostname[k]}' >> /etc/hosts"
      done
    done
    echo "############# 配置完毕 #############"
    break
  elif [[ "${flag}" == "n" ]]; then
    echo "############# 已跳过步骤-修改主机名和配置域名映射 #############"
    break
  elif ((i == 99)); then
    echo "############# 已退出 #############"
    exit
  fi
done
# Verify that every managed node can reach every node over SSH without a
# password; abort the script on the first pair that cannot.
for ((j = 0; j < 2; j++)); do
  echo "########################## ${host_address[j]}开始测试 ##########################"
  for ((k = 0; k < 2; k++)); do
    # The expect script must report the result itself: a password prompt or a
    # timeout means key login does not work, otherwise the exit status of the
    # spawned ssh (4th element of Tcl's "wait" result) is passed on.
    /usr/bin/expect << EXPECT_SCRIPT
set timeout 10
spawn ssh -t root@${host_address[j]} "ssh root@${host_address[k]} 'exit'"
expect {
"connecting (yes/no)?" { send "yes\n"; exp_continue }
"s password:" { exit 1 }
timeout { exit 1 }
}
exit [lindex [wait] 3]
EXPECT_SCRIPT
    if (($? != 0)); then
      echo "${host_hostname[j]}主机无法免密登录${host_hostname[k]}"
      exit 1
    fi
  done
  echo "############# ${host_address[j]}测试完毕 #############"
done
6.配置防火墙和selinux:
# Optional step: stop and disable firewalld and disable SELinux in its config
# file on both nodes. The question is repeated until "Y" or "n" is entered;
# after 100 invalid answers the script exits.
for ((i = 0; i < 100; i++)); do
  read -r -p "配置防火墙和selinux?(Y/n): " flag
  if [[ "${flag}" == "Y" ]]; then
    sleep 3
    echo "########################## 开始配置防火墙和selinux ##########################"
    for ((j = 0; j < 2; j++)); do
      ssh "root@${host_address[j]}" "systemctl stop firewalld;systemctl disable firewalld"
      # Anchored to the start of the line so that only the SELINUX= setting is
      # rewritten, not the explanatory comment that also contains "SELINUX=".
      ssh "root@${host_address[j]}" "sed -i 's/^SELINUX=.*/SELINUX=disabled/' /etc/selinux/config"
    done
    echo "############# 配置完毕 #############"
    break
  elif [[ "${flag}" == "n" ]]; then
    echo "############# 已跳过步骤-配置防火墙和selinux #############"
    break
  elif ((i == 99)); then
    echo "############# 已退出 #############"
    exit
  fi
done
7.配置yum源:
# Optional step: configure the yum repositories on both nodes (local CD-ROM
# repository, EPEL, Aliyun base repository). The question is repeated until
# "Y" or "n" is entered; after 100 invalid answers the script exits.
for ((i = 0; i < 100; i++)); do
  read -r -p "配置yum源?(Y/n): " flag
  if [[ "${flag}" == "Y" ]]; then
    sleep 3
    echo "########################## 开始配置yum源 ##########################"
    for ((j = 0; j < 2; j++)); do
      echo "########################## 配置${host_address[j]}的本地yum源 ##########################"
      ssh "root@${host_address[j]}" "mkdir /mnt/cdrom &> /dev/null;mount /dev/cdrom /mnt/cdrom"
      # Add the CD-ROM to /etc/fstab only when no such entry exists yet.
      if ! ssh "root@${host_address[j]}" "grep -q '^/dev/cdrom' /etc/fstab"; then
        ssh "root@${host_address[j]}" "cat << eof >> /etc/fstab
/dev/cdrom /mnt/cdrom iso9660 defaults 0 0
eof"
      fi
      ssh "root@${host_address[j]}" "cat << eof > /etc/yum.repos.d/centos-local.repo
[centos7.9]
name=centos7.9
baseurl=file:///mnt/cdrom
enabled=1
gpgcheck=0
eof"
      echo "############# ${host_address[j]}配置完毕 #############"
      echo "########################## 配置${host_address[j]}的扩展源 ##########################"
      ssh "root@${host_address[j]}" "yum install epel-release -y"
      echo "############# ${host_address[j]}配置完毕 #############"
      echo "########################## 配置${host_address[j]}的阿里yum源 ##########################"
      ssh "root@${host_address[j]}" "yum install -y wget"
      # NOTE(review): the Aliyun file is fetched only when CentOS-Base.repo
      # already exists - confirm that a missing file should stay missing.
      if ssh "root@${host_address[j]}" 'test -e /etc/yum.repos.d/CentOS-Base.repo'; then
        ssh "root@${host_address[j]}" "wget -O /etc/yum.repos.d/CentOS-Base.repo http://mirrors.aliyun.com/repo/Centos-7.repo"
      fi
      # Judge the result by the exit status of the remote yum commands; a
      # "\$?" placed inside a double-quoted ssh command is expanded locally
      # and says nothing about the remote side.
      if ! ssh "root@${host_address[j]}" "yum clean all && yum repolist"; then
        echo "yum源配置有误,退出执行脚本"
        exit 1
      fi
      echo "############# ${host_address[j]}配置完毕 #############"
    done
    break
  elif [[ "${flag}" == "n" ]]; then
    echo "############# 已跳过步骤-配置yum源 #############"
    break
  elif ((i == 99)); then
    echo "############# 已退出 #############"
    exit
  fi
done
8.配置chrony时间服务器:
# Optional step: configure chrony on both nodes, with the first node acting as
# the time server for the cluster. The question is repeated until "Y" or "n"
# is entered; after 100 invalid answers the script exits.
for ((i = 0; i < 100; i++)); do
  read -r -p "配置chrony时间服务器?(Y/n): " flag
  if [[ "${flag}" == "Y" ]]; then
    sleep 3
    echo "########################## 开始配置chrony ##########################"
    for ((j = 0; j < 2; j++)); do
      # Install chrony when the service cannot be restarted. ssh hands back
      # the exit status of the remote command, so it is tested directly.
      if ! ssh "root@${host_address[j]}" "systemctl restart chronyd"; then
        echo "${host_address[j]} 安装chrony"
        if ! ssh "root@${host_address[j]}" "yum install -y chrony && systemctl restart chronyd"; then
          echo "安装失败,请排错!"
          exit 1
        fi
      fi
      echo "${host_address[j]}配置chrony"
      # Replace all existing server lines with one pointing at the first node.
      ssh "root@${host_address[j]}" "sed -i '/^server/d' /etc/chrony.conf"
      ssh "root@${host_address[j]}" "sed -i '2a\server '"${host_address[0]}"' iburst\' /etc/chrony.conf"
      if [[ "${host_address[j]}" == "${host_address[0]}" ]]; then
        # First node only: enable the "allow" and "local stratum" settings.
        ssh "root@${host_address[j]}" "sed -i 's/#allow 192.168.0.0\/16/allow 192.168.10.0\/16/' /etc/chrony.conf"
        ssh "root@${host_address[j]}" "sed -i 's/#local stratum 10/local stratum 10/' /etc/chrony.conf"
        sleep 2
      fi
      ssh "root@${host_address[j]}" "systemctl restart chronyd && systemctl enable chronyd &> /dev/null"
      sleep 5
      ssh "root@${host_address[j]}" "timedatectl set-ntp true && chronyc sources -v | sed -n '/^\^\*/p'"
      # A source line starting with "^*" must be present, otherwise abort.
      if [[ -z "$(ssh "root@${host_address[j]}" "chronyc sources -v | sed -n '/^\^\*/p'")" ]]; then
        echo -e "\e[31m此节点${host_address[j]}的chrony配置有误,请手动调试\e[0m"
        exit 1
      fi
      echo "############# ${host_address[j]}配置完毕 #############"
    done
    break
  elif [[ "${flag}" == "n" ]]; then
    echo "############# 已跳过步骤-配置chrony时间服务器 #############"
    break
  elif ((i == 99)); then
    echo "############# 已退出 #############"
    exit
  fi
done
9.安装e2fsprogs:
# Optional step: download and install the Whamcloud e2fsprogs 1.44.5.wc1
# packages on both nodes. The question is repeated until "Y" or "n" is
# entered; after 100 invalid answers the script exits.
for ((i = 0; i < 100; i++)); do
  read -r -p "安装e2fsprogs?(Y/n): " flag
  if [[ "${flag}" == "Y" ]]; then
    sleep 3
    echo "########################## 开始安装e2fsprogs ##########################"
    for ((j = 0; j < 2; j++)); do
      echo "########################## ${host_address[j]}开始安装e2fsprogs ##########################"
      ssh "root@${host_address[j]}" "rm -rf ~/e2fsprogs1.44.5 && mkdir ~/e2fsprogs1.44.5"
      ssh "root@${host_address[j]}" "wget -c -r -nd https://downloads.whamcloud.com/public/e2fsprogs/1.44.5.wc1/el7/RPMS/x86_64/ -P ~/e2fsprogs1.44.5"
      # Drop the directory-listing leftovers so that only packages remain.
      ssh "root@${host_address[j]}" "rm -rf ~/e2fsprogs1.44.5/index.html* ~/e2fsprogs1.44.5/unknown.gif ~/e2fsprogs1.44.5/*.gif ~/e2fsprogs1.44.5/sha256sum"
      ssh "root@${host_address[j]}" "rpm -Uvh ~/e2fsprogs1.44.5/* --force"
      # Look for the downloaded version specifically: a plain "e2fsprogs"
      # match would also be satisfied by a distribution package of that name.
      # NOTE(review): assumes the installed package is named
      # e2fsprogs-1.44.5.wc1-* - confirm against the download directory.
      if ! ssh "root@${host_address[j]}" "rpm -qa | grep -F 'e2fsprogs-1.44.5.wc1'"; then
        echo "安装失败,请排错!"
        exit 1
      fi
      echo "############# ${host_address[j]}配置完毕 #############"
    done
    break
  elif [[ "${flag}" == "n" ]]; then
    echo "############# 已跳过步骤-安装e2fsprogs #############"
    break
  elif ((i == 99)); then
    echo "############# 已退出 #############"
    exit
  fi
done
10.安装lustre软件:
# Optional step: download and install the Lustre 2.12.1 server packages
# (including the patched kernel) on both nodes. The question is repeated until
# "Y" or "n" is entered; after 100 invalid answers the script exits.
lustre_rpm_url="https://downloads.whamcloud.com/public/lustre/lustre-2.12.1/el7/server/RPMS/x86_64"
lustre_rpms=(
  kernel-3.10.0-957.10.1.el7_lustre.x86_64.rpm
  kmod-lustre-2.12.1-1.el7.x86_64.rpm
  kmod-lustre-osd-ldiskfs-2.12.1-1.el7.x86_64.rpm
  lustre-2.12.1-1.el7.x86_64.rpm
  lustre-osd-ldiskfs-mount-2.12.1-1.el7.x86_64.rpm
)
for ((i = 0; i < 100; i++)); do
  read -r -p "安装Lustre软件?(Y/n): " flag
  if [[ "${flag}" == "Y" ]]; then
    sleep 3
    echo "########################## 开始安装lustre ##########################"
    for ((j = 0; j < 2; j++)); do
      echo "########################## ${host_address[j]}开始安装lustre ##########################"
      ssh "root@${host_address[j]}" "yum install -y linux-firmware dracut selinux-policy-targeted kexec-tools libyaml perl"
      ssh "root@${host_address[j]}" "rm -rf ~/lustre2.12.1 && mkdir ~/lustre2.12.1"
      for rpm_file in "${lustre_rpms[@]}"; do
        # A missing package would leave a partial install that the final
        # "grep lustre" check cannot detect, so stop on a failed download.
        if ! ssh "root@${host_address[j]}" "wget ${lustre_rpm_url}/${rpm_file} -P ~/lustre2.12.1"; then
          echo "安装失败,请排错!"
          exit 1
        fi
      done
      ssh "root@${host_address[j]}" "rpm -Uvh ~/lustre2.12.1/*.rpm --force"
      if ! ssh "root@${host_address[j]}" "rpm -qa | grep lustre"; then
        echo "安装失败,请排错!"
        exit 1
      fi
      echo "############# ${host_address[j]}配置完毕 #############"
    done
    break
  elif [[ "${flag}" == "n" ]]; then
    echo "############# 已跳过步骤-安装lustre #############"
    break
  elif ((i == 99)); then
    echo "############# 已退出 #############"
    exit
  fi
done
11.控制重启和检测重启:
# Optional step: reboot both nodes (needed to boot into the newly installed
# kernel) and wait until both are back. The question is repeated until "Y" or
# "n" is entered; after 100 invalid answers the script exits.
for ((i = 0; i < 100; i++)); do
  read -r -p "是否重启集群主机(只有重启kernel内核才能更换生效)?(Y/n): " flag
  if [[ "${flag}" == "Y" ]]; then
    sleep 3
    # Remember each node's boot id before rebooting. A node that still answers
    # SSH right after "reboot" has not restarted yet; only a changed boot id
    # proves that the reboot really happened.
    declare -a boot_id_before=()
    for ((j = 0; j < 2; j++)); do
      boot_id_before[j]=$(ssh "root@${host_address[j]}" "cat /proc/sys/kernel/random/boot_id")
      ssh "root@${host_address[j]}" "reboot"
    done
    for ((k = 0; k < 100; k++)); do
      if ((k == 99)); then
        echo "############# 设备连接超时.... #############"
        exit 1
      fi
      rebooted=0
      for ((j = 0; j < 2; j++)); do
        boot_id_now=$(ssh -o ConnectTimeout=5 "root@${host_address[j]}" "cat /proc/sys/kernel/random/boot_id" 2> /dev/null)
        if [[ -n "${boot_id_now}" && "${boot_id_now}" != "${boot_id_before[j]}" ]]; then
          rebooted=$((rebooted + 1))
        fi
      done
      if ((rebooted == 2)); then
        echo "############# 设备已重启 #############"
        break
      fi
      echo "############# 设备正在重启 #############"
      # Pause between probes instead of polling in a tight loop.
      sleep 3
    done
    break
  elif [[ "${flag}" == "n" ]]; then
    echo "############# 已跳过步骤-重启 #############"
    break
  elif ((i == 99)); then
    echo "############# 已退出 #############"
    exit
  fi
done
12.加载模块:
# Load the Lustre kernel module on each node, list it, and print its modinfo.
for node in "${host_address[@]}"; do
  echo "########################## ${node}加载Lustre模块,查看Lustre版本 ##########################"
  ssh "root@${node}" "modprobe lustre && lsmod | grep lustre"
  ssh "root@${node}" "modinfo lustre"
  echo "############# ${node}配置完毕 #############"
done
13.配置Lnet网卡和Lustre网络:
# Optional step: give the LNet interface of each node its static address and
# configure the LNet networks of the lnet module. The question is repeated
# until "Y" or "n" is entered; after 100 invalid answers the script exits.
for ((i = 0; i < 100; i++)); do
  read -r -p "是否继续配置Lnet网卡的IP地址?(Y/n): " flag
  if [[ "${flag}" == "Y" ]]; then
    sleep 3
    for ((j = 0; j < 2; j++)); do
      echo "########################## ${host_address[j]}配置Lnet网卡的IP地址 ##########################"
      # Show the node's interfaces first; the prompt refers to this output.
      ssh "root@${host_address[j]}" "ip addr"
      read -r -p "请根据以上输出显示中,输入你要配置的LnNet网卡名称:" network_card
      # Require a non-empty name of an interface that really exists; a
      # substring search would also accept partial names such as "ens3".
      if [[ -z "${network_card}" ]] ||
        ! ssh "root@${host_address[j]}" "ip link show dev '${network_card}' &> /dev/null"; then
        echo "网卡不存在,请重试"
        exit 1
      fi
      # Recreate the NetworkManager connection with the static LNet address.
      ssh "root@${host_address[j]}" "nmcli connection delete ${network_card} &> /dev/null"
      ssh "root@${host_address[j]}" "nmcli connection add type ethernet con-name ${network_card} ifname ${network_card} ipv4.method manual ipv4.addresses '${lnet_address[j]}/24' autoconnect yes"
      ssh "root@${host_address[j]}" "nmcli connection up ${network_card}"
      echo "############# ${host_address[j]}配置完毕 #############"
      echo "########################## ${host_address[j]}配置Lnet网络 ##########################"
      # tcp stays on ens33, tcp2 uses the interface chosen above.
      ssh "root@${host_address[j]}" "echo options lnet networks='tcp(ens33),tcp2(${network_card})' > /etc/modprobe.d/lustre.conf"
      # Reload the modules so that the new lnet options take effect.
      ssh "root@${host_address[j]}" "lustre_rmmod && modprobe -v lustre"
      echo "############# ${host_address[j]}配置完毕 #############"
      echo "############# ${host_address[j]}查看Lnet网络 #############"
      ssh "root@${host_address[j]}" "lctl list_nids"
      echo "##########################################################"
    done
    break
  elif [[ "${flag}" == "n" ]]; then
    echo "############# 已跳过步骤-配置Lnet网卡的IP地址 #############"
    break
  elif ((i == 99)); then
    echo "############# 已退出 #############"
    exit
  fi
done
14.格式化lustre:
# Optional step: format the five shared disks as Lustre targets from the first
# node (MGS on sdb, MDT0/MDT1 on sdc/sdd, OST0/OST1 on sde/sdf) and create the
# mount points on both nodes. --reformat overwrites whatever is on the disks.
# The question is repeated until "Y" or "n" is entered; after 100 invalid
# answers the script exits.
for ((i = 0; i < 100; i++)); do
  read -r -p "是否继续格式化lustre?(Y/n): " flag
  if [[ "${flag}" == "Y" ]]; then
    sleep 3
    echo "########################## 开始格式化lustre ##########################"
    ssh "root@${host_address[0]}" "lsblk"
    # Both nodes are listed as service nodes and as MGS nodes on network tcp2.
    service_nodes="--servicenode=${lnet_address[0]}@tcp2 --servicenode=${lnet_address[1]}@tcp2"
    mgs_nodes="--mgsnode=${lnet_address[0]}@tcp2 --mgsnode=${lnet_address[1]}@tcp2"
    format_cmds=(
      "mkfs.lustre --mgs ${service_nodes} --backfstype=ldiskfs --reformat /dev/sdb"
      "mkfs.lustre --fsname global --mdt --index=0 ${service_nodes} ${mgs_nodes} --backfstype=ldiskfs --reformat /dev/sdc"
      "mkfs.lustre --fsname global --mdt --index=1 ${service_nodes} ${mgs_nodes} --backfstype=ldiskfs --reformat /dev/sdd"
      "mkfs.lustre --fsname global --ost --index=0 ${service_nodes} ${mgs_nodes} --backfstype=ldiskfs --reformat /dev/sde"
      "mkfs.lustre --fsname global --ost --index=1 ${service_nodes} ${mgs_nodes} --backfstype=ldiskfs --reformat /dev/sdf"
    )
    for format_cmd in "${format_cmds[@]}"; do
      # Stop at the first failure instead of continuing with a partly
      # formatted file system.
      if ! ssh "root@${host_address[0]}" "${format_cmd}"; then
        echo "############# 格式化失败,请手动查看! #############"
        exit 1
      fi
    done
    echo "############# 格式化完毕 #############"
    echo "########################## 创建挂载点目录 ##########################"
    for ((j = 0; j < 2; j++)); do
      ssh "root@${host_address[j]}" "mkdir -p /mnt/mgs /mnt/mdt1 /mnt/mdt2 /mnt/ost1 /mnt/ost2"
    done
    echo "############# 创建完毕 #############"
    echo "############# ${host_address[1]}查看格式化 #############"
    # Check from the second node that the labels are visible on the shared disks.
    ssh "root@${host_address[1]}" "blkid /dev/sdb /dev/sdc /dev/sdd /dev/sde /dev/sdf"
    echo "##########################################################"
    break
  elif [[ "${flag}" == "n" ]]; then
    echo "############# 已跳过步骤-格式化lustre #############"
    break
  elif ((i == 99)); then
    echo "############# 已退出 #############"
    exit
  fi
done
15.测试lustre挂载:
# Optional step: on each node in turn, mount all five Lustre targets and
# unmount them again. If a mount fails, the targets mounted so far are
# unmounted and the script exits with an error. The question is repeated until
# "Y" or "n" is entered; after 100 invalid answers the script exits.
target_names=(mgs mdt1 mdt2 ost1 ost2)
target_devices=(/dev/sdb /dev/sdc /dev/sdd /dev/sde /dev/sdf)
for ((i = 0; i < 100; i++)); do
  read -r -p "是否继续测试lustre的挂载?(Y/n): " flag
  if [[ "${flag}" == "Y" ]]; then
    sleep 3
    for ((j = 0; j < 2; j++)); do
      echo "########################## ${host_address[j]}测试挂载 ##########################"
      # Unmount commands for everything mounted so far, newest first.
      cleanup_cmd=""
      for ((t = 0; t < ${#target_names[@]}; t++)); do
        target=${target_names[t]}
        echo "########################## ${target}测试挂载 ##########################"
        ssh "root@${host_address[j]}" "umount /mnt/${target} &> /dev/null"
        if ! ssh "root@${host_address[j]}" "mount -t lustre ${target_devices[t]} /mnt/${target}"; then
          echo "############# ${target}测试有误,请手动查看! #############"
          if [[ -n "${cleanup_cmd}" ]]; then
            ssh "root@${host_address[j]}" "${cleanup_cmd}"
          fi
          exit 1
        fi
        # Prepend, so targets are unmounted in reverse mount order.
        cleanup_cmd="umount /mnt/${target}${cleanup_cmd:+;${cleanup_cmd}}"
      done
      ssh "root@${host_address[j]}" "${cleanup_cmd}"
    done
    echo "############# 测试完毕 #############"
    break
  elif [[ "${flag}" == "n" ]]; then
    echo "############# 已跳过步骤-测试挂载 #############"
    break
  elif ((i == 99)); then
    echo "############# 已退出 #############"
    exit
  fi
done
16.安装高可用和创建集群:
# Optional step: install pacemaker/pcs on both nodes, authenticate the nodes,
# create the cluster (only when it does not exist yet) and start it. The
# question is repeated until "Y" or "n" is entered; after 100 invalid answers
# the script exits.
# One name is used for both the existence check and the creation, so that an
# already created cluster is recognised on a second run.
cluster_name=mylustre
for ((i = 0; i < 100; i++)); do
  read -r -p "是否继续安装packemaker和corosync软件和创建集群?(Y/n): " flag
  if [[ "${flag}" == "Y" ]]; then
    sleep 3
    if ! ssh "root@${host_address[0]}" 'pcs status' | grep -q "${cluster_name}"; then
      for ((j = 0; j < 2; j++)); do
        echo "########################## ${host_address[j]}开始安装 ##########################"
        ssh "root@${host_address[j]}" "yum install pacemaker pcs policycoreutils-python -y"
        echo "############# ${host_address[j]}安装完毕 #############"
        echo "########################## ${host_address[j]}开始配置 ##########################"
        ssh "root@${host_address[j]}" "systemctl enable pcsd;systemctl restart pcsd"
        # The hacluster account gets the same password as the SSH logins.
        ssh "root@${host_address[j]}" "echo '${host_passwd}' |passwd --stdin hacluster"
        echo "############# ${host_address[j]}配置完毕 #############"
      done
      # Answer the user name / password prompts of "pcs cluster auth".
      /usr/bin/expect << eof
# 设置捕获字符串后,期待回复的超时时间
set timeout 10
spawn ssh root@${host_address[0]} "pcs cluster auth ${host_hostname[*]}"
## 开始进连续捕获
expect {
"Username:" { send "hacluster\n"; exp_continue }
"Password:" { send "${host_passwd}\n"; exp_continue }
}
eof
      echo "########################## 开始创建集群 ##########################"
      ssh "root@${host_address[0]}" "pcs cluster setup --name ${cluster_name} ${host_hostname[*]}"
      echo "############# 创建完毕 #############"
    fi
    echo "########################## 启动集群 ##########################"
    ssh "root@${host_address[0]}" "pcs cluster start --all"
    echo "############# 启动完毕 #############"
    break
  elif [[ "${flag}" == "n" ]]; then
    echo "############# 已跳过步骤-安装packemaker和corosync软件和配置 #############"
    break
  elif ((i == 99)); then
    echo "############# 已退出 #############"
    exit
  fi
done
17.配置资源防护:
# Optional step: install the fence agents and create one fence_heuristics_ping
# stonith resource per node. When both resources already run, nothing is
# changed; otherwise existing ones are deleted and recreated. The question is
# repeated until "Y" or "n" is entered; after 100 invalid answers the script
# exits.
for ((i = 0; i < 100; i++)); do
  read -r -p "是否继续配置资源防护?(Y/n): " flag
  if [[ "${flag}" == "Y" ]]; then
    sleep 3
    echo "########################## 开始配置资源防护 ##########################"
    for ((j = 0; j < 2; j++)); do
      ssh "root@${host_address[j]}" "yum install -y fence-agents-all"
    done
    ssh "root@${host_address[0]}" "pcs property set stonith-enabled=true"
    # Cluster state is always queried on the first node; the loop index above
    # points past the end of the host list once the loop has finished.
    stonith_lines=$(ssh "root@${host_address[0]}" "pcs status" | grep "stonith:fence_heuristics_ping")
    started_count=$(grep -c "Started" <<< "${stonith_lines}")
    if ((started_count == 2)); then
      echo "############# stonith已创建,并且正常运行,跳过配置stonith #############"
      break
    fi
    # Remove stale resources (one per node) before creating them again.
    if [[ -n "${stonith_lines}" ]]; then
      for node_name in "${host_hostname[@]}"; do
        ssh "root@${host_address[0]}" "pcs stonith delete stonith-ping-${node_name}"
      done
    fi
    ssh "root@${host_address[0]}" "pcs stonith create stonith-ping-${host_hostname[0]} fence_heuristics_ping ping_targets=${host_address[0]}"
    ssh "root@${host_address[0]}" "pcs stonith create stonith-ping-${host_hostname[1]} fence_heuristics_ping ping_targets=${host_address[1]}"
    ssh "root@${host_address[0]}" "pcs status"
    echo "############# 配置完毕 #############"
    break
  elif [[ "${flag}" == "n" ]]; then
    echo "############# 已跳过步骤-配置资源防护 #############"
    break
  elif ((i == 99)); then
    echo "############# 已退出 #############"
    exit
  fi
done
18.创建lustre资源:
# Optional step: (re)create the pacemaker resources for the five Lustre
# targets, their location and ordering constraints, and the LNet/Lustre health
# monitors. Existing resources of the same names are deleted first. The
# question is repeated until "Y" or "n" is entered; after 100 invalid answers
# the script exits.
resource_names=(mgs mdt1 mdt2 ost1 ost2)
resource_devices=(/dev/sdb /dev/sdc /dev/sdd /dev/sde /dev/sdf)
for ((i = 0; i < 100; i++)); do
  read -r -p "是否继续创建lustre资源(可删除后再创建)?(Y/n): " flag
  if [[ "${flag}" == "Y" ]]; then
    sleep 3
    # Install the Lustre resource agents (ocf:lustre:*) where they are missing.
    for ((j = 0; j < 2; j++)); do
      if [[ -z "$(ssh "root@${host_address[j]}" 'rpm -qa | grep lustre-resource-agents-2.12.1-1')" ]]; then
        echo "########################## ${host_address[j]}安装ocf:lustre:Lustre包 ##########################"
        ssh "root@${host_address[j]}" "wget https://downloads.whamcloud.com/public/lustre/lustre-2.12.1/el7/server/RPMS/x86_64/lustre-resource-agents-2.12.1-1.el7.x86_64.rpm"
        ssh "root@${host_address[j]}" "rpm -ivh lustre-resource-agents-2.12.1-1.el7.x86_64.rpm"
        echo "############# 安装完毕 #############"
      fi
    done
    echo "########################## 开始删除lustre资源 ##########################"
    ssh "root@${host_address[0]}" "pcs resource delete global-ost2 &> /dev/null;pcs resource delete global-ost1 &> /dev/null"
    ssh "root@${host_address[0]}" "pcs resource delete global-mdt1 &> /dev/null;pcs resource delete global-mdt2 &> /dev/null"
    ssh "root@${host_address[0]}" "pcs resource delete global-mgs &> /dev/null"
    echo "############# 删除完毕 #############"
    echo "########################## 开始创建lustre资源 ##########################"
    for ((t = 0; t < ${#resource_names[@]}; t++)); do
      res=${resource_names[t]}
      if ! ssh "root@${host_address[0]}" "pcs resource create global-${res} ocf:lustre:Lustre target=${resource_devices[t]} mountpoint=/mnt/${res}"; then
        echo "############# ${res}资源创建有误,请手动查看! #############"
        exit 1
      fi
    done
    # Preferred node per resource (score 10): mgs, mdt1 and ost1 on the first
    # node, mdt2 and ost2 on the second. The MGS must start before every other
    # target. Every constraint is checked, not only the last one.
    preferred_nodes=("${host_hostname[0]}" "${host_hostname[0]}" "${host_hostname[1]}" "${host_hostname[0]}" "${host_hostname[1]}")
    constraint_cmds=()
    for ((t = 0; t < ${#resource_names[@]}; t++)); do
      res=${resource_names[t]}
      constraint_cmds+=("pcs constraint location add global-constraint-${res} global-${res} ${preferred_nodes[t]} 10")
    done
    for res in "${resource_names[@]:1}"; do
      constraint_cmds+=("pcs constraint order start global-mgs then start global-${res}")
    done
    for constraint_cmd in "${constraint_cmds[@]}"; do
      if ! ssh "root@${host_address[0]}" "${constraint_cmd}"; then
        echo "############# 资源约束创建有误,请手动查看! #############"
        exit 1
      fi
    done
    # Read the interface behind tcp2 from each node's lnet options line. "\$3"
    # is escaped so that awk on the remote side receives it, not this shell.
    lnet_name1=$(ssh "root@${host_address[0]}" "awk '{print \$3}' /etc/modprobe.d/lustre.conf" | awk -F ',' '{print $2}' | awk -F '(' '{print $2}' | awk -F ')' '{print $1}')
    lnet_name2=$(ssh "root@${host_address[1]}" "awk '{print \$3}' /etc/modprobe.d/lustre.conf" | awk -F ',' '{print $2}' | awk -F '(' '{print $2}' | awk -F ')' '{print $1}')
    ssh "root@${host_address[0]}" "pcs resource delete ping-lnet &>/dev/null;pcs resource delete global-healthLUSTRE &> /dev/null"
    # Both health monitors are checked, not only the second one.
    if ! ssh "root@${host_address[0]}" "pcs resource create ping-lnet ocf:lustre:healthLNET lctl=true multiplier=1001 device=${lnet_name1} host_list='${lnet_address[0]}@tcp2 ${lnet_address[1]}@tcp2' --clone" ||
      ! ssh "root@${host_address[0]}" "pcs resource create global-healthLUSTRE ocf:lustre:healthLUSTRE --clone"; then
      echo "############# 资源监听创建有误,请手动查看! #############"
      exit 1
    fi
    echo "############# 创建完毕 #############"
    echo "########################## 查看集群状态 ##########################"
    echo "########################################################################"
    echo "########################################################################"
    ssh "root@${host_address[0]}" "pcs status"
    echo "########################################################################"
    echo "########################################################################"
    echo "################################################## 脚本到此全部执行完毕 ##################################################"
    break
  elif [[ "${flag}" == "n" ]]; then
    echo "############# 已跳过步骤-创建lustre资源 #############"
    break
  elif ((i == 99)); then
    echo "############# 已退出 #############"
    exit
  fi
done
19.完整脚本:
# Automated deployment of a two-node highly available Lustre cluster.
# Preconditions: the shared disks /dev/sdb, /dev/sdc, /dev/sdd, /dev/sde and
# /dev/sdf are attached, and each node has two NICs, one of which already has
# its IP address configured.

# Management addresses of the cluster nodes.
declare -a host_address=(
  192.168.10.25
  192.168.10.26
)
# LNet communication addresses of the cluster nodes.
declare -a lnet_address=(
  192.168.209.25
  192.168.209.26
)
# Host names of the cluster nodes, index-aligned with host_address.
declare -a host_hostname=(
  mds005
  mds006
)
# Password used for the SSH logins and for cluster authentication.
host_passwd='110119'
# Make sure expect is installed; the SSH bootstrap steps of this script drive
# their password prompts through /usr/bin/expect.
if ! command -v expect &> /dev/null; then
  echo "没有expect,安装expect命令"
  # Stop here when the installation fails: nothing after this can work.
  yum install -y expect || exit 1
fi
# Set up passwordless login: create the local SSH key pair when it is missing.
echo "########################## 本地开始配置ssh ##########################"
if [[ -e ~/.ssh/id_rsa.pub ]]; then
  echo "ssh公钥已创建"
else
  echo "ssh公钥未创建,开始创建"
  mkdir -p -m 700 ~/.ssh
  # Generate the key non-interactively (default path, empty passphrase) rather
  # than answering ssh-keygen's prompts through expect. 2048 bits is used
  # because 1024-bit RSA keys are considered too weak.
  # The "y" on stdin confirms the overwrite question that appears when a
  # private key exists without its .pub file.
  ssh-keygen -q -t rsa -b 2048 -N '' -f ~/.ssh/id_rsa <<< y
fi
# Install the local public key on every managed node. The here-document is the
# expect script; it answers the host-key question and the password prompt.
for node in "${host_address[@]}"; do
  echo "########################## ${node}正在被添加公钥 ##########################"
  /usr/bin/expect << eof
# 设置捕获字符串后,期待回复的超时时间
set timeout 10
spawn ssh-copy-id -i /root/.ssh/id_rsa.pub root@${node}
## 开始进连续捕获
expect {
"connecting (yes/no)?" { send "yes\n"; exp_continue }
"s password:" { send "${host_passwd}\n"; exp_continue }
}
eof
  echo "############# ${node}配置完毕 #############"
done
# Create an SSH key pair on every managed node that does not have one yet.
# Key-based login from this machine to the nodes is already in place here, so
# plain ssh is used instead of expect.
for node in "${host_address[@]}"; do
  echo "########################## ${node}开始创建密钥 ##########################"
  if ssh "root@${node}" 'test -e ~/.ssh/id_rsa.pub'; then
    echo "ssh公钥已创建"
  else
    echo "ssh公钥未创建,开始创建"
    # Non-interactive generation (default path, empty passphrase, 2048 bits
    # instead of the weak 1024). "echo y" confirms the overwrite question that
    # appears when a private key exists without its .pub file.
    ssh "root@${node}" "mkdir -p -m 700 ~/.ssh && echo y | ssh-keygen -q -t rsa -b 2048 -N '' -f ~/.ssh/id_rsa"
  fi
  echo "############# ${node}配置完毕 #############"
done
# Let every managed node install its public key on every node (itself
# included), so the nodes can log in to each other without a password.
for ((j = 0; j < 2; j++)); do
  echo "########################## ${host_address[j]}开始分配公钥 ##########################"
  for ((k = 0; k < 2; k++)); do
    # -t allocates a terminal on the managed node; without it the nested
    # ssh-copy-id cannot show the password prompt that expect answers.
    /usr/bin/expect << eof
# 设置捕获字符串后,期待回复的超时时间
set timeout 10
spawn ssh -t root@${host_address[j]} "ssh-copy-id -i /root/.ssh/id_rsa.pub root@${host_address[k]}"
## 开始进连续捕获
expect {
"connecting (yes/no)?" { send "yes\n"; exp_continue }
"s password:" { send "${host_passwd}\n"; exp_continue }
}
eof
  done
  echo "############# ${host_address[j]}分配完毕 #############"
done
# Optional step: set each node's host name and rewrite /etc/hosts on every
# node. The question is repeated until "Y" or "n" is entered; after 100
# invalid answers the script exits.
for ((i = 0; i < 100; i++)); do
  read -r -p "修改主机名和配置域名映射?(Y/n): " flag
  if [[ "${flag}" == "Y" ]]; then
    sleep 3
    echo "########################## 开始配置主机名和域名映射 ##########################"
    for ((j = 0; j < 2; j++)); do
      # The substitution is quoted so an empty answer (ssh failure) cannot
      # turn the comparison into a syntax error.
      if [[ "$(ssh "root@${host_address[j]}" "hostname")" != "${host_hostname[j]}" ]]; then
        ssh "root@${host_address[j]}" "hostnamectl set-hostname ${host_hostname[j]}"
      fi
      # Reset /etc/hosts to the loopback entries, then append one line per node.
      ssh "root@${host_address[j]}" "cat << eof > /etc/hosts
127.0.0.1 localhost localhost.localdomain localhost4 localhost4.localdomain4
::1 localhost localhost.localdomain localhost6 localhost6.localdomain6
eof"
      for ((k = 0; k < 2; k++)); do
        ssh "root@${host_address[j]}" "echo '${host_address[k]} ${host_hostname[k]}' >> /etc/hosts"
      done
    done
    echo "############# 配置完毕 #############"
    break
  elif [[ "${flag}" == "n" ]]; then
    echo "############# 已跳过步骤-修改主机名和配置域名映射 #############"
    break
  elif ((i == 99)); then
    echo "############# 已退出 #############"
    exit
  fi
done
# Optional step: stop and disable firewalld and disable SELinux in its config
# file on both nodes. The question is repeated until "Y" or "n" is entered;
# after 100 invalid answers the script exits.
for ((i = 0; i < 100; i++)); do
  read -r -p "配置防火墙和selinux?(Y/n): " flag
  if [[ "${flag}" == "Y" ]]; then
    sleep 3
    echo "########################## 开始配置防火墙和selinux ##########################"
    for ((j = 0; j < 2; j++)); do
      ssh "root@${host_address[j]}" "systemctl stop firewalld;systemctl disable firewalld"
      # Anchored to the start of the line so that only the SELINUX= setting is
      # rewritten, not the explanatory comment that also contains "SELINUX=".
      ssh "root@${host_address[j]}" "sed -i 's/^SELINUX=.*/SELINUX=disabled/' /etc/selinux/config"
    done
    echo "############# 配置完毕 #############"
    break
  elif [[ "${flag}" == "n" ]]; then
    echo "############# 已跳过步骤-配置防火墙和selinux #############"
    break
  elif ((i == 99)); then
    echo "############# 已退出 #############"
    exit
  fi
done
# Optional step: configure the yum repositories on both nodes (local CD-ROM
# repository, EPEL, Aliyun base repository). The question is repeated until
# "Y" or "n" is entered; after 100 invalid answers the script exits.
for ((i = 0; i < 100; i++)); do
  read -r -p "配置yum源?(Y/n): " flag
  if [[ "${flag}" == "Y" ]]; then
    sleep 3
    # The banner names this step (yum), not the SSH setup.
    echo "########################## 开始配置yum源 ##########################"
    for ((j = 0; j < 2; j++)); do
      echo "########################## 配置${host_address[j]}的本地yum源 ##########################"
      ssh "root@${host_address[j]}" "mkdir /mnt/cdrom &> /dev/null;mount /dev/cdrom /mnt/cdrom"
      # Add the CD-ROM to /etc/fstab only when no such entry exists yet.
      if ! ssh "root@${host_address[j]}" "grep -q '^/dev/cdrom' /etc/fstab"; then
        ssh "root@${host_address[j]}" "cat << eof >> /etc/fstab
/dev/cdrom /mnt/cdrom iso9660 defaults 0 0
eof"
      fi
      ssh "root@${host_address[j]}" "cat << eof > /etc/yum.repos.d/centos-local.repo
[centos7.9]
name=centos7.9
baseurl=file:///mnt/cdrom
enabled=1
gpgcheck=0
eof"
      echo "############# ${host_address[j]}配置完毕 #############"
      echo "########################## 配置${host_address[j]}的扩展源 ##########################"
      ssh "root@${host_address[j]}" "yum install epel-release -y"
      echo "############# ${host_address[j]}配置完毕 #############"
      echo "########################## 配置${host_address[j]}的阿里yum源 ##########################"
      ssh "root@${host_address[j]}" "yum install -y wget"
      # NOTE(review): the Aliyun file is fetched only when CentOS-Base.repo
      # already exists - confirm that a missing file should stay missing.
      if ssh "root@${host_address[j]}" 'test -e /etc/yum.repos.d/CentOS-Base.repo'; then
        ssh "root@${host_address[j]}" "wget -O /etc/yum.repos.d/CentOS-Base.repo http://mirrors.aliyun.com/repo/Centos-7.repo"
      fi
      # Judge the result by the exit status of the remote yum commands; a
      # "\$?" placed inside a double-quoted ssh command is expanded locally
      # and says nothing about the remote side.
      if ! ssh "root@${host_address[j]}" "yum clean all && yum repolist"; then
        echo "yum源配置有误,退出执行脚本"
        exit 1
      fi
      echo "############# ${host_address[j]}配置完毕 #############"
    done
    break
  elif [[ "${flag}" == "n" ]]; then
    echo "############# 已跳过步骤-配置yum源 #############"
    break
  elif ((i == 99)); then
    echo "############# 已退出 #############"
    exit
  fi
done
# Configure chrony time synchronisation on both nodes. Every node gets
# host_address[0] as its only "server" entry; node 0 additionally enables
# "allow" and "local stratum 10" so it can serve time to its peer.
# Prompt handling: "Y" runs the step, "n" skips it, anything else re-prompts
# (the script exits after the 100th prompt).
for ((i=0;i<100;i++));do
  read -p "配置chrony时间服务器?(Y/n): " flag
  if [ "${flag}" == "Y" ];then
    sleep 3
    echo "########################## 开始配置chrony ##########################"
    for ((j=0;j<2;j++));do
      # Use the remote command's exit status directly. The previous form put
      # $? inside a double-quoted string, so it was expanded locally and the
      # install branch below could never run.
      if ! ssh "root@${host_address[j]}" "systemctl restart chronyd";then
        echo "${host_address[j]} 安装chrony"
        if ! ssh "root@${host_address[j]}" "yum install -y chrony && systemctl restart chronyd";then
          echo "安装失败,请排错!"
          exit 1
        fi
      fi
      echo "${host_address[j]}配置chrony"
      # Drop the default server lines, then insert our own after line 2.
      ssh "root@${host_address[j]}" "sed -i '/^server/d' /etc/chrony.conf"
      ssh "root@${host_address[j]}" "sed -i '2a\server '"${host_address[0]}"' iburst\' /etc/chrony.conf"
      if [[ "${host_address[j]}" == "${host_address[0]}" ]];then
        # Node 0 acts as the time source for the cluster.
        ssh "root@${host_address[j]}" "sed -i 's/#allow 192.168.0.0\/16/allow 192.168.10.0\/16/' /etc/chrony.conf"
        ssh "root@${host_address[j]}" "sed -i 's/#local stratum 10/local stratum 10/' /etc/chrony.conf"
        sleep 2
      fi
      ssh "root@${host_address[j]}" "systemctl restart chronyd && systemctl enable chronyd &> /dev/null"
      sleep 5
      ssh "root@${host_address[j]}" "timedatectl set-ntp true && chronyc sources -v | sed -n '/^\^\*/p'"
      # A line starting with "^*" in the chronyc output marks the selected
      # source; without one the node is not synchronised.
      if [ -z "$(ssh "root@${host_address[j]}" "chronyc sources -v | sed -n '/^\^\*/p'")" ];then
        echo -e "\e[31m此节点${host_address[j]}的chrony配置有误,请手动调试\e[0m"
        exit 1
      fi
      echo "############# ${host_address[j]}配置完毕 #############"
    done
    break
  elif [ "${flag}" == "n" ];then
    echo "############# 已跳过步骤-配置chrony时间服务器 #############"
    break
  elif [ ${i} -eq 99 ];then
    echo "############# 已退出 #############"
    exit
  else continue;fi
done
# Install the Whamcloud e2fsprogs 1.44.5 packages on both nodes.
# "Y" runs the step, "n" skips it, any other answer re-prompts; the script
# exits once the 100th prompt is answered with something else.
for ((i=0;i<100;i++));do
  read -p "安装e2fsprogs?(Y/n): " flag
  case "${flag}" in
    Y)
      sleep 3
      echo "########################## 开始安装e2fsprogs ##########################"
      # Remote download directory; "~" is expanded by the remote shell.
      e2fs_dir='~/e2fsprogs1.44.5'
      e2fs_url='https://downloads.whamcloud.com/public/e2fsprogs/1.44.5.wc1/el7/RPMS/x86_64/'
      for ((j=0;j<2;j++));do
        e2fs_node="root@${host_address[j]}"
        echo "########################## ${host_address[j]}开始安装e2fsprogs ##########################"
        ssh ${e2fs_node} "rm -rf ${e2fs_dir} && mkdir ${e2fs_dir}"
        ssh ${e2fs_node} "wget -c -r -nd ${e2fs_url} -P ${e2fs_dir}"
        # Remove the directory-listing leftovers that wget -r downloads.
        ssh ${e2fs_node} "rm -rf ${e2fs_dir}/index.html* ${e2fs_dir}/unknown.gif ${e2fs_dir}/*.gif ${e2fs_dir}/sha256sum"
        ssh ${e2fs_node} "rpm -Uvh ${e2fs_dir}/* --force"
        if ! ssh ${e2fs_node} "rpm -qa | grep e2fsprogs";then
          echo "安装失败,请排错!"
          exit
        fi
        echo "############# ${host_address[j]}配置完毕 #############"
      done
      break
      ;;
    n)
      echo "############# 已跳过步骤-安装e2fsprogs #############"
      break
      ;;
    *)
      if [ ${i} -eq 99 ];then
        echo "############# 已退出 #############"
        exit
      fi
      ;;
  esac
done
# Install the Lustre 2.12.1 server packages (patched kernel, kmods, userland)
# on both nodes. "Y" runs the step, "n" skips it, any other answer
# re-prompts; the script exits once the 100th prompt is exhausted.
for ((i=0;i<100;i++));do
  read -p "安装Lustre软件?(Y/n): " flag
  case "${flag}" in
    Y)
      sleep 3
      echo "########################## 开始安装lustre ##########################"
      lustre_rpm_base='https://downloads.whamcloud.com/public/lustre/lustre-2.12.1/el7/server/RPMS/x86_64'
      # Packages are fetched in this order.
      lustre_rpm_files=(
        kernel-3.10.0-957.10.1.el7_lustre.x86_64.rpm
        kmod-lustre-2.12.1-1.el7.x86_64.rpm
        kmod-lustre-osd-ldiskfs-2.12.1-1.el7.x86_64.rpm
        lustre-2.12.1-1.el7.x86_64.rpm
        lustre-osd-ldiskfs-mount-2.12.1-1.el7.x86_64.rpm
      )
      for ((j=0;j<2;j++));do
        lustre_node="root@${host_address[j]}"
        echo "########################## ${host_address[j]}开始安装lustre ##########################"
        ssh ${lustre_node} "yum install -y linux-firmware dracut selinux-policy-targeted kexec-tools libyaml perl"
        ssh ${lustre_node} "rm -rf ~/lustre2.12.1 && mkdir ~/lustre2.12.1"
        for lustre_rpm in "${lustre_rpm_files[@]}";do
          ssh ${lustre_node} "wget ${lustre_rpm_base}/${lustre_rpm} -P ~/lustre2.12.1"
        done
        ssh ${lustre_node} "rpm -Uvh ~/lustre2.12.1/*.rpm --force"
        if ! ssh ${lustre_node} "rpm -qa | grep lustre";then
          echo "安装失败,请排错!"
          exit
        fi
        echo "############# ${host_address[j]}配置完毕 #############"
      done
      break
      ;;
    n)
      echo "############# 已跳过步骤-安装lustre #############"
      break
      ;;
    *)
      if [ ${i} -eq 99 ];then
        echo "############# 已退出 #############"
        exit
      fi
      ;;
  esac
done
# Optionally reboot both nodes so the newly installed kernel takes effect,
# then poll over ssh until both are reachable again.
# "Y" reboots, "n" skips, any other answer re-prompts; the script exits
# after the 100th prompt.
for ((i=0;i<100;i++));do
  read -p "是否重启集群主机(只有重启kernel内核才能更换生效)?(Y/n): " flag
  if [ "${flag}" == "Y" ];then
    sleep 3
    for ((j=0;j<2;j++));do
      ssh "root@${host_address[j]}" "reboot"
    done
    # Give the nodes time to actually go down; otherwise the first probe can
    # still reach the old sshd and report the reboot as finished.
    sleep 10
    for ((k=0;k<100;k++));do
      if [ ${k} -eq 99 ];then
        echo "############# 设备连接超时.... #############"
        exit 1
      fi
      if ssh -o ConnectTimeout=5 "root@${host_address[0]}" "exit" \
        && ssh -o ConnectTimeout=5 "root@${host_address[1]}" "exit";then
        echo "############# 设备已重启 #############"
        break
      else
        echo "############# 设备正在重启 #############"
        # ConnectTimeout only bounds attempts that hang; a refused connection
        # fails instantly, so pause here to avoid burning every retry within
        # a few seconds.
        sleep 5
      fi
    done
    break
  elif [ "${flag}" == "n" ];then
    echo "############# 已跳过步骤-重启 #############"
    break
  elif [ ${i} -eq 99 ];then
    echo "############# 已退出 #############"
    exit
  else continue;fi
done
sleep 5
# Verify the Lustre installation: load the kernel module on each node and
# print its module information.
for ((node=0; node<2; node++));do
  check_target="root@${host_address[node]}"
  echo "########################## ${host_address[node]}加载Lustre模块,查看Lustre版本 ##########################"
  ssh ${check_target} "modprobe lustre && lsmod | grep lustre"
  ssh ${check_target} "modinfo lustre"
  echo "############# ${host_address[node]}配置完毕 #############"
done
# Configure the LNet network interface on each node: assign lnet_address[j]/24
# to the interface chosen by the operator via NetworkManager, write the
# "options lnet networks=..." module configuration, reload the Lustre modules
# and list the resulting NIDs.
# "Y" runs the step, "n" skips it, any other answer re-prompts; the script
# exits after the 100th prompt.
for ((i=0;i<100;i++));do
  read -p "是否继续配置Lnet网卡的IP地址?(Y/n): " flag
  if [ "${flag}" == "Y" ];then
    sleep 3
    # Characters accepted in an interface name. This rejects empty input and
    # anything that could be interpreted by the remote shell.
    nic_name_re='^[A-Za-z0-9_.:-]+$'
    for ((j=0;j<2;j++));do
      echo "########################## ${host_address[j]}配置Lnet网卡的IP地址 ##########################"
      # Show the node's interfaces so the prompt below has output to refer to.
      ssh "root@${host_address[j]}" "ip addr"
      read -p "请根据以上输出显示中,输入你要配置的LnNet网卡名称:" network_card
      # Require an exact interface name. The earlier substring grep accepted
      # partial names such as "ens3" for "ens33".
      if [[ ! "${network_card}" =~ ${nic_name_re} ]] \
        || ! ssh "root@${host_address[j]}" "ip link show dev ${network_card}" &> /dev/null;then
        echo "网卡不存在,请重试"
        exit 1
      fi
      # Recreate the connection profile from scratch.
      ssh "root@${host_address[j]}" "nmcli connection delete ${network_card} &> /dev/null"
      ssh "root@${host_address[j]}" "nmcli connection add type ethernet con-name ${network_card} ifname ${network_card} ipv4.method manual ipv4.addresses '${lnet_address[j]}/24' autoconnect yes"
      ssh "root@${host_address[j]}" "nmcli connection up ${network_card}"
      echo "############# ${host_address[j]}配置完毕 #############"
      echo "########################## ${host_address[j]}配置Lnet网络 ##########################"
      # tcp is bound to ens33 and tcp2 to the chosen interface.
      ssh "root@${host_address[j]}" "echo options lnet networks='tcp(ens33),tcp2(${network_card})' > /etc/modprobe.d/lustre.conf"
      # Reload the modules so the new LNet configuration is picked up.
      ssh "root@${host_address[j]}" "lustre_rmmod && modprobe -v lustre"
      echo "############# ${host_address[j]}配置完毕 #############"
      echo "############# ${host_address[j]}查看Lnet网络 #############"
      ssh "root@${host_address[j]}" "lctl list_nids"
      echo "##########################################################"
    done
    break
  elif [ "${flag}" == "n" ];then
    echo "############# 已跳过步骤-配置Lnet网卡的IP地址 #############"
    break
  elif [ ${i} -eq 99 ];then
    echo "############# 已退出 #############"
    exit
  else continue;fi
done
# Format the five shared disks from node 0 as one MGS (/dev/sdb), two MDTs
# (/dev/sdc, /dev/sdd) and two OSTs (/dev/sde, /dev/sdf) of file system
# "global", then create the mount point directories on both nodes.
# "Y" runs the step, "n" skips it, any other answer re-prompts; the script
# exits once the 100th prompt is exhausted.
for ((i=0;i<100;i++));do
  read -p "是否继续格式化lustre?(Y/n): " flag
  case "${flag}" in
    Y)
      sleep 3
      echo "########################## 开始格式化lustre ##########################"
      fmt_node="root@${host_address[0]}"
      # Option groups shared by the mkfs.lustre invocations below.
      fmt_service_nids="--servicenode=${lnet_address[0]}@tcp2 --servicenode=${lnet_address[1]}@tcp2"
      fmt_mgs_nids="--mgsnode=${lnet_address[0]}@tcp2 --mgsnode=${lnet_address[1]}@tcp2"
      fmt_backend="--backfstype=ldiskfs --reformat"
      ssh ${fmt_node} "lsblk"
      ssh ${fmt_node} "mkfs.lustre --mgs ${fmt_service_nids} ${fmt_backend} /dev/sdb"
      ssh ${fmt_node} "mkfs.lustre --fsname global --mdt --index=0 ${fmt_service_nids} ${fmt_mgs_nids} ${fmt_backend} /dev/sdc"
      ssh ${fmt_node} "mkfs.lustre --fsname global --mdt --index=1 ${fmt_service_nids} ${fmt_mgs_nids} ${fmt_backend} /dev/sdd"
      ssh ${fmt_node} "mkfs.lustre --fsname global --ost --index=0 ${fmt_service_nids} ${fmt_mgs_nids} ${fmt_backend} /dev/sde"
      ssh ${fmt_node} "mkfs.lustre --fsname global --ost --index=1 ${fmt_service_nids} ${fmt_mgs_nids} ${fmt_backend} /dev/sdf"
      echo "############# 格式化完毕 #############"
      echo "########################## 创建挂载点目录 ##########################"
      # Assemble one remote command that creates every mount point.
      fmt_mkdir_cmd=""
      for fmt_mp in mgs mdt1 mdt2 ost1 ost2;do
        fmt_mkdir_cmd+="${fmt_mkdir_cmd:+;}mkdir /mnt/${fmt_mp} &> /dev/null"
      done
      for ((j=0;j<2;j++));do
        ssh root@${host_address[j]} "${fmt_mkdir_cmd}"
      done
      echo "############# 创建完毕 #############"
      echo "############# ${host_address[1]}查看格式化 #############"
      ssh root@${host_address[1]} "blkid /dev/sdb /dev/sdc /dev/sdd /dev/sde /dev/sdf"
      echo "##########################################################"
      break
      ;;
    n)
      echo "############# 已跳过步骤-格式化lustre #############"
      break
      ;;
    *)
      if [ ${i} -eq 99 ];then
        echo "############# 已退出 #############"
        exit
      fi
      ;;
  esac
done
# Trial-mount every Lustre target on each node in turn (mgs, mdt1, mdt2,
# ost1, ost2). If a mount fails, the targets mounted so far are unmounted in
# reverse order and the script exits; after a full pass everything is
# unmounted again.
# "Y" runs the step, "n" skips it, any other answer re-prompts; the script
# exits once the 100th prompt is exhausted.
for ((i=0;i<100;i++));do
  read -p "是否继续测试lustre的挂载?(Y/n): " flag
  case "${flag}" in
    Y)
      sleep 3
      trial_names=(mgs mdt1 mdt2 ost1 ost2)
      trial_devs=(/dev/sdb /dev/sdc /dev/sdd /dev/sde /dev/sdf)
      for ((j=0;j<2;j++));do
        echo "########################## ${host_address[j]}测试挂载 ##########################"
        # Unmount commands for the targets mounted so far, newest first.
        trial_undo=""
        for ((t=0;t<5;t++));do
          trial_name=${trial_names[t]}
          echo "########################## ${trial_name}测试挂载 ##########################"
          ssh root@${host_address[j]} "umount /mnt/${trial_name} &> /dev/null"
          if ! ssh root@${host_address[j]} "mount -t lustre ${trial_devs[t]} /mnt/${trial_name}";then
            echo "############# ${trial_name}测试有误,请手动查看! #############"
            if [ -n "${trial_undo}" ];then
              ssh root@${host_address[j]} "${trial_undo}"
            fi
            exit
          fi
          trial_undo="umount /mnt/${trial_name}${trial_undo:+;${trial_undo}}"
        done
        ssh root@${host_address[j]} "${trial_undo}"
      done
      echo "############# 测试完毕 #############"
      break
      ;;
    n)
      echo "############# 已跳过步骤-测试挂载 #############"
      break
      ;;
    *)
      if [ ${i} -eq 99 ];then
        echo "############# 已退出 #############"
        exit
      fi
      ;;
  esac
done
# Install pacemaker/pcs on both nodes, authenticate the nodes against each
# other, create the HA cluster (unless it already exists) and start it.
# "Y" runs the step, "n" skips it, any other answer re-prompts; the script
# exits after the 100th prompt.
for ((i=0;i<100;i++));do
  read -p "是否继续安装packemaker和corosync软件和创建集群?(Y/n): " flag
  if [ "${flag}" == "Y" ];then
    sleep 3
    # Single source of truth for the cluster name. The existence check used
    # to grep for "mycluster" while the cluster was created as "mylustre",
    # so an existing cluster was never detected.
    ha_cluster_name="mylustre"
    if ! ssh "root@${host_address[0]}" 'pcs status' | grep -q "${ha_cluster_name}";then
      for ((j=0;j<2;j++));do
        echo "########################## ${host_address[j]}开始安装 ##########################"
        ssh "root@${host_address[j]}" "yum install pacemaker pcs policycoreutils-python -y"
        echo "############# ${host_address[j]}安装完毕 #############"
        echo "########################## ${host_address[j]}开始配置 ##########################"
        ssh "root@${host_address[j]}" "systemctl enable pcsd;systemctl restart pcsd"
        # pcs authenticates as the hacluster user, so give it a password.
        ssh "root@${host_address[j]}" "echo '${host_passwd}' |passwd --stdin hacluster"
        echo "############# ${host_address[j]}配置完毕 #############"
      done
      # The here-document terminator must stay at column 0.
      /usr/bin/expect << eof
# Seconds to wait for each expected prompt
set timeout 10
spawn ssh root@${host_address[0]} "pcs cluster auth ${host_hostname[*]}"
# Keep answering the credential prompts until the command finishes
expect {
"Username:" { send "hacluster\n"; exp_continue }
"Password:" { send "${host_passwd}\n"; exp_continue }
}
eof
      echo "########################## 开始创建集群 ##########################"
      ssh "root@${host_address[0]}" "pcs cluster setup --name ${ha_cluster_name} ${host_hostname[*]}"
      echo "############# 创建完毕 #############"
    fi
    echo "########################## 启动集群 ##########################"
    ssh "root@${host_address[0]}" "pcs cluster start --all"
    echo "############# 启动完毕 #############"
    break
  elif [ "${flag}" == "n" ];then
    echo "############# 已跳过步骤-安装packemaker和corosync软件和配置 #############"
    break
  elif [ ${i} -eq 99 ];then
    echo "############# 已退出 #############"
    exit
  else continue;fi
done
# Configure fencing: install the fence agents on both nodes, enable stonith
# and create one fence_heuristics_ping device per node. If both devices
# already exist and are started the step is skipped; devices that exist but
# are not healthy are deleted and recreated.
# "Y" runs the step, "n" skips it, any other answer re-prompts; the script
# exits after the 100th prompt.
for ((i=0;i<100;i++));do
  read -p "是否继续配置资源防护?(Y/n): " flag
  if [ "${flag}" == "Y" ];then
    sleep 3
    echo "########################## 开始配置资源防护 ##########################"
    for ((j=0;j<2;j++));do
      ssh "root@${host_address[j]}" "yum install -y fence-agents-all"
    done
    ssh "root@${host_address[0]}" "pcs property set stonith-enabled=true"
    # Query the cluster once through node 0. The old code indexed
    # host_address with j, which is 2 (out of range) after the loop above.
    stonith_lines=$(ssh "root@${host_address[0]}" "pcs status" | grep "stonith:fence_heuristics_ping")
    if [ "$(grep -c "Started" <<< "${stonith_lines}")" -eq 2 ];then
      echo "############# stonith已创建,并且正常运行,跳过配置stonith #############"
      break
    fi
    # Delete each node's existing device before recreating it. The old code
    # deleted the first node's device twice and never the second node's.
    for ((j=0;j<2;j++));do
      if grep -qw "stonith-ping-${host_hostname[j]}" <<< "${stonith_lines}";then
        ssh "root@${host_address[0]}" "pcs stonith delete stonith-ping-${host_hostname[j]}"
      fi
    done
    for ((j=0;j<2;j++));do
      ssh "root@${host_address[0]}" "pcs stonith create stonith-ping-${host_hostname[j]} fence_heuristics_ping ping_targets=${host_address[j]}"
    done
    ssh "root@${host_address[0]}" "pcs status"
    echo "############# 配置完毕 #############"
    break
  elif [ "${flag}" == "n" ];then
    echo "############# 已跳过步骤-配置资源防护 #############"
    break
  elif [ ${i} -eq 99 ];then
    echo "############# 已退出 #############"
    exit
  else continue;fi
done
# Create the pacemaker resources for the Lustre targets: install the
# lustre-resource-agents package where missing, delete any previous Lustre
# resources, create one ocf:lustre:Lustre resource per target, add location
# and ordering constraints, and create the LNet/Lustre health monitors.
# "Y" runs the step, "n" skips it, any other answer re-prompts; the script
# exits after the 100th prompt.
for ((i=0;i<100;i++));do
  read -p "是否继续创建lustre资源(可删除后再创建)?(Y/n): " flag
  if [ "${flag}" == "Y" ];then
    sleep 3
    for ((j=0;j<2;j++));do
      # Quote the substitution so multi-line output cannot break the test.
      if [ -z "$(ssh "root@${host_address[j]}" 'rpm -qa | grep lustre-resource-agents-2.12.1-1')" ];then
        echo "########################## ${host_address[j]}安装ocf:lustre:Lustre包 ##########################"
        ssh "root@${host_address[j]}" "wget https://downloads.whamcloud.com/public/lustre/lustre-2.12.1/el7/server/RPMS/x86_64/lustre-resource-agents-2.12.1-1.el7.x86_64.rpm"
        ssh "root@${host_address[j]}" "rpm -ivh lustre-resource-agents-2.12.1-1.el7.x86_64.rpm"
        echo "############# 安装完毕 #############"
      fi
    done
    echo "########################## 开始删除lustre资源 ##########################"
    ssh "root@${host_address[0]}" "pcs resource delete global-ost2 &> /dev/null;pcs resource delete global-ost1 &> /dev/null"
    ssh "root@${host_address[0]}" "pcs resource delete global-mdt1 &> /dev/null;pcs resource delete global-mdt2 &> /dev/null"
    ssh "root@${host_address[0]}" "pcs resource delete global-mgs &> /dev/null"
    echo "############# 删除完毕 #############"
    echo "########################## 开始创建lustre资源 ##########################"
    # Parallel tables: target name, backing device, index of preferred node.
    res_names=(mgs mdt1 mdt2 ost1 ost2)
    res_devs=(/dev/sdb /dev/sdc /dev/sdd /dev/sde /dev/sdf)
    res_prefer=(0 0 1 0 1)
    for ((r=0;r<5;r++));do
      if ! ssh "root@${host_address[0]}" "pcs resource create global-${res_names[r]} ocf:lustre:Lustre target=${res_devs[r]} mountpoint=/mnt/${res_names[r]}";then
        echo "############# ${res_names[r]}资源创建有误,请手动查看! #############"
        exit 1
      fi
    done
    # Check every constraint, not just the last one, so a failure part-way
    # through is not silently ignored.
    for ((r=0;r<5;r++));do
      if ! ssh "root@${host_address[0]}" "pcs constraint location add global-constraint-${res_names[r]} global-${res_names[r]} ${host_hostname[${res_prefer[r]}]} 10";then
        echo "############# 资源约束创建有误,请手动查看! #############"
        exit 1
      fi
    done
    # The MGS must be started before every other target.
    for ((r=1;r<5;r++));do
      if ! ssh "root@${host_address[0]}" "pcs constraint order start global-mgs then start global-${res_names[r]}";then
        echo "############# 资源约束创建有误,请手动查看! #############"
        exit 1
      fi
    done
    # Extract the interface bound to tcp2 from the lnet module options.
    # \$3 is escaped so awk on the remote side receives it; unescaped it was
    # expanded to an empty string by the local shell.
    lnet_name1=$(ssh "root@${host_address[0]}" "awk '{print \$3}' /etc/modprobe.d/lustre.conf" | awk -F ',' '{print $2}' | awk -F '(' '{print $2}' | awk -F ')' '{print $1}')
    lnet_name2=$(ssh "root@${host_address[1]}" "awk '{print \$3}' /etc/modprobe.d/lustre.conf" | awk -F ',' '{print $2}' | awk -F '(' '{print $2}' | awk -F ')' '{print $1}')
    ssh "root@${host_address[0]}" "pcs resource delete ping-lnet &>/dev/null;pcs resource delete global-healthLUSTRE &> /dev/null"
    # Both health monitors must be created successfully.
    if ! ssh "root@${host_address[0]}" "pcs resource create ping-lnet ocf:lustre:healthLNET lctl=true multiplier=1001 device=${lnet_name1} host_list='${lnet_address[0]}@tcp2 ${lnet_address[1]}@tcp2' --clone" \
      || ! ssh "root@${host_address[0]}" "pcs resource create global-healthLUSTRE ocf:lustre:healthLUSTRE --clone";then
      echo "############# 资源监听创建有误,请手动查看! #############"
      exit 1
    fi
    echo "############# 创建完毕 #############"
    echo "########################## 查看集群状态 ##########################"
    echo "########################################################################"
    echo "########################################################################"
    ssh "root@${host_address[0]}" "pcs status"
    echo "########################################################################"
    echo "########################################################################"
    echo "################################################## 脚本到此全部执行完毕 ##################################################"
    break
  elif [ "${flag}" == "n" ];then
    echo "############# 已跳过步骤-创建lustre资源 #############"
    break
  elif [ ${i} -eq 99 ];then
    echo "############# 已退出 #############"
    exit
  else continue;fi
done