利用OpenHPC搭建CentOS 7.X集群
目前搭建HPC的开源工具主要有Rocks Cluster、xCAT、OpenHPC。其中OpenHPC是Linux基金会开源的超算项目,致力于为高性能计算(HPC)环境构建一个开源框架,以适应学术研究的需求并降低成本。
本次采用虚拟机实验,没有Infiniband,故先不介绍Infiniband的配置
主节点系统(CentOS-7.7)配置
- 配置参数说明
环境变量 | 变量名称说明 | 模版参数 |
---|---|---|
${sms_name} | 主机名 | server |
${sms_ip} | 主节点内网地址 | 192.168.130.100 |
$sms_eth_internal | 主节点内网网卡名称 | ens33 |
${eth_provision} | 主节点与计算节点通信网卡名称 | ens33 |
${internal_netmask} | 内网子网掩码 | 255.255.255.0 |
${ntp_server} | 本地时间同步服务器 | 192.168.130.100 |
${c_ip[0]}, ${c_ip[1]}, … | 计算节点IP地址 | 192.168.130.[1-5] |
${c_name[0]}, ${c_name[1]}, … | 计算节点名称 | node[1-5] |
${sms_ipoib} | 主节点Infiniband网络地址 | 192.168.120.100 |
${ipoib_netmask} | 主节点Infiniband网络子网掩码 | 255.255.255.0 |
${c_ipoib[0]}, ${c_ipoib[1]}, … | 计算节点Infiniband网络IP地址 | 192.168.120.[1-5] |
$compute_regex | 计算节点名称正则表达式 | node[1-5] |
${compute_prefix} | 计算节点名称前缀 | node |
${kargs} | Linux内核启动参数 | net.ifnames=1 |
- 添加hosts文件信任
[root@server ~]#echo "192.168.130.1 node1" >> /etc/hosts
[root@server ~]#echo "192.168.130.2 node2" >> /etc/hosts
[root@server ~]#echo "192.168.130.3 node3" >> /etc/hosts
[root@server ~]#echo "192.168.130.4 node4" >> /etc/hosts
[root@server ~]#echo "192.168.130.5 node5" >> /etc/hosts
- 关闭防火墙
[root@server ~]#systemctl stop firewalld.service
[root@server ~]#systemctl disable firewalld.service
- 关闭SELinux ;
[root@server ~]#vi /etc/selinux/config
SELINUX=disabled
- 配置NTP服务 ;
[root@server ~]#echo "server 192.168.130.100" >> /etc/ntp.conf
[root@server ~]#systemctl enable ntpd.service
- 配置内网网卡 ;
[root@server ~]# vi /etc/sysconfig/network-scripts/ifcfg-ens33
TYPE=Ethernet
PROXY_METHOD=none
BROWSER_ONLY=no
BOOTPROTO=static
DEFROUTE=yes
IPV4_FAILURE_FATAL=no
IPV6INIT=yes
IPV6_AUTOCONF=yes
IPV6_DEFROUTE=yes
IPV6_FAILURE_FATAL=no
IPV6_ADDR_GEN_MODE=stable-privacy
NAME=ens33
UUID=******************************
DEVICE=ens33
ONBOOT=yes
IPADDR=192.168.130.100
PREFIX=24
IPV6_PRIVACY=no
- 安装ohpc-base、warewulf、pbs
安装open-HPC yum源 ohpc-release;
[root@server ~]#yum install http://build.openhpc.community/OpenHPC:/1.3/CentOS_7/x86_64/ohpc-release-1.3-1.el7.x86_64.rpm
[root@server ~]#yum -y install ohpc-base
[root@server ~]#yum -y install ohpc-warewulf
[root@server ~]#yum -y install pbspro-server-ohpc
管理节点warewulf配置
将/etc/warewulf/provision.conf文件中的 network device = eth1 改成内网网卡ens33
[root@server ~]# vi /etc/warewulf/provision.conf
# What is the default network device that the master will use to
# communicate with the nodes?
network device = ens33
# Which DHCP server implementation should be used?
dhcp server = isc
# What is the TFTP root directory that should be used to store the
# network boot images? By default Warewulf will try and find the
# proper directory. Just add this if it can't locate it.
#tftpdir = /var/lib/tftpboot
# Automatically generate and manage a dynamnic_host virtual file
# object in the datastore? This is useful for provisioning this
# out to nodes so they always have a current /etc/hosts file.
generate dynamic_hosts = yes
# Should we manage and overwrite the local hostfile file on this
# system? This will cause all node entries to be added
# automatically to /etc/hosts.
update hostfile = yes
# If no cluster/domain is set on a node, should we add 'localdomain'
# as the default domain
use localdomain = yes
# The default kernel arguments to pass to the nodes boot kernel
default kargs = "net.ifnames=0 biosdevname=0 quiet"
编辑/etc/xinetd.d/tftp文件, 将disable = yes改为 disable = no
[root@server ~]# vi /etc/xinetd.d/tftp
# default: off
# description: The tftp server serves files using the trivial file transfer \
# protocol. The tftp protocol is often used to boot diskless \
# workstations, download configuration files to network-aware printers, \
# and to start the installation process for some operating systems.
service tftp
{
socket_type