一、配置基础环境
所有节点保证网络正常、关闭防火墙、关闭selinux、配置节点间无密码访问、配置共享存储、配置用户认证方式(ldap或者nis)。
注意:包括下面的步骤,所有节点执行相同的操作。
二、下载Slurm源码包并解压
# wget https://download.schedmd.com/slurm/slurm-20.11.8.tar.bz2
# tar -xf slurm-20.11.8.tar.bz2
# cd slurm-20.11.8/
# ls
三、安装munge
# wget https://github.com/dun/munge/archive/refs/tags/munge-0.5.14.tar.gz
# tar -xf munge-0.5.14.tar.gz
# cd munge-munge-0.5.14/
# ls
编译并安装munge
# ./bootstrap
# ./configure --prefix=/opt/munge --sysconfdir=/opt/munge/etc --localstatedir=/opt/munge/local --with-runstatedir=/opt/munge/run --libdir=/opt/munge/lib64
# make
# make install
四、创建用户munge并修改目录权限
# useradd -s /sbin/nologin -u 601 munge
# chown -R munge:munge /opt/munge/
# chmod 700 /opt/munge/etc/
# chmod 711 /opt/munge/local/
# chmod 755 /opt/munge/run/
# chmod 711 /opt/munge/lib/
生成key
# sudo -u munge /opt/munge/sbin/mungekey --verbose
# chmod 600 /opt/munge/etc/munge/munge.key
生成链接文件并启动服务
# ln -s /opt/munge/lib/systemd/system/munge.service /usr/lib/systemd/system/munge.service
# systemctl start munge
# systemctl status munge
五、安装slurm(先回到第二步解压出的slurm-20.11.8/源码目录)
# ./configure --prefix=/opt/slurm --with-munge=/opt/munge --sysconfdir=/opt/slurm/etc --localstatedir=/opt/slurm/local --runstatedir=/opt/slurm/run --libdir=/opt/slurm/lib64
# make
# make install
六、配置数据库
# systemctl start mariadb.service
# systemctl enable mariadb.service
# mysql -u root
> update mysql.user set password=PASSWORD('199532') where user='root';
> FLUSH PRIVILEGES;
> create database slurm_acct_db;
> grant all on slurm_acct_db.* to 'slurm'@'192.168.100.%' identified by '199532' with grant option;
> grant all on slurm_acct_db.* to 'slurm'@'localhost' identified by '199532' with grant option;
七、编辑配置文件(示例配置文件在源码包中)
# mkdir /var/spool/slurm
# cp slurm.conf.example /opt/slurm/etc/slurm.conf
# cp slurmdbd.conf.example /opt/slurm/etc/slurmdbd.conf
# cp cgroup.conf.example /opt/slurm/etc/cgroup.conf
# chmod 600 /opt/slurm/etc/slurmdbd.conf
# vim /opt/slurm/etc/slurm.conf
# cat /opt/slurm/etc/slurm.conf | grep -v "#"
# vim /opt/slurm/etc/slurmdbd.conf
# cat /opt/slurm/etc/slurmdbd.conf | grep -v "#"
八、启动服务
# cp slurmd.service /usr/lib/systemd/system/
# cp slurmdbd.service /usr/lib/systemd/system/
# cp slurmctld.service /usr/lib/systemd/system/
# systemctl daemon-reload
# systemctl start slurmd
# systemctl start slurmdbd
# systemctl start slurmctld
# /opt/slurm/bin/sinfo
# srun sleep 100