1.作业相关
#check available nodes
$ sinfo
#interactive job
$ srun -p <partition> -w <node> -c <cpus> --pty /bin/bash
#background job
$ sbatch test.sbatch
#details of test.sbatch
#!/bin/bash
#SBATCH -J test #job name
#SBATCH -p <partition name> #partition
#SBATCH -n 1 #ntasks
#SBATCH -c 10 #cpus per task
#SBATCH --mem-per-cpu=<MB> #mem per cpu
#SBATCH --gres=gpu:1 #1 gpu
#SBATCH -o test-%j.out #out.log
#SBATCH -e test-%j.err #error.log
<command>
#check status
$ squeue
#cancel job
$ scancel <jobID>
2.管理相关
#update job
$ scontrol update jobid=<jobID> jobname=<new job name>
$ scontrol update jobid=<jobID> partition=<partition name>
#update node
$ scontrol update nodename=<node name> state=resume
#show info
$ scontrol show partition <partition name>
$ scontrol show node <node name>
$ scontrol show job <jobID>
#check history
$ sacct -u <username> -S 2023-07-01 -E now --format=jobid,partition,jobname,user,nnodes,nodelist,start,end,elapsed,alloccpus,state
$ sacctmgr list assoc
$ sacctmgr list account
$ seff <jobID>
官方文档: https://slurm.schedmd.com/quickstart.html