使用 sacct 查询已结束作业的相关信息:
sacct -j 7454119
# 指定输出格式
format=jobid,jobname,partition,nodelist,alloccpus,state,end
sacct --format=$format -j 7454119
但是使用 sacct 命令之前,需要先开启 Slurm 的 Accounting(记账)功能。作业数据有两种存储方式:本地文件或数据库。
本地文件(注意:accounting_storage/filetxt 插件在较新的 Slurm 版本(20.11 起)中已被移除,以下配置仅适用于旧版本;新版本请使用数据库方式)
修改/etc/slurm/slurm.conf
......
# ACCOUNTING
AccountingStorageEnforce=1
AccountingStorageType=accounting_storage/filetxt
AccountingStorageLoc=/opt/slurm/acct
JobCompType=jobcomp/filetxt
JobCompLoc=/opt/slurm/jobcomp
JobAcctGatherType=jobacct_gather/linux
JobAcctGatherFrequency=30
重启服务
sudo systemctl restart slurmd
sudo systemctl restart slurmctld
验证:提交一个测试作业,然后用 sacct 查看是否有记录
$ srun hostname
$ sacct
JobID JobName Partition Account AllocCPUS State ExitCode
------------ ---------- ---------- ---------- ---------- ---------- --------
632 hostname debug (null) 0 COMPLETED 0:0
顺便看看记账文件(acct)和作业完成记录文件(jobcomp)里存了什么内容
$ cd /opt/slurm/
$ cat acct
632 debug 1636023491 1636023491 1000 1000 - - 0 hostname 0 4294901759 1 dev (null)
632 debug 1636023491 1636023491 1000 1000 - - 0 hostname 0 4294901759 1 dev (null)
632 debug 1636023491 1636023491 1000 1000 - - 0 hostname 0 4294901759 1 dev (null)
632 debug 1636023491 1636023491 1000 1000 - - 1 0 1 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.00 0 0 0.00 0 0 0.00 0 0 0.00 hostname dev 0 0 0 0 (null) 4294967295
632 debug 1636023491 1636023491 1000 1000 - - 1 0 3 0 1 1 0 0 1539 0 215 0 1324 0 0 0 0 0 0 0 0 0 0 0 0 0 0 210726912 0 210726912.00 864256 0 864256.00 0 0 0.00 0 0 0.00 hostname dev 0 0 0 0 (null) 4294967295
632 debug 1636023491 1636023491 1000 1000 - - 0 hostname 0 4294901759 1 dev (null)
632 debug 1636023491 1636023491 1000 1000 - - 3 0 3 4294967295 0
$
$ cat jobcomp
JobId=632 UserId=cc(1000) GroupId=cc(1000) Name=hostname JobState=COMPLETED Partition=debug TimeLimit=UNLIMITED StartTime=2021-11-04T18:58:11 EndTime=2021-11-04T18:58:11 NodeList=dev NodeCnt=1 ProcCnt=1 WorkDir=/opt/slurm ReservationName= Gres= Account= QOS= WcKey= Cluster=unknown SubmitTime=2021-11-04T18:58:11 EligibleTime=2021-11-04T18:58:11 DerivedExitCode=0:0 ExitCode=0:0
数据库
参考
https://slurm.schedmd.com/accounting.html#user-options