https://condor-wiki.cs.wisc.edu/index.cgi/wiki?p=BuildingCondorOnUnix
source config.sh.in
cd ../src
[root@se src]# chmod -R +x ./
[root@se src]# ./build_init
./configure;make
make release
after make release
cd src/release_dir
condor_init // it generates condor_config.local in the etc dir, and the log, spool and execute dirs in the condor user's home directory (here /home/condor)
[root@se release_dir]# sbin/condor_init
Creating /home/condor/log
Creating /home/condor/spool
Creating /home/condor/execute
Creating /home/zhxue/100630-condor-mysrc/condor-7.4.2/src/release_dir/etc/condor_config.local
Condor has been initialized, but not started.
[root@se release_dir]# condor_master
[root@se release_dir]# ps aux |grep condor
condor 10718 0.8 0.0 27684 2044 ? Ss 21:24 0:00 condor_master
root 10720 0.0 0.0 61204 752 pts/2 S+ 21:24 0:00 grep condor
问题:
好久没用condor,最后一步时,发现如下问题:
[root@mpi002 release_dir]# sbin/condor_init
Error determining who should own the Condor-related directories.
Either create a "condor" account, or set the CONDOR_IDS environment
variable to the valid uid.gid pair that should be used by Condor.
加了环境变量等,指定IDS都不行。
最后改用 condor_install,并通过 --owner 指定属主:
[root@mpi002 release_dir]# sbin/condor_install --install ./ --owner daemon
随后又出现了下面这个问题:
11/09 11:13:35 JavaDetect: failure status 256 when executing /usr/bin/java -Xmx1024m1340m -classpath /opt/condor-7.4.2/lib:/opt/condor-7.4.2/lib/scimark2lib.jar:. CondorJavaInfo old 2
解决:
1)
[root@mpi002 condor-7.4.2]# condor_starter -classad
CondorVersion = "$CondorVersion: 7.4.2 Nov 9 2011 $"
IsDaemonCore = True
HasFileTransfer = True
HasPerFileEncryption = True
HasReconnect = True
HasMPI = True
HasTDP = True
HasJobDeferral = True
HasJICLocalConfig = True
HasJICLocalStdin = True
Invalid maximum heap size: -Xmx512m1340m
Could not create the Java virtual machine.
HasVM = True
2) 修改local config文件
JAVA_MAXHEAP_ARGUMENT = -Xmx1024 改为
JAVA_MAXHEAP_ARGUMENT = -Xmx
(原因:Condor 会自动在该参数后面追加堆大小,例如 1340m;如果这里已经写了数值,就会拼出上面日志中 -Xmx1024m1340m 这类非法参数,所以只保留开关名 -Xmx。)
3)tail -f -n 100 /opt/condor-7.4.2/local.mpi002/log/StartLog
11/09 14:31:28 attempt to connect to <192.168.137.2:42303> failed: timed out after 20 seconds.
11/09 14:31:28 ERROR: SECMAN:2003:TCP auth connection to <192.168.137.2:42303> failed.
11/09 14:31:28 Failed to send alive to <192.168.137.2:42303>, will try again...