001.SUFFIXES: .inc .f .f90 .F # suffixes known to make; the suffix rules near the end of this file (.F.o, .F.f90, .f90.o) rely on them
002#-----------------------------------------------------------------------
003# Makefile for Intel Fortran compiler for Pentium/Athlon/Opteron
004# based systems
005# we recommend this makefile for both Intel as well as AMD systems
006# for AMD based systems appropriate BLAS and fftw libraries are
007# however mandatory (whereas they are optional for Intel platforms)
008#
009# The makefile was tested only under Linux on Intel and AMD platforms
010# the following compiler versions have been tested:
011# - ifc.7.1 works stably, somewhat slow but reliable
012# - ifc.8.1 fails to compile the code properly
013# - ifc.9.1 recommended (both for 32 and 64 bit)
014# - ifc.10.1 partially recommended (both for 32 and 64 bit)
015# tested build 20080312 Package ID: l_fc_p_10.1.015
016# the gamma only mpi version cannot be compiled
017# using ifc.10.1
018#
019# it might be required to change some of the library paths, since
020# Linux installations vary a lot
021# Hence check ***ALL*** options in this makefile very carefully
022#-----------------------------------------------------------------------
023#
024# BLAS must be installed on the machine
025# there are several options:
026# 1) very slow but works:
027# retrieve the LAPACK package from ftp.netlib.org
028# and compile the blas routines (BLAS/SRC directory)
029# please use g77 or f77 for the compilation. When I tried to
030# use pgf77 or pgf90 for BLAS, VASP hung up when calling
031# ZHEEV (however this was with lapack 1.1 now I use lapack 2.0)
032# 2) more desirable: get an optimized BLAS
033#
034# the two most reliable packages around are presently:
035# 2a) Intel's own optimised BLAS (PIII, P4, PD, PC2, Itanium)
036# http://developer.intel.com/software/products/mkl/
037# this is really excellent, if you use Intel CPU's
038#
039# 2b) probably fastest SSE2 (4 GFlops on P4, 2.53 GHz, 16 GFlops PD,
040# around 30 GFlops on Quad core)
041# Kazushige Goto's BLAS
042# http://www.cs.utexas.edu/users/kgoto/signup_first.html
043# http://www.tacc.utexas.edu/resources/software/
044#
045#-----------------------------------------------------------------------
046
047# all CPP-processed Fortran files have the extension .f90; the compile rules address them as stem + SUFFIX
048SUFFIX=.f90
049
050#-----------------------------------------------------------------------
051# fortran compiler and linker
052#-----------------------------------------------------------------------
053#FC=ifort
054# fortran linker
055#FCL=$(FC)
056
057
058#-----------------------------------------------------------------------
059# where is CPP -- (I need CPP, can't use gcc with proper options)
060# that's the location of gcc for SUSE 5.3
061#
062# CPP_ = /usr/lib/gcc-lib/i486-linux/2.7.2/cpp -P -C
063#
064# that's probably the right line for some Red Hat distribution:
065#
066# CPP_ = /usr/lib/gcc-lib/i386-redhat-linux/2.7.2.3/cpp -P -C
067#
068# SUSE X.X, maybe some Red Hat distributions:
069# NOTE(review): this hands ./preprocess only the name of the preprocessed file (stem + SUFFIX); presumably the script locates the .F source and writes that file itself -- TODO confirm
070CPP_ = ./preprocess $*$(SUFFIX)
071
072#-----------------------------------------------------------------------
073# possible options for CPP:
074# NGXhalf charge density reduced in X direction
075# wNGXhalf gamma point only reduced in X direction
076# avoidalloc avoid ALLOCATE if possible
077# PGF90 work around for some PGF90 / IFC bugs
078# CACHE_SIZE 1000 for PII,PIII, 5000 for Athlon, 8000-12000 P4, PD
079# RPROMU_DGEMV use DGEMV instead of DGEMM in RPRO (depends on used BLAS)
080# RACCMU_DGEMV use DGEMV instead of DGEMM in RACC (depends on used BLAS)
081# tbdyn MD package of Tomas Bucko
082#-----------------------------------------------------------------------
083
084#CPP = $(CPP_) -DHOST=\"LinuxIFC\" \
085# -DCACHE_SIZE=12000 -DPGF90 -Davoidalloc -DNGXhalf \
086# -DRPROMU_DGEMV -DRACCMU_DGEMV
087
088#-----------------------------------------------------------------------
089# general fortran flags (there must be a trailing blank on this line)
090# byterecl is strictly required for ifc, since otherwise
091# the WAVECAR file becomes huge
092#-----------------------------------------------------------------------
093
094FFLAGS = -FR -lowercase -assume byterecl
095
096#-----------------------------------------------------------------------
097# optimization
098# we have tested whether higher optimisation improves performance
099# -axK SSE1 optimization, but also generate code executable on all mach.
100# xK improves performance somewhat on XP, and a is required in order
101# to run the code on older Athlons as well
102# -xW SSE2 optimization
103# -axW SSE2 optimization, but also generate code executable on all mach.
104# -tpp6 P3 optimization
105# -tpp7 P4 optimization
106#-----------------------------------------------------------------------
107
108# ifc.9.1, ifc.10.1 recommended; OFLAG is the optimisation level of the generic compile rules
109OFLAG=-O2 -ip -ftz
110# OFLAG_HIGH/OBJ_HIGH and OBJ_NOOPT drive the per-object rules below (both lists are empty); DEBUG is used for main.o and makeparam.o, INLINE for xcgrad.o and xcspin.o
111OFLAG_HIGH = $(OFLAG)
112OBJ_HIGH =
113OBJ_NOOPT =
114DEBUG = -FR -O0
115INLINE = $(OFLAG)
116
117#-----------------------------------------------------------------------
118# the following lines specify the position of BLAS and LAPACK
119# VASP works fastest with the libgoto library
120# so that's what we recommend
121#-----------------------------------------------------------------------
122
123# mkl.10.0
124# set -DRPROMU_DGEMV -DRACCMU_DGEMV in the CPP lines
125#BLAS=-L/opt/intel/mkl100/lib/em64t -lmkl -lpthread
126
127# even faster for VASP Kazushige Goto's BLAS
128# http://www.cs.utexas.edu/users/kgoto/signup_first.html
129# parallel goto version requires sometimes -libverbs
130#BLAS= /opt/libs/libgoto/libgoto.so
131BLAS=-L/opt/intel/composer_xe_2013_sp1.2.144/mkl/lib/intel64 -lmkl_intel_lp64 -lmkl_core -lmkl_sequential -lpthread
132# LAPACK: simplest is vasp.5.lib/lapack_double; left empty here (presumably the MKL libraries in BLAS provide it -- TODO confirm)
133LAPACK=
134
135# use the mkl Intel lapack
136#LAPACK= -lmkl_lapack
137
138#-----------------------------------------------------------------------
139
140#LIB = -L../vasp.5.lib -ldmy \
141# ../vasp.5.lib/linpack_double.o $(LAPACK) \
142# $(BLAS)
143#
144# options for linking, passed to every link command below; nothing is required (usually)
145LINK =
146
147#-----------------------------------------------------------------------
148# fft libraries:
149# VASP.5.2 can use fftw.3.1.X (http://www.fftw.org)
150# since this version is faster on P4 machines, we recommend to use it
151#-----------------------------------------------------------------------
152
153#FFT3D = fft3dfurth.o fft3dlib.o
154
155# alternatively: fftw.3.1.X is slightly faster and should be used if available
156#FFT3D = fftw3d.o fft3dlib.o /opt/libs/fftw-3.1.2/lib/libfftw3.a
157
158
159#=======================================================================
160# MPI section, uncomment the following lines until
161# general rules and compile lines
162# presently we recommend OPENMPI, since it seems to offer better
163# performance than lam or mpich
164#
165# !!! Please do not send me any queries on how to install MPI, I will
166# certainly not answer them !!!!
167#=======================================================================
168#-----------------------------------------------------------------------
169# fortran compiler (FC) and linker (FCL) for mpi; the linker is the same command as the compiler
170#-----------------------------------------------------------------------
171
172FC=mpif90 -f90=ifort
173FCL=$(FC)
174
175#-----------------------------------------------------------------------
176# additional options for CPP in parallel version (see also above):
177# NGZhalf charge density reduced in Z direction
178# wNGZhalf gamma point only reduced in Z direction
179# scaLAPACK use scaLAPACK (usually slower on 100 Mbit Net)
180# avoidalloc avoid ALLOCATE if possible
181# PGF90 work around for some PGF90 / IFC bugs
182# CACHE_SIZE 1000 for PII,PIII, 5000 for Athlon, 8000-12000 P4, PD
183# RPROMU_DGEMV use DGEMV instead of DGEMM in RPRO (depends on used BLAS)
184# RACCMU_DGEMV use DGEMV instead of DGEMM in RACC (depends on used BLAS)
185# tbdyn MD package of Tomas Bucko
186#-----------------------------------------------------------------------
187
188#-----------------------------------------------------------------------
189# preprocessor call of the MPI build: CPP_ followed by the -D options below (the ## line lists further options that are switched off)
190CPP = $(CPP_) -DMPI -DHOST=\"LinuxIFC\" -DIFC \
191-DCACHE_SIZE=4000 -DPGF90 -Davoidalloc\
192-DMPI_BLOCK=8000
193## -DRPROMU_DGEMV -DRACCMU_DGEMV -DNGZhalf
194
195#-----------------------------------------------------------------------
196# location of SCALAPACK
197# if you do not use SCALAPACK simply leave that section commented out
198#-----------------------------------------------------------------------
199
200BLACS=$(HOME)/archives/SCALAPACK/BLACS/
201SCA_=$(HOME)/archives/SCALAPACK/SCALAPACK
202
203SCA= $(SCA_)/libscalapack.a \
204$(BLACS)/LIB/blacsF77init_MPI-LINUX-0.a $(BLACS)/LIB/blacs_MPI-LINUX-0.a $(BLACS)/LIB/blacsF77init_MPI-LINUX-0.a
205# SCA is reset to empty right below, so no scaLAPACK archive is added to LIB in this configuration
206SCA=
207
208#-----------------------------------------------------------------------
209# libraries for mpi (LIB is passed to every link command below)
210#-----------------------------------------------------------------------
211
212LIB = -L../vasp.5.lib -ldmy \
213../vasp.5.lib/linpack_double.o $(LAPACK) \
214$(SCA) $(BLAS)
215
216# FFT: fftmpi.o with fft3dlib of Juergen Furthmueller
217FFT3D = fftmpi.o fftmpi_map.o fft3dfurth.o fft3dlib.o
218
219# alternatively: fftw.3.1.X is slightly faster and should be used if available
220#FFT3D = fftmpiw.o fftmpi_map.o fftw3d.o fft3dlib.o /opt/libs/fftw-3.1.2/lib/libfftw3.a
221
222#-----------------------------------------------------------------------
223# general rules and compile lines
224#-----------------------------------------------------------------------
225BASIC= symmetry.o symlib.o lattlib.o random.o
226# BASIC is spliced into SOURCE below; SOURCE is the object list linked into vasp, makeparam and kpoints (their own main objects are added by the respective rules)
227
228SOURCE= base.o mpi.o smart_allocate.o xml.o \
229constant.o jacobi.o main_mpi.o scala.o \
230asa.o lattice.o poscar.o ini.o mgrid.o xclib.o vdw_nl.o xclib_grad.o \
231radial.o pseudo.o gridq.o ebs.o \
232mkpoints.o wave.o wave_mpi.o wave_high.o \
233$(BASIC) nonl.o nonlr.o nonl_high.o dfast.o choleski2.o \
234mix.o hamil.o xcgrad.o xcspin.o potex1.o potex2.o \
235constrmag.o cl_shift.o relativistic.o LDApU.o \
236paw_base.o metagga.o egrad.o pawsym.o pawfock.o pawlhf.o rhfatm.o paw.o \
237mkpoints_full.o charge.o Lebedev-Laikov.o stockholder.o dipol.o pot.o \
238dos.o elf.o tet.o tetweight.o hamil_rot.o \
239steep.o chain.o dyna.o sphpro.o us.o core_rel.o \
240aedens.o wavpre.o wavpre_noio.o broyden.o \
241dynbr.o rmm-diis.o reader.o writer.o tutor.o xml_writer.o \
242brent.o stufak.o fileio.o opergrid.o stepver.o \
243chgloc.o fast_aug.o fock.o mkpoints_change.o sym_grad.o \
244mymath.o internals.o dynconstr.o dimer_heyden.o dvvtrajectory.o vdwforcefield.o \
245hamil_high.o nmr.o pead.o mlwf.o subrot.o subrot_scf.o \
246force.o pwlhf.o gw_model.o optreal.o davidson.o david_inner.o \
247electron.o rot.o electron_all.o shm.o pardens.o paircorrection.o \
248optics.o constr_cell_relax.o stm.o finite_diff.o elpol.o \
249hamil_lr.o rmm-diis_lr.o subrot_cluster.o subrot_lr.o \
250lr_helper.o hamil_lrf.o elinear_response.o ilinear_response.o \
251linear_optics.o linear_response.o \
252setlocalpp.o wannier.o electron_OEP.o electron_lhf.o twoelectron4o.o \
253ratpol.o screened_2e.o wave_cacher.o chi_base.o wpot.o local_field.o \
254ump2.o bse_te.o bse.o acfdt.o chi.o sydmat.o dmft.o \
255rmm-diis_mlr.o linear_response_NMR.o
256
257vasp: $(SOURCE) $(FFT3D) $(INC) main.o # main executable: remove any old binary, then link main.o with SOURCE, FFT3D and LIB
258rm -f vasp
259$(FCL) -o vasp main.o $(SOURCE) $(FFT3D) $(LIB) $(LINK)
260makeparam: $(SOURCE) $(FFT3D) makeparam.o main.F $(INC) # links makeparam.o with the full SOURCE/FFT3D object list
261$(FCL) -o makeparam $(LINK) makeparam.o $(SOURCE) $(FFT3D) $(LIB)
262zgemmtest: zgemmtest.o base.o random.o $(INC) # links only zgemmtest.o, random.o and base.o against LIB
263$(FCL) -o zgemmtest $(LINK) zgemmtest.o random.o base.o $(LIB)
264dgemmtest: dgemmtest.o base.o random.o $(INC) # links only dgemmtest.o, random.o and base.o against LIB
265$(FCL) -o dgemmtest $(LINK) dgemmtest.o random.o base.o $(LIB)
266ffttest: base.o smart_allocate.o mpi.o mgrid.o random.o ffttest.o $(FFT3D) $(INC) # links ffttest.o with the FFT3D objects and the few support objects it lists
267$(FCL) -o ffttest $(LINK) ffttest.o mpi.o mgrid.o random.o smart_allocate.o base.o $(FFT3D) $(LIB)
268kpoints: $(SOURCE) $(FFT3D) makekpoints.o main.F $(INC) # links makekpoints.o with the full SOURCE/FFT3D object list
269$(FCL) -o kpoints $(LINK) makekpoints.o $(SOURCE) $(FFT3D) $(LIB)
270
271clean: # delete generated files (the leading '-' ignores errors), then touch all .F sources so they are newer than anything built from them
272-rm -f *.g *.f *.o *.L *.mod ; touch *.F
273
274main.o: main$(SUFFIX) # compiled with DEBUG instead of OFLAG; the blank between FFLAGS and DEBUG keeps the flags apart even if FFLAGS has no trailing blank
275$(FC) $(FFLAGS) $(DEBUG) $(INCS) -c main$(SUFFIX)
276xcgrad.o: xcgrad$(SUFFIX) # compiled from the preprocessed file with INLINE in place of OFLAG
277$(FC) $(FFLAGS) $(INLINE) $(INCS) -c xcgrad$(SUFFIX)
278xcspin.o: xcspin$(SUFFIX) # compiled from the preprocessed file with INLINE in place of OFLAG
279$(FC) $(FFLAGS) $(INLINE) $(INCS) -c xcspin$(SUFFIX)
280
281makeparam.o: makeparam$(SUFFIX) # compiled with DEBUG instead of OFLAG; the blank between FFLAGS and DEBUG keeps the flags apart even if FFLAGS has no trailing blank
282$(FC) $(FFLAGS) $(DEBUG) $(INCS) -c makeparam$(SUFFIX)
283
284makeparam$(SUFFIX): makeparam.F main.F
285#
286# MIND: I do not have a full dependency list for the include files
287# and MODULES: here are only the minimal basic dependencies
288# if one structure is changed then touch_dep must be called
289# with the corresponding name of the structure
290#
291base.o: base.inc base.F
292mgrid.o: mgrid.inc mgrid.F
293constant.o: constant.inc constant.F
294lattice.o: lattice.inc lattice.F
295setex.o: setexm.inc setex.F
296pseudo.o: pseudo.inc pseudo.F
297poscar.o: poscar.inc poscar.F
298mkpoints.o: mkpoints.inc mkpoints.F
299wave.o: wave.F
300nonl.o: nonl.inc nonl.F
301nonlr.o: nonlr.inc nonlr.F
302
303$(OBJ_HIGH): # objects listed in OBJ_HIGH: preprocess, then compile with OFLAG_HIGH
304$(CPP)
305$(FC) $(FFLAGS) $(OFLAG_HIGH) $(INCS) -c $*$(SUFFIX)
306$(OBJ_NOOPT): # objects listed in OBJ_NOOPT: preprocess, then compile with FFLAGS only (no optimisation flag)
307$(CPP)
308$(FC) $(FFLAGS) $(INCS) -c $*$(SUFFIX)
309
310fft3dlib_f77.o: fft3dlib_f77.F # NOTE(review): uses F77 and FFLAGS_F77, neither of which is assigned in the visible part of this makefile -- confirm before relying on this rule
311$(CPP)
312$(F77) $(FFLAGS_F77) -c $*$(SUFFIX)
313
314.F.o: # generic rule: preprocess the .F file, then compile the result with FFLAGS and OFLAG
315$(CPP)
316$(FC) $(FFLAGS) $(OFLAG) $(INCS) -c $*$(SUFFIX)
317.F$(SUFFIX): # preprocessing step only
318$(CPP)
319$(SUFFIX).o: # compile an already preprocessed file with FFLAGS and OFLAG
320$(FC) $(FFLAGS) $(OFLAG) $(INCS) -c $*$(SUFFIX)
321
322# special rules
323#-----------------------------------------------------------------------
324# these special rules are cumulative (that is, once a file failed
325# in one compiler version, it stays in the list forever)
326# -tpp5|6|7 P, PII-PIII, PIV
327# -xW use SIMD (does not pay off on PII, since fft3d uses double prec)
328# all other options do not affect the code performance since -O1 is used
329
330fft3dlib.o : fft3dlib.F # special rule: preprocess, then compile with the fixed flags -FR -lowercase -O2 instead of FFLAGS/OFLAG
331$(CPP)
332$(FC) -FR -lowercase -O2 -c $*$(SUFFIX)
333
334fft3dfurth.o : fft3dfurth.F # special rule: preprocess, then compile with the fixed flags -FR -lowercase -O1 instead of FFLAGS/OFLAG
335$(CPP)
336$(FC) -FR -lowercase -O1 -c $*$(SUFFIX)
337
338fftw3d.o : fftw3d.F # special rule: preprocess, then compile with the fixed flags -FR -lowercase -O1 instead of FFLAGS/OFLAG
339$(CPP)
340$(FC) -FR -lowercase -O1 -c $*$(SUFFIX)
341
342wave_high.o : wave_high.F # special rule: preprocess, then compile with the fixed flags -FR -lowercase -O1 instead of FFLAGS/OFLAG
343$(CPP)
344$(FC) -FR -lowercase -O1 -c $*$(SUFFIX)
345
346radial.o : radial.F # special rule: preprocess, then compile with the fixed flags -FR -lowercase -O1 instead of FFLAGS/OFLAG
347$(CPP)
348$(FC) -FR -lowercase -O1 -c $*$(SUFFIX)
349
350symlib.o : symlib.F # special rule: preprocess, then compile with the fixed flags -FR -lowercase -O1 instead of FFLAGS/OFLAG
351$(CPP)
352$(FC) -FR -lowercase -O1 -c $*$(SUFFIX)
353
354symmetry.o : symmetry.F # special rule: preprocess, then compile with the fixed flags -FR -lowercase -O1 instead of FFLAGS/OFLAG
355$(CPP)
356$(FC) -FR -lowercase -O1 -c $*$(SUFFIX)
357
358wave_mpi.o : wave_mpi.F # special rule: preprocess, then compile with the fixed flags -FR -lowercase -O1 instead of FFLAGS/OFLAG
359$(CPP)
360$(FC) -FR -lowercase -O1 -c $*$(SUFFIX)
361
362wave.o : wave.F # special rule: preprocess, then compile with the fixed flags -FR -lowercase -O1 instead of FFLAGS/OFLAG
363$(CPP)
364$(FC) -FR -lowercase -O1 -c $*$(SUFFIX)
365
366dynbr.o : dynbr.F # special rule: preprocess, then compile with the fixed flags -FR -lowercase -O1 instead of FFLAGS/OFLAG
367$(CPP)
368$(FC) -FR -lowercase -O1 -c $*$(SUFFIX)
369
370asa.o : asa.F # special rule: preprocess, then compile with the fixed flags -FR -lowercase -O1 instead of FFLAGS/OFLAG
371$(CPP)
372$(FC) -FR -lowercase -O1 -c $*$(SUFFIX)
373
374broyden.o : broyden.F # special rule: preprocess, then compile with the fixed flags -FR -lowercase -O2 instead of FFLAGS/OFLAG
375$(CPP)
376$(FC) -FR -lowercase -O2 -c $*$(SUFFIX)
377
378us.o : us.F # special rule: preprocess, then compile with the fixed flags -FR -lowercase -O1 instead of FFLAGS/OFLAG
379$(CPP)
380$(FC) -FR -lowercase -O1 -c $*$(SUFFIX)
381
382LDApU.o : LDApU.F # special rule: preprocess, then compile with the fixed flags -FR -lowercase -O2 instead of FFLAGS/OFLAG
383$(CPP)
384$(FC) -FR -lowercase -O2 -c $*$(SUFFIX)