单机多核环境下 HPL安装小结

 在安装HPL之前,系统中必须已经安装了编译器、并行环境MPI以及基本线性代数子程序库(BLAS)或矢量信号图像处理库(VSIPL)两者之一。

编译器必须支持C语言和Fortran77语言。并行环境MPI一般采用MPICH,当然也可以是其它版本的MPI,如LAM MPI。HPL运行需要BLAS库或者VSIPL库,且库的性能与最终测得的Linpack性能有密切的关系。常用的BLAS库有GOTO、Atlas、ACML、ESSL、MKL等。

并行环境MPI我采用的是Infi-MPI,BLAS库我选择的是GotoBLAS。HPL可以从 www.netlib.org/benchmark/hpl 网站上下载,软件包为hpl.tar.gz,目前HPL的最新版本为2.0。

使用root帐户

具体步骤如下:

一.    GotoBLAS 的安装(GotoBLAS 于 2007-07-07 18:29 下载)

GotoBLAS-1.15.tar.gz

1.cp GotoBLAS-1.15.tar.gz /usr/local/share/

tar xzvf GotoBLAS-1.15.tar.gz

cd GotoBLAS

 

2.如果机器是32位的

./quickbuild.32bit

如果机器是64位的,则运行  ./quickbuild.64bit

 

 

3、编辑Makefile.rule,详细情况见附件;更改getarch.c里面的architecture,使之符合自己的情况,即选择自己机器的相应配置。

Makefile.rule

 

#
#  Beginning of user configuration
#
#  Every knob below is a plain make variable. Uncomment exactly one
#  alternative per group; leading blanks before an assignment are ignored
#  by make, so they carry no meaning.
#

# Revision suffix of this library.
REVISION = -r1.26

# C compiler family used for the build (gcc is the default choice).
C_COMPILER = GNU
# C_COMPILER = INTEL
# C_COMPILER = PGI

# Fortran compiler family. A Fortran compiler is not required to build the
# library; when none is specified, a GNU g77 compatible interface is used.
# F_COMPILER = G77
# F_COMPILER = G95
# F_COMPILER = GFORTRAN
F_COMPILER = INTEL
# F_COMPILER = PGI
# F_COMPILER = PATHSCALE
# F_COMPILER = IBM
# F_COMPILER = COMPAQ
# F_COMPILER = SUN
# F_COMPILER = F2C

# Request a 64-bit binary. Only relevant on architectures that accept both
# 32-bit and 64-bit binaries (X86_64, SPARC, Power/PowerPC or WINDOWS).
#BINARY64  = 1

# Build the threaded (SMP) BLAS.
SMP = 1

# Upper bound on the number of threads. The original guidance is that it
# should basically be less than the actual number of cores; when left
# unset, a script detects it automatically.
MAX_THREADS = 16

# Select the legacy threaded Level 3 implementation.
# A few architectures prefer this algorithm, but that is rare.
# USE_SIMPLE_THREADED_LEVEL3 = 1

# Use GotoBLAS with an accelerator such as Cell or GPGPU.
# Experimental; currently does not work well.
# (The option name keeps its original spelling.)
# USE_ACCERELATOR = 1

# Accelerator type (does not work).
# USE_CELL_SPU = 1

# Threads keep running for a while after a BLAS operation finishes, to
# reduce the thread activate/deactivate overhead. The timeout is a number n
# from 4 to 30 and corresponds to (1 << n) cycles; e.g. 26 keeps a thread
# running for (1 << 26) cycles (about 25ms on a 3.0GHz system).
# The value can also be controlled through GOTO_THREAD_TIMEOUT.
# CCOMMON_OPT += -DTHREAD_TIMEOUT=26

# Cross compiling (the architecture must then be set manually in getarch.c!).
# Example : HOST ... G5 OSX,  TARGET = CORE2 OSX
# CROSS_SUFFIX    = i686-apple-darwin8-
# CROSS_VERSION   = -4.0.1
# CROSS_BINUTILS  =

# Special memory management: use the HugeTLB file system
# (Linux / AIX / Solaris).
# HUGETLB_ALLOCATION = 1

# Use bigphysarea memory instead of normal allocation in order to obtain
# physically contiguous memory.
# BIGPHYSAREA_ALLOCATION = 1

# Mixed memory allocation: may be worth trying for maximum performance with
# minimum impact on the system. One of ALLOC_HUGETLB or
# BIGPHYSAREA_ALLOCATION has to be defined as well; the remaining
# allocations are done by mmap or static allocation.
# (Not implemented yet)
# MIXED_MEMORY_ALLOCATION = 1

# Use static allocation instead of dynamic allocation.
# Cannot be combined with ALLOC_HUGETLB.
STATIC_ALLOCATION = 1

# Enable CPU affinity.
# CCOMMON_OPT += -DUSE_CPU_AFFINITY

# Enable memory affinity (NUMA).
# Cannot be combined with ALLOC_STATIC.
# NUMA_AFFINITY   = 1

# Use interleaved memory allocation. The default is local allocation
# (this only works together with NUMA_AFFINITY).
# CCOMMON_OPT += -DINTERLEAVED_MAPPING

# Drive the whole 64-bit region from BLAS. Not every Fortran compiler
# supports this; leave it commented out when unsure.
# INTERFACE64 = 1

# Special compiler / flags used to run the architecture detection script.
# Both are appended to, so values supplied earlier are kept.
GETARCH_CC +=
GETARCH_FLAGS +=

#
#  End of user configuration
#

 

# An explicit 32-bit request wins: a non-empty BINARY32 clears BINARY64.
ifdef BINARY32
  BINARY64 =
endif

# First inclusion only (GOTOBLAS_MAKEFILE is still unset or empty):
# set the marker and export it to sub-makes, then reset the variables
# below to empty. MACHINE and OSNAME are re-detected right after this
# block; the others are presumably filled in elsewhere in the build.
ifndef GOTOBLAS_MAKEFILE
  GOTOBLAS_MAKEFILE = 1
  export GOTOBLAS_MAKEFILE

  ARCH       =
  ARCHSUBDIR =
  CONFIG     =
  CORE       =
  FU         =
  LIBSUBARCH =
  MACHINE    =
  OSNAME     =
  PGCPATH    =
  SUBARCH    =
endif

 

# Detect the build machine type unless MACHINE already has a non-empty
# value. Any "i?86" reported by `uname -m` (i486, i586, i686, ...) is
# normalised to "i386".
ifndef MACHINE
MACHINE := $(shell uname -m | sed -e 's/i.86/i386/')
endif

# Detect the operating system name unless OSNAME already has a non-empty
# value. Everything from the first '-' onwards is dropped from the
# `uname -s` output, so that a name carrying a dash-separated suffix still
# compares equal to the bare name (e.g. CYGWIN_NT) in the OSNAME checks.
#
# The sed script must be 's/-.*//'. The previous form `s//-.*//` is not a
# valid substitution (empty regex followed by a stray trailing '/'), which
# makes sed fail and leaves OSNAME empty. The script is quoted so the
# shell never globs the '*'.
ifndef OSNAME
OSNAME  := $(shell uname -s | sed -e 's/-.*//')
endif

 

# Per-platform defaults derived from MACHINE and OSNAME.
# NATIVEARCH and EXTRALIB are only set here; they are consumed elsewhere
# in the build (not visible in this fragment).

# i386 on anything except Darwin and Cygwin: force a 32-bit build
# (BINARY64 is cleared) and mark the architecture as native.
ifeq ($(filter Darwin CYGWIN_NT,$(OSNAME)),)
  ifeq ($(MACHINE),i386)
    BINARY64   =
    NATIVEARCH = YES
  endif
endif

# ia64 and alpha: always 64-bit, always native.
ifneq ($(filter ia64 alpha,$(MACHINE)),)
  BINARY64   = YES
  NATIVEARCH = YES
endif

# AIX: native, and the architecture detection is compiled with -maix64.
ifeq ($(OSNAME),AIX)
  NATIVEARCH = YES
  GETARCH_FLAGS += -maix64
endif

# Darwin: native unless a 64-bit binary was requested; SystemStubs is
# always added to the extra libraries.
ifeq ($(OSNAME),Darwin)
  ifndef BINARY64
    NATIVEARCH = YES
  endif
  EXTRALIB += -lSystemStubs
endif

 

# 64-bit build (needed to access chunks larger than 4GB on a 64-bit
# system): define __64BIT__ both for the architecture detection and for
# the common C flags. When INTERFACE64 is requested as well,
# USE64BITINT is additionally defined in the common C flags.
ifdef BINARY64
  GETARCH_FLAGS += -D__64BIT__
  CCOMMON_OPT   += -D__64BIT__
  ifdef INTERFACE64
    CCOMMON_OPT += -DUSE64BITINT
  endif
endif

  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值