1

我的 KNL 平台基于 Intel(R) Xeon Phi(TM) CPU 7250 @ 1.40GHz,1 个节点,68 个内核,96GB 内存。我在 linpack 基准测试中有一些问题。

当我运行英特尔® 优化 MP LINPACK 基准测试(位于 /opt/intel/compilers_and_libraries_2018.5.274/linux/mkl/benchmarks/mp_linpack)时,得到了大约 1700 GFLOPS 的良好性能(N = 40k,运行命令:mpirun -np 1 ./xhpl)。

但是当我从 netlib 下载开源的 hpl-2.3 并安装后,性能变得很差(大约 700 GFLOPS,使用相同的问题规模和运行命令)。我认为问题出在安装过程上(即对 Make.Linux_Intel64 文件的编辑)。

请大家帮我解决这个问题;如果您有任何提升性能的建议,也请指点。

非常感谢。

# Shell that make uses to run recipe lines.
SHELL        = /bin/sh
#
# File and directory helper commands, each defined once as a variable.
TOUCH        = touch
RM           = /bin/rm -f
MKDIR        = mkdir -p
LN_S         = ln -fs
CP           = cp
CD           = cd
#
# ----------------------------------------------------------------------
# - Platform identifier ------------------------------------------------
# ----------------------------------------------------------------------
#
# ARCH takes its value from the lower-case variable `arch`, which is not
# assigned in the visible part of this file -- presumably it comes from
# the make command line (e.g. arch=Linux_Intel64); confirm against the
# top-level Makefile.  A fixed alternative is ARCH = Linux_Intel64.
ARCH         = $(arch)
#
# ----------------------------------------------------------------------
# - HPL Directory Structure / HPL library ------------------------------
# ----------------------------------------------------------------------
#
# Root directory from which every path below is derived
# (disabled alternative: $(HOME)/hpl).
# NOTE(review): presumably this has to be the directory that holds the
# HPL include/ tree -- confirm that install_hpl is that directory.
TOPdir       = /home/tuyen1/HPL/hpl-2.3/install_hpl
#
# Per-architecture library and binary directories, and the header
# directory shared by all architectures.
LIBdir       = $(TOPdir)/lib/$(ARCH)
BINdir       = $(TOPdir)/bin/$(ARCH)
INCdir       = $(TOPdir)/include
#
# HPL archive; it is the first entry of HPL_LIBS.
HPLlib       = $(LIBdir)/libhpl.a
#
# ----------------------------------------------------------------------
# - Message Passing library (MPI) --------------------------------------
# ----------------------------------------------------------------------
# MPinc tells the  C  compiler where to find the Message Passing library
# header files,  MPlib  is defined  to be the name of  the library to be
# used. The variable MPdir is only used for defining MPinc and MPlib.
#
# Root of the MPI installation; it is read only by MPinc and MPlib.
# (Disabled earlier setting: /opt/intel/mpi/4.1.0, with the same
# include64 / lib64 sub-directories.)
MPdir        = /opt/intel/compilers_and_libraries_2018.5.274/linux/mpi
# Header search flag and MPI library archive, both relative to MPdir.
MPinc        = -I$(MPdir)/include64
MPlib        = $(MPdir)/lib64/libmpi.a
# ----------------------------------------------------------------------
# - Linear Algebra library (BLAS or VSIPL) -----------------------------
# ----------------------------------------------------------------------
# LAinc tells the  C  compiler where to find the Linear Algebra  library
# header files,  LAlib  is defined  to be the name of  the library to be
# used. The variable LAdir is only used for defining LAinc and LAlib.
#
# Root of the MKL installation used as the linear algebra library.
LAdir        = /opt/intel/compilers_and_libraries_2018.5.274/linux/mkl
#
# LAinc and LAlib are assigned below only when they do not already hold
# a non-empty value, so a value supplied from outside takes precedence.
#
# LAinc is a bare directory; HPL_INCLUDES prepends the -I itself.
ifndef LAinc
LAinc        = $(LAdir)/include
endif
#
# The three MKL archives (lp64 interface, intel_thread, core) sit inside
# a --start-group/--end-group pair so the linker rescans them as a set;
# pthread and dl follow the group.
ifndef LAlib
LAlib        = -L$(LAdir)/lib/intel64 -Wl,--start-group \
               $(LAdir)/lib/intel64/libmkl_intel_lp64.a \
               $(LAdir)/lib/intel64/libmkl_intel_thread.a \
               $(LAdir)/lib/intel64/libmkl_core.a \
               -Wl,--end-group -lpthread -ldl
endif
 #
 # ----------------------------------------------------------------------
 # - F77 / C interface --------------------------------------------------
 # ----------------------------------------------------------------------
 # You can skip this section  if and only if  you are not planning to use
 # a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
 # necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
 # options.  **One and only one**  option should be chosen in **each** of
 # the 3 following categories:
 #
 # 1) name space (How C calls a Fortran 77 routine)
 #
 # -DAdd_              : all lower case and a suffixed underscore  (Suns,
 #                       Intel, ...),                           [default]
 # -DNoChange          : all lower case (IBM RS6000),
 # -DUpCase            : all upper case (Cray),
 # -DAdd__             : the FORTRAN compiler in use is f2c.
 #
 # 2) C and Fortran 77 integer mapping
 #
 # -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
 # -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
 # -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
 #
 # 3) Fortran 77 string handling
 #
 # -DStringSunStyle    : The string address is passed at the string loca-
 #                       tion on the stack, and the string length is then
 #                       passed as  an  F77_INTEGER  after  all  explicit
 #                       stack arguments,                       [default]
 # -DStringStructPtr   : The address  of  a  structure  is  passed  by  a
 #                       Fortran 77  string,  and the structure is of the
 #                       form: struct {char *cp; F77_INTEGER len;},
 # -DStringStructVal   : A structure is passed by value for each  Fortran
 #                       77 string,  and  the  structure is  of the form:
 #                       struct {char *cp; F77_INTEGER len;},
 # -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
 #                       Cray  fcd  (fortran  character  descriptor)  for
 #                       interoperation.
 #
 # Choices made for the three categories listed above: -DAdd__ for the
 # name space (note: not the -DAdd_ entry marked [default]), a C int for
 # Fortran 77 INTEGER, and Sun-style string passing.
 # NOTE(review): confirm that -DAdd__ yields the symbol names exported by
 # the BLAS library linked through LAlib.
 F2CDEFS      = -DAdd__ -DF77_INTEGER=int -DStringSunStyle
#
# ----------------------------------------------------------------------
# - HPL includes / libraries / specifics -------------------------------
# ----------------------------------------------------------------------
#
# Link inputs, in order: the HPL archive, the linear algebra libraries,
# then the MPI library.
HPL_LIBS     = $(HPLlib) $(LAlib) $(MPlib)
# Header search path: common HPL headers, per-architecture HPL headers,
# the linear algebra headers (-I is prepended to LAinc here), and MPinc
# passed through unchanged.
HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) -I$(LAinc) $(MPinc)
#
# - Compile time options -----------------------------------------------
#
# -DHPL_COPY_L           force the copy of the panel L before bcast;
# -DHPL_CALL_CBLAS       call the cblas interface;
# -DHPL_CALL_VSIPL       call the vsip  library;
# -DHPL_DETAILED_TIMING  enable detailed timers;
#
# By default HPL will:
#    *) not copy L before broadcast,
#    *) call the BLAS Fortran 77 interface,
#    *) not display detailed timing information.
#
# Extra compile-time switches.  A disabled alternative set is
# -DHPL_DETAILED_TIMING -DHPL_PROGRESS_REPORT.
# NOTE(review): ASYOUGO and HYBRID are not among the options documented
# in the list above -- confirm that the HPL sources being built actually
# test these macros; otherwise they have no effect.
HPL_OPTS     = -DASYOUGO -DHYBRID
#
# ----------------------------------------------------------------------
#
# All preprocessor-level settings in one place: the F77 interface
# defines, the optional switches, and the include paths.
HPL_DEFS     = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES)
#
# ----------------------------------------------------------------------
# - Compilers / linkers - Optimization flags ---------------------------
# ----------------------------------------------------------------------
#
# Compiler driver used for compiling and, through LINKER, for linking.
CC           = mpiicc
# Flag set with optimization turned off (-O0).
CCNOOPT      = $(HPL_DEFS) -O0 -w -nocompchk
# OpenMP switch; it is referenced only by LINKFLAGS, not by CCFLAGS.
OMP_DEFS     = -qopenmp
# Optimized (-O3) flag set.  A variant with -Wall appended is disabled.
# NOTE(review): -i-static is the older spelling of -static-intel in the
# Intel compiler documentation -- confirm it is still honoured by the
# compiler version in use.
CCFLAGS      = $(HPL_DEFS) -O3 -w -ansi-alias -i-static -z noexecstack -z relro -z now -nocompchk
#
#
# On some platforms,  it is necessary  to use the Fortran linker to find
# the Fortran internals used in the BLAS library.
#
LINKER       = $(CC)
# Link with the compile flags plus OpenMP and -mt_mpi.  CCFLAGS already
# carries -nocompchk and OMP_DEFS already carries -qopenmp, so neither
# flag is repeated here.
LINKFLAGS    = $(CCFLAGS) $(OMP_DEFS) -mt_mpi
#
# Archive creation.  RANLIB is set to echo, so the ranlib step only
# prints its arguments.
ARCHIVER     = ar
ARFLAGS      = r
RANLIB       = echo

4

0 回答 0