我是 Trilinos 的新手。我编写了一个程序,它从文件中读取数据,并用这些数据构造一个稀疏矩阵。全局 id 可能非常大,会超出 32 位整数的范围。当我把 id 改成较小的值时,一切正常。如果只用一个进程运行(即 mpiexec -np 1 ./myprogram),也没有问题。但是,使用多个进程运行时,程序会崩溃。
数据格式如下:文件中的每一行代表矩阵中的一行。在每一行中,第一个值是行 id,第二个值是该行中非零元素(列)的个数,其后是相应数量的"列索引 值"对。示例文件(小 id):
2000791 3 2000791 0.5 1000791 0.5 3000791 1.0
1000791 2 1000791 0.5 2000791 0.5
3000791 2 3000791 0.5 1000791 0.5
3000792 2 3000791 0.5 1000791 0.5
示例文件(大 id):
2000000000000791 3 2000000000000791 0.5 1000000000000791 0.5 3000000000000791 1.0
1000000000000791 2 1000000000000791 0.5 2000000000000791 0.5
3000000000000791 2 3000000000000791 0.5 1000000000000791 0.5
3000000000000792 2 3000000000000791 0.5 1000000000000791 0.5
从 gdb 的输出和 Trilinos 的源代码来看,错误似乎是由 Epetra_BlockMap::ConstructAutoUniform 抛出的,错误信息为:"Error. Not enough space for elements on each processor"。
下面附上调试消息和我的程序的源代码。
#0 0x00007ffff58b55c9 in raise () from /lib64/libc.so.6
#1 0x00007ffff58b6cd8 in abort () from /lib64/libc.so.6
#2 0x00007ffff61b99d5 in __gnu_cxx::__verbose_terminate_handler() ()
from /lib64/libstdc++.so.6
#3 0x00007ffff61b7946 in ?? () from /lib64/libstdc++.so.6
#4 0x00007ffff61b7973 in std::terminate() () from /lib64/libstdc++.so.6
#5 0x00007ffff61b7b9f in __cxa_throw () from /lib64/libstdc++.so.6
#6 0x00000000004c6d2a in Epetra_BlockMap::ConstructAutoUniform (
this=this@entry=0x85cf00,
NumGlobal_Elements=NumGlobal_Elements@entry=2000000000000002,
Element_Size=Element_Size@entry=1,
Index_Base=Index_Base@entry=1000000000000791, comm=...,
IsLongLong=IsLongLong@entry=true)
at /home/myusername/mpi/trilinos-12.0.1-Source/packages/epetra/src/Epetra_BlockMap.cpp:81
#7 0x00000000004c708e in Epetra_BlockMap::Epetra_BlockMap (this=0x85cf00,
NumGlobal_Elements=2000000000000002, Element_Size=1,
Index_Base=1000000000000791, comm=...)
at /home/myusername/mpi/trilinos-12.0.1-Source/packages/epetra/src/Epetra_BlockMap.cpp:124
#8 0x0000000000497de9 in Epetra_Map::Epetra_Map (this=0x85cf00,
numGlobalElements=<optimized out>, indexBase=<optimized out>, comm=...)
at /home/myusername/mpi/trilinos-12.0.1-Source/packages/epetra/src/Epetra_Map.cpp:68
#9 0x00000000004c008f in Epetra_BasicDirectory::Generate<long long> (
this=0x85cea0, Map=...)
at /home/myusername/mpi/trilinos-12.0.1-Source/packages/epetra/src/Epetra_BasicDirectory.cpp:276
#10 0x00000000004bf4c5 in Epetra_BasicDirectory::Epetra_BasicDirectory (
this=0x85cea0, Map=..., __in_chrg=<optimized out>,
__vtt_parm=<optimized out>)
at /home/myusername/mpi/trilinos-12.0.1-Source/packages/epetra/src/Epetra_BasicDirectory.cpp:121
#11 0x00000000004b1ea1 in Epetra_MpiComm::CreateDirectory (
this=<optimized out>, map=...)
at /home/myusername/mpi/trilinos-12.0.1-Source/packages/epetra/src/Epetra_MpiComm.cpp:243
#12 0x00000000004c5fcc in Epetra_BlockMap::RemoteIDList (
this=this@entry=0x7fffffffddd0, NumIDs=NumIDs@entry=0, GIDList=0x0,
PIDList=0x0, LIDList=LIDList@entry=0x0, SizeList=0x0)
at /home/myusername/mpi/trilinos-12.0.1-Source/packages/epetra/src/Epetra_BlockMap.cpp:1336
#13 0x00000000004d741b in Epetra_CrsGraph::MakeColMap_LL (
this=this@entry=0x7fffffffddc0, domainMap=..., rangeMap=...)
at /home/myusername/mpi/trilinos-12.0.1-Source/packages/epetra/src/Epetra_CrsGraph.cpp:1664
---Type <return> to continue, or q <return> to quit---
#14 0x00000000004d81c9 in Epetra_CrsGraph::MakeColMap (
this=this@entry=0x7fffffffddc0, domainMap=..., rangeMap=...)
at /home/myusername/mpi/trilinos-12.0.1-Source/packages/epetra/src/Epetra_CrsGraph.cpp:1764
#15 0x00000000004d83e7 in Epetra_CrsGraph::MakeIndicesLocal (
this=this@entry=0x7fffffffddc0, domainMap=..., rangeMap=...)
at /home/myusername/mpi/trilinos-12.0.1-Source/packages/epetra/src/Epetra_CrsGraph.cpp:1784
#16 0x0000000000462bcb in Epetra_CrsMatrix::FillComplete (
this=this@entry=0x7fffffffdd50, domain_map=..., range_map=...,
OptimizeDataStorage=OptimizeDataStorage@entry=true)
at /home/myusername/mpi/trilinos-12.0.1-Source/packages/epetra/src/Epetra_CrsMatrix.cpp:1159
#17 0x0000000000462e81 in Epetra_CrsMatrix::FillComplete (
this=this@entry=0x7fffffffdd50,
OptimizeDataStorage=OptimizeDataStorage@entry=true)
at /home/myusername/mpi/trilinos-12.0.1-Source/packages/epetra/src/Epetra_CrsMatrix.cpp:1142
#18 0x000000000045a02c in read_and_construct (part=2, total_parts=4)
at /home/myusername/mpi/myprogram/main.cpp:99
#19 0x0000000000458905 in main (argc=1, argv=0x7fffffffdfe8)
程序源代码:
#include <Epetra_config.h>
#ifdef HAVE_MPI
# include <mpi.h>
# include <Epetra_MpiComm.h>
#else
# include <Epetra_SerialComm.h>
#endif // HAVE_MPI
#include <Epetra_Map.h>
#include <Epetra_CrsMatrix.h>
#include <Epetra_Vector.h>
#include <Epetra_Version.h>
#include <Epetra_DistObject.h>
#include <Epetra_Export.h>
#include <Epetra_Util.h>
#include <unistd.h>
#include <stdexcept>
//std libs
#include <cstdio>
#include <vector>
// Kept at file scope because functions in this file name standard-library
// entities (vector, cout, endl) without the std:: prefix.
using namespace std;
// 64-bit integer type used for global row/column ids.
typedef long long global_ordinal_type;
// Rank of this process; assigned in init() (MPI build) or in main() (serial build).
int pid;
// Number of processes; assigned in init() (MPI build) or in main() (serial build).
int np;
// Location of the matrix file opened by read_and_construct(). Declared as a
// pointer to const because it refers to a string literal, which must not be
// modified.
const char *path = "/home/tianxiaochen01/matrix_small.txt";
// Short aliases for the 64-bit id type and a pointer to it.
typedef long long LL;
typedef long long * T_LLP;
// Communicator shared by the whole program; allocated in main().
#ifdef HAVE_MPI
Epetra_MpiComm * comm;
#else
Epetra_SerialComm* comm;
#endif
// C style
void read_and_construct(int part,int total_parts){
FILE * matrixfile;
matrixfile = fopen(path,"r");
int len = 0;
long long src;
vector< T_LLP > arrdst;
vector< double * > arrvalue;
vector< LL > myids;
vector< int > lens;
while (fscanf(matrixfile,"%lld %d ",&src,&len) != EOF ){
T_LLP dsts = new LL [ len ];
double * values = new double [ len ];
long long dst;
double value;
for (int i=0;i<len;i++){
fscanf(matrixfile,"%lld %lf",&dst,&value);
dsts[i] = dst;
values[i] = value;
}
if ( src % (LL)total_parts == (LL)part ) {//is my part
myids.push_back(src);
lens.push_back(len);
arrdst.push_back(dsts );
arrvalue.push_back(values);
}
else {
delete [] dsts;
delete [] values;
}
}
fclose(matrixfile);
T_LLP arrmap = new LL [ myids.size() ];
for (int i=0;i<myids.size();i++){
arrmap[i] = myids[i];
}
Epetra_Map map((LL)-1, (int)myids.size(), arrmap ,(LL)0, *comm );
Epetra_Vector v1(map);
int avg_col_size = 1000;
Epetra_CrsMatrix M(Copy,map,avg_col_size);
//insert values into matrix
for (int i=0;i<myids.size();i++){
// (long long GlobalRow, int NumEntries, const double *Values, const long long *Indices)
int e = M.InsertGlobalValues(myids[i],lens[i],arrvalue[i],arrdst[i]);
}
try
{
M.FillComplete();
} catch (const Epetra_Object& ex) {
cout<<"ERROR"<<endl;
cout<<ex<<endl;
}
cout<<M<<endl;
}
// Stores the size of the given communicator and the caller's rank within it
// in the file-level variables `np` and `pid`.
void init(const Epetra_Comm& comm){
  const int process_count = comm.NumProc();
  const int my_rank = comm.MyPID();
  np = process_count;
  pid = my_rank;
}
int
main (int argc, char *argv[])
{
using std::cout;
using std::endl;
#ifdef HAVE_MPI
MPI_Init (&argc, &argv);
comm = new Epetra_MpiComm (MPI_COMM_WORLD);
init(*comm);
#else
comm = new Epetra_SerialComm;
pid = 0;
np = 1;
#endif // HAVE_MPI
read_and_construct(pid,np);
#ifdef HAVE_MPI
(void) MPI_Finalize ();
#endif // HAVE_MPI
return 0;
}
Trilinos 版本:12.0 MPI:mpich