编辑编辑:这是我在第二条评论中使用该程序后的问题,原始帖子如下。
> ./bspsieve 8 10 processors to use: 8
upper bound: 10
It took : 0.000076 seconds for proc 0 out of 8.
*** glibc detected *** ./bspsieve: munmap_chunk(): invalid pointer: 0x000000000060d040 ***
======= Backtrace: =========
/lib64/libc.so.6(+0x75218)[0x7fbacd7c2218]
./bspsieve[0x4015f7]
./bspsieve[0x401728]
/lib64/libc.so.6(__libc_start_main+0xe6)[0x7fbacd76bbc6]
./bspsieve[0x401119]
======= Memory map: ========
00400000-0040b000 r-xp 00000000 00:1a 5278668 /vsc-mounts/leuven-user/gst/guest043/ParCoSieve/MulticoreBSP-for-C/bspsieve
0060b000-0060c000 r--p 0000b000 00:1a 5278668 /vsc-mounts/leuven-user/gst/guest043/ParCoSieve/MulticoreBSP-for-C/bspsieve
0060c000-0060d000 rw-p 0000c000 00:1a 5278668 /vsc-mounts/leuven-user/gst/guest043/ParCoSieve/MulticoreBSP-for-C/bspsieve
0060d000-0062e000 rw-p 00000000 00:00 0 [heap]
7fba48000000-7fba48021000 rw-p 00000000 00:00 0
7fba48021000-7fba4c000000 ---p 00000000 00:00 0
7fba4d26e000-7fba4d284000 r-xp 00000000 08:06 7531397 /lib64/libgcc_s.so.1
7fba4d284000-7fba4d483000 ---p 00016000 08:06 7531397 /lib64/libgcc_s.so.1
7fba4d483000-7fba4d484000 r--p 00015000 08:06 7531397 /lib64/libgcc_s.so.1
7fba4d484000-7fba4d485000 rw-p 00016000 08:06 7531397 /lib64/libgcc_s.so.1
7fbacd74d000-7fbacd8a2000 r-xp 00000000 08:06 7531274 /lib64/libc-2.11.1.so
7fbacd8a2000-7fbacdaa1000 ---p 00155000 08:06 7531274 /lib64/libc-2.11.1.so
7fbacdaa1000-7fbacdaa5000 r--p 00154000 08:06 7531274 /lib64/libc-2.11.1.so
7fbacdaa5000-7fbacdaa6000 rw-p 00158000 08:06 7531274 /lib64/libc-2.11.1.so
7fbacdaa6000-7fbacdaab000 rw-p 00000000 00:00 0
7fbacdaab000-7fbacdb00000 r-xp 00000000 08:06 7531290 /lib64/libm-2.11.1.so
7fbacdb00000-7fbacdcff000 ---p 00055000 08:06 7531290 /lib64/libm-2.11.1.so
7fbacdcff000-7fbacdd00000 r--p 00054000 08:06 7531290 /lib64/libm-2.11.1.so
7fbacdd00000-7fbacdd01000 rw-p 00055000 08:06 7531290 /lib64/libm-2.11.1.so
7fbacdd01000-7fbacdd09000 r-xp 00000000 08:06 7531329 /lib64/librt-2.11.1.so
7fbacdd09000-7fbacdf08000 ---p 00008000 08:06 7531329 /lib64/librt-2.11.1.so
7fbacdf08000-7fbacdf09000 r--p 00007000 08:06 7531329 /lib64/librt-2.11.1.so
7fbacdf09000-7fbacdf0a000 rw-p 00008000 08:06 7531329 /lib64/librt-2.11.1.so
7fbacdf0a000-7fbacdf21000 r-xp 00000000 08:06 7531435 /lib64/libpthread-2.11.1.so
7fbacdf21000-7fbace121000 ---p 00017000 08:06 7531435 /lib64/libpthread-2.11.1.so
7fbace121000-7fbace122000 r--p 00017000 08:06 7531435 /lib64/libpthread-2.11.1.so
7fbace122000-7fbace123000 rw-p 00018000 08:06 7531435 /lib64/libpthread-2.11.1.so
7fbace123000-7fbace127000 rw-p 00000000 00:00 0
7fbace127000-7fbace146000 r-xp 00000000 08:06 17398545 /lib64/ld-2.11.1.so
7fbace30b000-7fbace30f000 rw-p 00000000 00:00 0
7fbace343000-7fbace345000 rw-p 00000000 00:00 0
7fbace345000-7fbace346000 r--p 0001e000 08:06 17398545 /lib64/ld-2.11.1.so
7fbace346000-7fbace347000 rw-p 0001f000 08:06 17398545 /lib64/ld-2.11.1.so
7fbace347000-7fbace348000 rw-p 00000000 00:00 0
7fff10a45000-7fff10a5a000 rw-p 00000000 00:00 0 [stack]
7fff10ae3000-7fff10ae4000 r-xp 00000000 00:00 0 [vdso]
ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 [vsyscall]
Aborted (core dumped)
================= 原帖
我正在尝试使用 BSP ( http://en.wikipedia.org/wiki/Bulk_synchronous_parallel ) 库在 C 中实现 Erastothenes 筛的简单并行版本。
我对C和BSP都没有经验。到目前为止,我有 2 个问题:1)编译(按照说明完成)并尝试使用 ./bspsieve 200 运行程序后,我得到“分段错误(核心转储)”
2)我做错了什么其他事情吗?我不是在寻找一种好的算法,只是在寻找一种能给出预期结果的算法。
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <mcbsp.h>
/*
Note: To compile, this file has to be in the same folder as mcbsp.h and you need the 2 following commands:
gcc -Iinclude/ -pthread -c -o bspsieve.o bspsieve.c
gcc -o bspsieve bspsieve.o lib/libmcbsp1.1.0.a -lpthread -lrt
*/
int procs;
float upperbound;
int *primes;
//SPMD function
void bspSieve(){
bsp_begin(procs);
float p = bsp_nprocs(); // p = number of procs obtained
int s = bsp_pid(); // s = proc number
float blocksize; // block size to be used, note last proc has a different size!
if( s != p){
blocksize = ceil(upperbound/p);
} else {
blocksize = upperbound - (p-1)*ceil(upperbound/p);
}
// Initialize start time and end time, set start time to now.
double start_time,end_time;
start_time = bsp_time();
// Create vector that has block of candidates
int *blockvector;
blockvector = (int *)malloc(blocksize*sizeof(int));
int q;
for(q = 0; q<blocksize; q++){
//List contains the integers from s*blocksize till blocksize + s*blocksize
blockvector[q] = q + s*blocksize;
}
//We neglect the first 2 'primes' in processor 0.
if(s == 0){
blockvector[0] = 0;
blockvector[1] = 0;
}
// We are using the block distribution. We assume that n is large enough to
// assure that n/p is larger than sqrt(n). This means that we will always find the
// sieving prime in the first block, and so have to broadcast from the first
// processor to the others.
long sieving_prime;
int i;
bsp_push_reg( &sieving_prime,sizeof(long) );
for(i = 2; i * i < upperbound; i++) {
//Part 1: if first processor, get the newest sieving prime, broadcast. Search for newest prime starting from i.
if(s == 0){
int findPrimeNb;
for(findPrimeNb = i; findPrimeNb <= blocksize; findPrimeNb++) {
if( blockvector[findPrimeNb] != 0) {
sieving_prime = blockvector[findPrimeNb];
//broadcast
int procNb;
for(procNb = 0; procNb < p; ++procNb){
bsp_put(procNb, &sieving_prime,&sieving_prime,0,sizeof(long));
}
break;
}
}
}
bsp_sync();
//Part 2: Sieve using the sieving prime
int sievingNb;
for(sievingNb = 0; sievingNb < blocksize; sievingNb++){
//check if element is multiple of sieving prime, if so, pcross out (put to zero)
if( blockvector[sievingNb] % sieving_prime == 0){
blockvector[sievingNb] = 0;
}
}
}
//part 3: get local primes to central area
int transferNb;
long transferPrime;
for(transferNb = 0; transferNb < blocksize; transferNb++){
transferPrime = blockvector[transferNb];
primes[transferPrime] = transferPrime;
}
// take the end time.
end_time = bsp_time();
//Print amount of taken time, only processor 0 has to do this.
if( s == 0 ){
printf("It took : %.6lf seconds for proc %d out of %d. \n", end_time-start_time, bsp_pid(), bsp_nprocs());
fflush(stdout);
}
bsp_end();
}
int main(int argc, char **argv){
primes = (int *)malloc(upperbound*sizeof(int));
if(argc != 2) {
printf( "processors to use: %s ", argv[ 0 ] );
printf( "upper bound: %s ", argv[ 1 ] );
}
//retrieve parameters
procs = atoi( argv[ 1 ] );
upperbound = atoi( argv[ 2 ] );
// init and call parallel part
bsp_init(bspSieve, argc, argv);
bspSieve();
//Print all non zeros of candidates, these are the primes.
// Primes only go to p*p <= n
int i;
for(i = 0; i*i <= upperbound; i++) {
if(primes[i] > 0) {
printf("%d, ",primes[i]);
}
}
return 0;
}
编辑:我已经完成了以下操作来修复 mathematician1975 注意到的错误,但是我仍然得到分段错误。
int main(int argc, char **argv){
if(argc != 2) {
printf( "processors to use: %s ", argv[ 0 ] );
printf( "upper bound: %s ", argv[ 1 ] );
}
//retrieve parameters
procs = atoi( argv[ 1 ] );
upperbound = atoi( argv[ 2 ] );
primes = (int *)malloc(upperbound*sizeof(int));
// init and call parallel part
bsp_init(bspSieve, argc, argv);
bspSieve();
//Print all non zeros of candidates, these are the primes.
// Primes only go to p*p <= n
int i;
for(i = 0; i*i <= upperbound; i++) {
if(primes[i] > 0) {
printf("%d, ",primes[i]);
}
}
return 0;
}