我正在实现一个并行算法。如果没有 CyclicBarrier,我可以在一半的顺序时间内完成工作。使用 CyclicBarrier 最多可以延长 100 倍。我将包括我的线程调用和线程函数,这样您就可以看到发生了什么并尝试帮助我。CyclicBarrier 被重用并且每次都产生新线程。由于某种原因,TRY(barrier.await;) 位旋转了很长时间。
//Threads use this ...
private class threadILoop implements Runnable {
protected int start, end, j, k;
public threadILoop(int start,int end,int j,int k){
this.start = start;
this.end = end;
this.j = j;
this.k = k;
}
public void run() {
for (int z = start; z < end; z++) {
int zxj = z ^ j;
if(zxj > z){
if((z&k) == 0 && (data[z] > data[zxj]))
swap(z, zxj);
if((z&k) != 0 && (data[z] < data[zxj]))
swap(z, zxj);
}
try{barrier.await();}
catch (InterruptedException ex) { return; }
catch (BrokenBarrierException ex) {return; }
}
}
}
//Main Driver here, where the CyclicBarrier gets allocated and the threads //are spawned from.
private void loopSort() throws InterruptedException {
//print(data);
barrier = new CyclicBarrier(N_THREADS);
int kMax = data.length;
for(int k = 2; k<=kMax; k*=2){
for (int j = k/2; j > 0; j/=2) {
int piece = data.length/N_THREADS;
if(j > N_THREADS) {
//DIVIDE UP DATA SPACE FOR THREADS -> do work faster
int start = 0;
for(int i = 0; i < N_THREADS; i++)
{
int end = i == N_THREADS - 1 ? data.length : start + piece;
threads[i] = new Thread(new threadILoop(start, end, j, k));
//threads[i].start();
start = end;
}
for(int i = 0; i < N_THREADS; i++)
{
threads[i].start();
}
// print(data);
for(int i = 0; i < N_THREADS; i++)
{
threads[i].join();
}
}