0

我正在实现一个并行算法。如果没有 CyclicBarrier,我可以在一半的顺序时间内完成工作。使用 CyclicBarrier 最多可以延长 100 倍。我将包括我的线程调用和线程函数,这样您就可以看到发生了什么并尝试帮助我。CyclicBarrier 被重用并且每次都产生新线程。由于某种原因,TRY(barrier.await;) 位旋转了很长时间。

//Threads use this ...
private class threadILoop implements Runnable {
    protected int start, end, j, k;
    public threadILoop(int start,int end,int j,int k){
        this.start = start;
        this.end = end;
        this.j = j;
        this.k = k;
    }
    public void run() {
        for (int z = start; z < end; z++) {

            int zxj = z ^ j;
            if(zxj > z){
                if((z&k) == 0 && (data[z] > data[zxj]))
                    swap(z, zxj);
                if((z&k) != 0 && (data[z] < data[zxj]))
                    swap(z, zxj);
            }

            try{barrier.await();}
            catch (InterruptedException ex) { return; }
            catch (BrokenBarrierException ex) {return; }
        }
    }
}
//Main Driver here, where the CyclicBarrier gets allocated and the threads //are spawned from. 
 private void loopSort() throws InterruptedException {
        //print(data);
        barrier = new CyclicBarrier(N_THREADS);
        int kMax = data.length;
        for(int k = 2; k<=kMax; k*=2){
            for (int j = k/2; j > 0; j/=2) {

                int piece = data.length/N_THREADS;

                if(j > N_THREADS) {
                    //DIVIDE UP DATA SPACE FOR THREADS -> do work faster
                    int start = 0;
                    for(int i = 0; i < N_THREADS; i++)
                        {
                            int end =  i == N_THREADS - 1 ? data.length : start + piece;
                            threads[i] = new Thread(new threadILoop(start, end, j, k));
                            //threads[i].start();
                            start = end;
                        }

                    for(int i = 0; i < N_THREADS; i++)
                        {
                            threads[i].start();
                        }




                    // print(data);

                    for(int i = 0; i < N_THREADS; i++)
                        {
                            threads[i].join();
                        }
                }





4

1 回答 1

0

您在循环中的障碍太远了,现在每个线程都有一系列元素要处理,它们都处理一个元素,等待所有线程,下一个进程,等待等等。在这种情况下,线程之间等待和通信的开销变得比实际处理要多得多。

在与其他线程对齐之前尝试处理更多元素,例如在整个范围内处理,然后等待。

//Threads use this ...
private class threadILoop implements Runnable {
    protected int start, end, j, k;
    public threadILoop(int start,int end,int j,int k){
        this.start = start;
        this.end = end;
        this.j = j;
        this.k = k;
    }
    public void run() {
        for (int z = start; z < end; z++) {    
            int zxj = z ^ j;
            if(zxj > z){
                if((z&k) == 0 && (data[z] > data[zxj]))
                    swap(z, zxj);
                if((z&k) != 0 && (data[z] < data[zxj]))
                    swap(z, zxj);
            }
            // Wait moved from here
        }
        // To here (outside the inner loop)
        try{barrier.await();}
        catch (InterruptedException ex) { return; }
        catch (BrokenBarrierException ex) {return; }
    }
}
于 2020-02-03T20:22:04.057 回答