1

我是 OpenMP 的新手。我试图实现合并排序的并行版本。我的串行实现的代码与此相同,但我没有使用 parallelMergeSort 函数。我的并行实现如下:

//PARALLEL MERGE SORT
#include<stdio.h>
#include<stdlib.h>
#include<time.h>
#include<omp.h>

void merge(int arr[], int indexA, int indexB, int end, int arrOut[]);
void mergeSort(int arr[], int inf, int sup, int arrOut[]);
void parallelMergeSort(int arr[], int inf, int sup, int arrOut[], int threads);


int main(int argc, char *argv[]){
    int threads, availableThreads;
    int N = 10;
    int my_array[N];
    int outputArray[N];
    int length = sizeof(my_array) / sizeof(my_array[0]);
    srand(time(NULL));
    int i;
    for (i=0; i<N; i++){
        my_array[i]=rand()%100 + 1;
    }
    //print the array 
    for (i=0; i<N; i++){
        printf("%d ", my_array[i]);
    }   

    availableThreads=omp_get_max_threads();
    printf("Numero massimo di threads disponibile: %d\n", availableThreads);

    printf("Inserisci numero di threads non superiore a %d: \n", availableThreads);
    scanf("%d", &threads);

    omp_set_nested(1);
    if (omp_get_nested() != 1)
        printf("Errore nested parallelism\n");

    int processori = omp_get_num_procs(); 
    printf("Processori: %d\n", processori);

    if(threads>processori)
        omp_set_num_threads(threads);

    printf("\n--------------\n");

    double start=omp_get_wtime();
    parallelMergeSort(my_array, 0, length-1, outputArray, threads); 
    double end=omp_get_wtime();
    printf("Time: %f", end-start);

    for(i=0; i<N; i++){
        printf("%d ", my_array[i]);
    }
    printf("\n");
} 



void merge(int arr[], int indexA, int indexB, int end, int arrOut[]){
    int i=indexA, j=indexB, k=indexA;
    while(i<=indexB-1 && j<=end){
        if(arr[i]<arr[j]){
            //i=i+1;
            arrOut[k]=arr[i++];
        }
        else{
            //j=j+1;
            arrOut[k]=arr[j++];
        }
        k++;
    }
    while(i<=indexB-1){
        //i++;
        arrOut[k]=arr[i++];
        k++;
    }
    while(j<=end){
        //j++;
        arrOut[k]=arr[j++];
        k++;
    }
    for(i=indexA; i<=end; i++)
        arr[i]=arrOut[i];
}

void mergeSort(int arr[], int inf, int sup, int arrOut[]){
    int medium;
    if(inf<sup){
        medium=(inf+sup)/2;
        mergeSort(arr, inf, medium, arrOut);
        mergeSort(arr, medium+1, sup, arrOut);
        merge(arr, inf, medium+1, sup, arrOut);
    }
}

void parallelMergeSort(int arr[], int inf, int sup, int arrOut[], int threads){
    if (threads==1)
        mergeSort(arr, inf, sup, arrOut);
    else if(threads>1){
        #pragma omp parallel sections
        {
            #pragma omp section
            {
            parallelMergeSort(arr, inf, (inf+sup)/2, arrOut, threads);}
            #pragma omp section
            {
            parallelMergeSort(arr, (inf+sup)/2 + 1, sup, arrOut, threads-threads/2);}
        }
        
    }
}

我在 parallelMergeSort 函数中有错误,因为printf("\n--------------\n");已打印。错误是标题中的错误:libgomp: Thread creation failed: Resource temporarily unavailable。我不知道错误在哪里。我尝试按照以下示例实现:OpenMP 中的并行合并排序 我不认为这是资源问题,因为简单的程序可以工作。显然我在那个函数中犯了一些错误......

编辑:如果我使用 2 个或 3 个或 4 个线程等,都会发生这种情况。

编辑:如果我使用 OMP_DISPLAY_ENV=TRUE

OPENMP DISPLAY ENVIRONMENT BEGIN
  _OPENMP = '201511'
  OMP_DYNAMIC = 'FALSE'
  OMP_NESTED = 'FALSE'
  OMP_NUM_THREADS = '8'
  OMP_SCHEDULE = 'DYNAMIC'
  OMP_PROC_BIND = 'FALSE'
  OMP_PLACES = ''
  OMP_STACKSIZE = '140422814161920'
  OMP_WAIT_POLICY = 'PASSIVE'
  OMP_THREAD_LIMIT = '4294967295'
  OMP_MAX_ACTIVE_LEVELS = '2147483647'
  OMP_CANCELLATION = 'FALSE'
  OMP_DEFAULT_DEVICE = '0'
  OMP_MAX_TASK_PRIORITY = '0'
OPENMP DISPLAY ENVIRONMENT END
14 26 52 67 39 9 64 27 58 50 
9 14 26 27 39 50 52 58 64 67 

编辑:

//PARALLEL MERGE SORT
#include<stdio.h>
#include<stdlib.h>
#include<time.h>
#include<omp.h>

void merge(int arr[], int indexA, int indexB, int end, int arrOut[]);
void mergeSort(int arr[], int inf, int sup, int arrOut[]);
void parallelMergeSort(int arr[], int inf, int sup, int arrOut[], int threads);


int main(int argc, char *argv[]){
    int threads, availableThreads, level=0;
    int N = 1000;
    int my_array[N];
    int outputArray[N];
    int length = sizeof(my_array) / sizeof(my_array[0]);
    srand(time(NULL));
    int i;
    for (i=0; i<N; i++){
        my_array[i]=rand()%100 + 1;
    }
    //print the array 
    for (i=0; i<N; i++){
        printf("%d ", my_array[i]);
    }   

    availableThreads=omp_get_max_threads();
    printf("Numero massimo di threads disponibile: %d\n", availableThreads);

    //printf("Inserisci numero di threads non superiore a %d: \n", availableThreads);
    //scanf("%d", &threads);

    //omp_set_nested(1);
    //if (omp_get_nested() != 1)
        //printf("Errore nested parallelism\n");

    int processori = omp_get_num_procs(); 
    printf("Processori: %d\n", processori);

    //if(threads>processori)
        //omp_set_num_threads(threads);

    printf("\n--------------\n");

    double start=omp_get_wtime();
    parallelMergeSort(my_array, 0, length-1, outputArray, level); 
    double end=omp_get_wtime();
    printf("Time: %f", end-start);

    for(i=0; i<N; i++){
        printf("%d ", my_array[i]);
    }
    printf("\n");
} 



void merge(int arr[], int indexA, int indexB, int end, int arrOut[]){
    int i=indexA, j=indexB, k=indexA;
    while(i<=indexB-1 && j<=end){
        if(arr[i]<arr[j]){
            //i=i+1;
            arrOut[k]=arr[i++];
        }
        else{
            //j=j+1;
            arrOut[k]=arr[j++];
        }
        k++;
    }
    while(i<=indexB-1){
        //i++;
        arrOut[k]=arr[i++];
        k++;
    }
    while(j<=end){
        //j++;
        arrOut[k]=arr[j++];
        k++;
    }
    for(i=indexA; i<=end; i++)
        arr[i]=arrOut[i];
}

void mergeSort(int arr[], int inf, int sup, int arrOut[]){
    int medium;
    if(inf<sup){
        medium=(inf+sup)/2;
        mergeSort(arr, inf, medium, arrOut);
        mergeSort(arr, medium+1, sup, arrOut);
        merge(arr, inf, medium+1, sup, arrOut);
    }
}

void parallelMergeSort(int arr[], int inf, int sup, int arrOut[], int level){
    if (level==0){
        #pragma omp parallel
        #pragma omp single
        parallelMergeSort(arr, inf, sup, arrOut, 1);
    }
    else if(level<8){
        #pragma omp task shared(arr, arrOut)
        {
            parallelMergeSort(arr, inf, (inf+sup)/2, arrOut, level+1);
            parallelMergeSort(arr, (inf+sup)/2 + 1, sup, arrOut, level+1);
        }
    }
    else{
        mergeSort(arr, inf, sup, arrOut);
    }   
}

这是最后一个代码。如果我写 level=8 它可以工作,但是如果我写例如 level=0 或 level=1 数组将不会被排序。编辑:如果我在代码中写了与 8 不同的东西,我有Segmentation error. 特别是如果我增加数组的随机元素的数量。还有1000个级别= 8的元素我有这个分割错误

4

1 回答 1

3

你真的不应该用它parallel sections来实现递归合并排序,因为它会创建很多嵌套线程(后续并行计算不会重用这些线程)。第一级创建N线程,其中N是 OpenMP 运行时选择的线程数(通常是计算机上的硬件线程数或内核数)。只有两个线程将用于执行合并排序。第二级创建N*N线程(实际上将使用 4 个线程)。三分之二N*N*N。这很快会创建一个指数级的线程,比您预期的要大得多。您可以限制并行部分的线程数(num_threads(2)在此处使用),但已知嵌套的并行部分效率不高。考虑使用 OpenMP任务反而。但是,请注意,任务使用的变量是firstprivate默认情况下,而并行部分之一是shared默认情况下。

代码应如下所示(未经测试):

void parallelMergeSort(int arr[], int inf, int sup, int arrOut[], int level){
    if (level == 0){ /* Initialization of the OpenMP parallel context */
        #pragma omp parallel
        #pragma omp single
        parallelMergeSort(arr, inf, sup, arrOut, 1);
    }
    else if(level <= THRESHOLD){ /* Parallel recursion with tasks */
        #pragma omp task shared(arr, arrOut)
        parallelMergeSort(arr, inf, (inf+sup)/2, arrOut, level+1);

        /* There is no need to create a task for this one as it should recursively create new one or directly compute the array */
        parallelMergeSort(arr, (inf+sup)/2 + 1, sup, arrOut, level+1);
    }
    else { /* Leaf computation */
        mergeSort(arr, inf, sup, arrOut);
    }
}
于 2022-01-25T21:11:40.297 回答