c - 第 k 个最小的数 - 快速排序比快速选择快

Question

我已经实现了以下快速选择算法来实现O(n)中位数选择的复杂性（更一般地说是第 k 个最小的数字）：

static size_t partition(struct point **points_ptr, size_t points_size, size_t pivot_idx)
{
    const double pivot_value = points_ptr[pivot_idx]->distance;

    /* Move pivot to the end. */
    SWAP(points_ptr[pivot_idx], points_ptr[points_size - 1], struct point *);

    /* Perform the element moving. */
    size_t border_idx = 0;
    for (size_t i = 0; i < points_size - 1; ++i) {
            if (points_ptr[i]->distance < pivot_value) {
                    SWAP(points_ptr[border_idx], points_ptr[i], struct point *);
                    border_idx++;
            }
    }

    /* Move pivot to act as a border element. */
    SWAP(points_ptr[border_idx], points_ptr[points_size - 1], struct point *);

    return border_idx;
}

static struct point * qselect(struct point **points_ptr, size_t points_size, size_t k)
{
    const size_t pivot_idx = partition(points_ptr, points_size, rand() % points_size);

    if (k == pivot_idx) { //k lies on the same place as a pivot
            return points_ptr[pivot_idx];
    } else if (k < pivot_idx) { //k lies on the left of the pivot
            //points_ptr remains the same
            points_size = pivot_idx;
            //k remains the same
    } else { //k lies on the right of the pivot
            points_ptr += pivot_idx + 1;
            points_size -= pivot_idx + 1;
            k -= pivot_idx + 1;
    }

    return qselect(points_ptr, points_size, k);
}

然后我尝试将它与 glibc 进行比较，qsort()并O(nlog(n))对其卓越的性能感到惊讶。这是测量代码：

double wtime;
wtime = 0.0;
for (size_t i = 0; i < 1000; ++i) {
    qsort(points_ptr, points_size, sizeof (*points_ptr), compar_rand);
    wtime -= omp_get_wtime();
    qsort(points_ptr, points_size, sizeof (*points_ptr), compar_distance);
    wtime += omp_get_wtime();
}
printf("qsort took %f\n", wtime);

wtime = 0.0;
for (size_t i = 0; i < 1000; ++i) {
    qsort(points_ptr, points_size, sizeof (*points_ptr), compar_rand);
    wtime -= omp_get_wtime();
    qselect(points_ptr, points_size, points_size / 2);
    wtime += omp_get_wtime();
}
printf("qselect took %f\n", wtime);

对于 10000 个元素的数组，qsort took 0.280432结果类似于。qselect took 8.516676为什么快速排序比快速选择快？

score 1 · Accepted Answer

第一个明显的答案是：也许 qsort 没有实现快速排序。自从我阅读标准以来已经有一段时间了，但我认为没有任何要求 qsort() 执行快速排序。

第二：现有的 C 标准库通常经过大量优化（例如，在可用的情况下使用特殊的汇编指令）。结合现代 CPU 的复杂性能特征，这很可能导致 O(n log n) ——快速排序不是——算法比 O(n) 算法更快。

我的猜测是你搞砸了缓存——这是 valgrind / cachegrind 能够告诉你的。

score 1 · Accepted Answer

感谢您的建议，我的 quickselect 实现的问题在于，对于包含许多重复元素O(n^2)的输入，它表现出最坏情况的复杂性，这就是我的情况。Glibc （默认使用归并排序）在这里没有展示。qsort()O(n^2)

我已经修改了我的partition()函数以执行基本的 3 路分区和 3 的中位数，这非常适合快速选择：

/** \breif Quicksort's partition procedure.                                  
 *                                                                           
 * In linear time, partition a list into three parts: less than, greater than
 * and equals to the pivot, for example input 3 2 7 4 5 1 4 1 will be        
 * partitioned into 3 2 1 1 | 5 7 | 4 4 4 where 4 is the pivot.              
 * Modified version of the median-of-three strategy is implemented, it ends with
 * a median at the end of an array (this saves us one or two swaps).         
 */                                                                          
static void partition(struct point **points_ptr, size_t points_size,
                      size_t *less_size, size_t *equal_size)
{                                                                            
    /* Modified median-of-three and pivot selection. */                      
    struct point **first_ptr = points_ptr;                                   
    struct point **middle_ptr = points_ptr + (points_size / 2);              
    struct point **last_ptr = points_ptr + (points_size - 1);                
    if ((*first_ptr)->distance > (*last_ptr)->distance) {                    
        SWAP(*first_ptr, *last_ptr, struct point *);                         
    }                                                                        
    if ((*first_ptr)->distance > (*middle_ptr)->distance) {                  
        SWAP(*first_ptr, *middle_ptr, struct point *);                       
    }                                                                        
    if ((*last_ptr)->distance > (*middle_ptr)->distance) { //reversed        
        SWAP(*last_ptr, *middle_ptr, struct point *);                        
    }                                                                        
    const double pivot_value = (*last_ptr)->distance;                      

    /* Element swapping. */                                                  
    size_t greater_idx = 0;                                                  
    size_t equal_idx = points_size - 1;                                      
    size_t i = 0;                                                            
    while (i < equal_idx) {                                                  
        const double elem_value = points_ptr[i]->distance;                   

        if (elem_value < pivot_value) {                                      
            SWAP(points_ptr[greater_idx], points_ptr[i], struct point *);    
            greater_idx++;                                                   
            i++;                                                             
        } else if (elem_value == pivot_value) {                              
            equal_idx--;                                                     
            SWAP(points_ptr[i], points_ptr[equal_idx], struct point *);      
        } else { //elem_value > pivot_value                                  
            i++;                                                             
        }                                                                    
    }                                                                        

    *less_size = greater_idx;                                                
    *equal_size = points_size - equal_idx;                                   
}

/** A selection algorithm to find the kth smallest element in an unordered list.
 */                                                                          
static struct point * qselect(struct point **points_ptr, size_t points_size,
                              size_t k)
{                                                                            
    size_t less_size;                                                        
    size_t equal_size;                                                       

    partition(points_ptr, points_size, &less_size, &equal_size);             

    if (k < less_size) { //k lies in the less-than-pivot partition           
        points_size = less_size;                                             
    } else if (k < less_size + equal_size) { //k lies in the equals-to-pivot partition
        return points_ptr[points_size - 1];                                  
    } else { //k lies in the greater-than-pivot partition                    
        points_ptr += less_size;                                             
        points_size -= less_size + equal_size;                               
        k -= less_size + equal_size;                                         
    }                                                                        

    return qselect(points_ptr, points_size, k);                              
}

结果确实是线性的并且比qsort()（我使用了@IVlad建议的Fisher-Yates shuffle，所以绝对qsort()时间更糟）：

array size  qsort     qselect   speedup
1000        0.044678  0.008671  5.152328
5000        0.248413  0.045899  5.412160
10000       0.551095  0.096064  5.736730
20000       1.134857  0.191933  5.912773
30000       2.169177  0.278726  7.782467

c - 第 k 个最小的数 - 快速排序比快速选择快

2 回答 2

Related

Reference