1

I'm writing a quicksort algorithm to sort an array of strings.

The problem is that my array with the data seem to be overwritten with something right after i allocate the right and left quicksort arrays, because i print the array and its all there, but after i use malloc to allocate the others arrays, i print it again and i'm missing some elements.

Here's the output:

Pivot: 2
Emma, Olivia, Victoria, Gwyneth, Chloe, Hayley, Scarlett,
Emma, Olivia, Victoria, Gwyneth, , , ,

Anyone knows whats happening? What am missing?

char **concatenate(char **array1, int n1, char *pivot, char **array2, int n2, int len){
int i=0, j=0;
int elements = n1 + n2 + 1;

// alocating array
char **concat = (char**) malloc(sizeof(*concat) * elements);
concat[0] = (char*) malloc(sizeof(*concat) * elements * len);
for(i=1; i<elements; i++)
    concat[i] = &(concat[0][i*len]);

// concatenating 
for(i=0; i<n1; i++)
    concat[i] = array1[i];
concat[i++] = pivot;
for(j=0; j<n2; j++)
    concat[i++] = array2[j];

// returning
return concat;
}

char **quicksort(char **array, int elements, int len){
// array is already sorted
if(elements < 2)
    return array;

int pivot;
int i=0, l=0, r=0;

// selecting the pivot (median)
if(elements % 2 == 0)
    pivot = ((elements + 1) / 2) -1;
else
    pivot = (elements / 2) -1;

//REMOVE
printf("Pivot: %d\n", pivot);
for(i=0; i<elements; i++)
    printf("%s, ", array[i]);
printf("\n");

// alocating arrays
char **left = (char**) malloc(sizeof(*left) * pivot);
left[0] = (char*) malloc(sizeof(*left) * pivot * len);
for(i=1; i<pivot; i++)
    left[i] = &(left[0][i*len]);

char **rigth = (char**) malloc(sizeof(*rigth) * pivot);
rigth[0] = (char*) malloc(sizeof(*rigth) * pivot * len);
for(i=1; i<pivot; i++)
    rigth[i] = &(rigth[0][i*len]);

//REMOVE
for(i=0; i<elements; i++)
    printf("%s, ", array[i]);
printf("\n");

//quicksorting
for(i=0; i<elements; i++){
    if(array[i] == array[pivot])
        continue;

    int comp = strcmp(array[i], array[pivot]);

    //REMOVE
    printf("%d: strcmp %s, %s is %d\n", i, array[i], array[pivot], comp);

    if(comp < pivot)
        left[l++] = array[i];
    else
        rigth[r++] = array[i];
}

//REMOVE
printf("concatenate(");
for(i=0; i<l; i++)
    printf("%s ", left[i]);
printf("|%s| ", array[pivot]);
for(i=0; i<r; i++)
    printf("%s ", rigth[i]);
printf(")\n");

// recursion and return
return concatenate(quicksort(left, l, len), l, array[pivot], quicksort(rigth, r, len), r, len);
}

int main(int argc, char *argv[]){
int i, j, aux;                  

char **teste = (char**) malloc(sizeof(*teste) * 7);
teste[0] = (char*) malloc(sizeof(*teste) * 7 * 128);
for(i=1; i<7; i++)
    teste[i] = &(teste[0][i*128]);
teste[0] = "Emma";
teste[1] = "Olivia";
teste[2] = "Victoria";
teste[3] = "Gwyneth";
teste[4] = "Chloe";
teste[5] = "Hayley";
teste[6] = "Scarlett";

quicksort(teste, 7, 128);

printf("AFTER\n");
for(i=0; i<7; i++)
    printf("%s, ", teste[i]);
printf("\n");

return 0;
}
4

1 回答 1

8

为快速排序分配的理由为零,事实上,在您的情况下,使用简单的快速排序接口(char *arr[], unsigned int len),使用指针数学进行子序列调用,该函数就足够了。

提供交换指针的交换算法:

void swap_str_ptrs(char const **arg1, char const **arg2)
{
    const char *tmp = *arg1;
    *arg1 = *arg2;
    *arg2 = tmp;
}

那么算法是:

void quicksort_strs(char const *args[], unsigned int len)
{
    unsigned int i, pvt=0;

    if (len <= 1)
        return;

    // swap a randomly selected value to the last node
    swap_str_ptrs(args+((unsigned int)rand() % len), args+len-1);

    // reset the pivot index to zero, then scan
    for (i=0;i<len-1;++i)
    {
        if (strcmp(args[i], args[len-1]) < 0)
            swap_str_ptrs(args+i, args+pvt++);
    }

    // move the pivot value into its place
    swap_str_ptrs(args+pvt, args+len-1);

    // and invoke on the subsequences. does NOT include the pivot-slot
    quicksort_strs(args, pvt++);
    quicksort_strs(args+pvt, len - pvt);
}

这就是一切。包括分区。

这个怎么运作

有两种通用的递归快速排序算法:挤压扫描。这是扫描算法。我们沿着序列前进,将任何“小于”枢轴值的元素(在循环开始之前交换到序列的末尾)交换到目标槽,其索引最初是序列的开头,并随着每个交换操作。当“扫描”完成时,pvt索引是枢轴所属的位置,因为该槽下方的所有内容都“小于”该值。因此,再进行一次交换以将枢轴值放置到位。之后,我们有两个分区,它们是递归的。包含在这些分区中的任何一个中。它是我们所知道的唯一价值是在它最后的安息之地。

测试工具

包括上面的代码,我们故意用一组基本的字符串来测试它:

void print_list(char const *args[], unsigned len)
{
    unsigned i=0;
    for (;i<len;++i)
        puts(args[i]);
}

int main()
{
    char const *args[] =
    {
        "this", "is", "a", "test", "of", "quicksort", "with", "strings"
    };

    srand((unsigned)time(NULL));
    quicksort_strs(args, sizeof(args)/sizeof(*args));
    print_list(args, sizeof(args)/sizeof(*args));
    return 0;
}

输出

a
is
of
quicksort
strings
test
this
with

非递归实现

应该注意的是,上述算法非常适合非递归实现。本地动态堆栈用于保存数据对:指针和长度。优化为将琐碎的段(长度为 1 或 0 的段)推入堆栈,一种实现如下所示:

void quicksort_strs(char const *args[], unsigned int len)
{
    // holds our non-recursive stack of segments
    struct segment
    {
        char const **arr;
        unsigned int len;
        struct segment* next;
    } *stack = NULL;

    stack = malloc(sizeof(*stack));
    stack->arr = args;
    stack->len = len;
    stack->next = NULL;

    while (stack != NULL)
    {
        unsigned int i, pvt=0;
        struct segment *tmp = stack;
        stack = stack->next;

        // pull values and delete segment record
        args = tmp->arr;
        len = tmp->len;
        free(tmp);

        // nothing to unary segments
        if (len <= 1)
            continue;

        // swap a randomly selected value to the last node
        swap_str_ptrs(args+((unsigned int)rand() % len), args+len-1);

        // reset the pivot index to zero, then scan
        for (i=0;i<len-1;++i)
        {
            if (strcmp(args[i], args[len-1]) < 0)
                swap_str_ptrs(args+i, args+pvt++);
        }

        // move the pivot value into its place
        swap_str_ptrs(args+pvt, args+len-1);

        // lhs segment push
        if (pvt > 1)
        {
            tmp = malloc(sizeof(*tmp));
            tmp->arr = args;
            tmp->len = pvt;
            tmp->next = stack;
            stack = tmp;
        }

        // rhs segment push
        if ((len - ++pvt) > 1)
        {
            tmp = malloc(sizeof(*tmp));
            tmp->arr = args+pvt;
            tmp->len = len-pvt;
            tmp->next = stack;
            stack = tmp;
        }
    }
}

显然,有一个固定的节点堆栈实现会大大缩短这个时间,但这个想法应该很明显。将realloc()节点保存在“堆栈”末尾而不是开头的模式同样有趣,因为它将消除对next指针管理的需要,取而代之的是top索引。

无论如何,祝你好运,我希望它有所帮助。

于 2013-10-27T02:18:35.320 回答