20

我需要 ac 语言代码来对一些字符串进行排序,它应该区分大小写,对于大小写相同的字母,小写必须在前。例如以下字符串的排序结果:

eggs
bacon
cheese
Milk
spinach
potatoes
milk
spaghetti

应该:

bacon
cheese
eggs
milk
Milk
potatoes
spaghetti
spinach

我写了一个代码,但我得到的结果是:

Milk
bacon
cheese
eggs
milk
potatoes
spaghetti
spinach

我不知道如何改进这一点,我已经搜索了很多。谁能帮我解决这个问题?

#include <stdio.h>
#include <string.h>

int main(){
    char c;
    char name[20][10], temp[10];
    int count_name = 0;
    int name_index = 0;
    int i, j;

    while ((c = getchar()) != EOF){
        if (c == 10){
            name[count_name][name_index] = '\0';
            count_name++;
            name_index = 0;
        } else {
            name[count_name][name_index] = c;
            name_index++;
        }
    }

    for(i=0; i < count_name-1 ; i++){
        for(j=i+1; j< count_name; j++)
        {
            if(strcmp(name[i],name[j]) > 0)
            {
                strcpy(temp,name[i]);
                strcpy(name[i],name[j]);
                strcpy(name[j],temp);
            }
        }
    }

    for (i = 0; i < count_name; i++){
        printf("%s\n", name[i]);
    }
}
4

6 回答 6

22

Keep alike words together...

For lists of words, it is often more useful to group the "same" words together (even though they differ in case). For example:

Keeping things together:          Simple "M after m":
------------------------          -------------------
mars                              mars
mars bar                          mars bar
Mars bar                          milk
milk                              milk-duds
Milk                              milky-way
milk-duds                         Mars bar
milky-way                         Milk
Milky-way                         Milky-way

If you want words arranged like the first column, I present three ways of doing so:

  • Using strcasecmp() combined with strcmp().
  • A single pass implementation tracking the character type with isalpha(), tolower(), and isupper().
  • A single pass implementation that utilizes a collating table.

In the end I discuss two alternatives:

  • Using the collating table to establish an arbitrary ordering.
  • Setting the locale to use locale based collating.

Using available library functions

If it is possible to do so, avoid reinventing the wheel. In this case, we can do so by using the POSIX function strcasecmp() to see if they are equal with a case-insensitive comparison, and falling back on strcmp() when they are.

int alphaBetize (const char *a, const char *b) {
    int r = strcasecmp(a, b);
    if (r) return r;
    /* if equal ignoring case, use opposite of strcmp() result to get
     * lower before upper */
    return -strcmp(a, b); /* aka: return strcmp(b, a); */
}

(On some systems, the case-insensitive comparison function is called stricmp() or _stricmp(). If one is not available to you, an implementation is provided below.)

#ifdef I_DONT_HAVE_STRCASECMP
int strcasecmp (const char *a, const char *b) {
    while (*a && *b) {
        if (tolower(*a) != tolower(*b)) {
            break;
        }
        ++a;
        ++b;
    }
    return tolower(*a) - tolower(*b);
}
#endif

Avoiding two passes over the strings

Sometimes, existing functions do not perform well enough, and you have to do something else to make things faster. The following function does the comparison in roughly the same way in a single pass, and without using either strcasecmp() or strcmp(). But, it treats all non-alphabetical characters as being less than letters.

int alphaBetize (const char *a, const char *b) {
    int weight = 0;
    do {
        if (*a != *b) {
            if (!(isalpha(*a) && isalpha(*b))) {
                if (isalpha(*a) || isalpha(*b)) {
                    return isalpha(*a) - isalpha(*b);
                }
                return *a - *b;
            }
            if (tolower(*a) != tolower(*b)) {
                return tolower(*a) - tolower(*b);
            }
            /* treat as equal, but mark the weight if not set */
            if (weight == 0) {
                weight = isupper(*a) - isupper(*b);
            }
        }
        ++a;
        ++b;
    } while (*a && *b);
    /* if the words compared equal, use the weight as tie breaker */
    if (*a == *b) {
        return weight;
    }
    return !*b - !*a;
}

Using this comparison for sorting will keep milk and Milk next to each other even if the list includes milk-duds.

Using a collating table

Here is a way to dynamically create a collating table from a "configuration". It serves to illustrate a contrastive technique to change how strings get compared.

You can map how the letters of the alphabet are compared with a kind of simple table that describes the relative order you want letters (or any character except the NUL byte) to have:

const char * alphaBetical =
    "aAbBcCdDeEfFgGhHiIjJkKlLmMnNoOpPqQrRsStTuUvVwWxXyYzZ";

From this ordering, we can create a look-up table to see how two letters are supposed to compare to each other. The following function initializes the table if it has not already been done first, and otherwise performs the table look-up.

int alphaBeta_lookup (int c) {
    static int initialized;
    static char table[CHAR_MAX+1];
    if (!initialized) {
        /* leave all non-alphaBeticals in their relative order, but below
           alphaBeticals */
        int i, j;
        for (i = j = 1; i < CHAR_MAX+1; ++i) {
            if (strchr(alphaBetical, i)) continue;
            table[i] = j++;
        }
        /* now run through the alphaBeticals */
        for (i = 0; alphaBetical[i]; ++i) {
            table[(int)alphaBetical[i]] = j++;
        }
        initialized = 1;
    }
    /* return the computed ordinal of the provided character */
    if (c < 0 || c > CHAR_MAX) return c;
    return table[c];
}

With this look-up table, we can now simplify the loop body of the alphaBetize() comparison function:

int alphaBetize (const char *a, const char *b) {
    int ax = alphaBeta_lookup(*a);
    int bx = alphaBeta_lookup(*b);
    int weight = 0;
    do {
        char al = tolower(*a);
        char bl = tolower(*b);
        if (ax != bx) {
            if (al != bl) {
                return alphaBeta_lookup(al) - alphaBeta_lookup(bl);
            }
            if (weight == 0) {
                weight = ax - bx;
            }
        }
        ax = alphaBeta_lookup(*++a);
        bx = alphaBeta_lookup(*++b);
    } while (ax && bx);
    /* if the words compared equal, use the weight as tie breaker */
    return (ax != bx) ? !bx - !ax : weight;
}

Can we make things simpler?

Using the collating table, you can create many different orderings with a simplified comparison function, like:

int simple_collating (const char *a, const char *b) {
    while (alphaBeta_lookup(*a) == alphaBeta_lookup(*b)) {
        if (*a == '\0') break;
        ++a, ++b;
    }
    return alphaBeta_lookup(*a) - alphaBeta_lookup(*b);
}

Using this same function and through modifying the alphaBetical string, you can achieve nearly any ordering you want (alphabetical, reverse alphabetical, vowels before consonants, etc.). However, the arrangement of keeping alike words together requires interspersing capitalized words with words in lowercase, and this can only be done by doing a comparison that ignores case.

Note that with the simple_collating() function above and the alphaBetical string I provided, Bacon will come before milk, but Mars will go after milk and before Milk.

If you want to sort based on your locale.

If you want to use a collating sequence that is already defined for your locale, you can set the locale and call the collating comparison function:

/*
 * To change the collating locale, use (for example):
       setlocale(LC_COLLATE, "en.US");
 */
int iso_collating (const char *a, const char *b) {
    return strcoll(a, b);
}

Now, by changing the locale, the sorting order will be based on a standardized collating sequence.

于 2014-08-23T12:42:58.747 回答
12

您可以为排序编写自定义比较函数。

首先,看一下默认的strcmp排序顺序:

#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>

const char *tgt[]={
    "bacon", "Bacon", "mIlk", "Milk", "spinach", "MILK", "milk", "eggs"
};
int tgt_size=8;

static int cmp(const void *p1, const void *p2){
    return strcmp(* (char * const *) p1, * (char * const *) p2);
}

int main(int argc, char *argv[]) {
    printf("Before sort:\n\t");
    for(int n=0; n<tgt_size; n++)
        printf("%s ", tgt[n]);

    qsort(tgt, tgt_size, sizeof(char *), cmp);

    printf("\nAfter sort:\n\t");
    for(int n=0; n<tgt_size; n++)
        printf("%s ", tgt[n]);

    return 0;
}

strcmp按 ASCII 字符代码排序;即,它会排序A-Z,所以a-z所有大写的 AZ 都在任何带有小写字母的单词之前:

Before sort:
    bacon Bacon mIlk Milk spinach MILK milk eggs 
After sort:
    Bacon MILK Milk bacon eggs mIlk milk spinach 

我们可以编写自己的比较函数,cmp用于qsort忽略大小写。看起来像这样:

int mycmp(const char *a, const char *b) {
    const char *cp1 = a, *cp2 = b;

    for (; toupper(*cp1) == toupper(*cp2); cp1++, cp2++)
        if (*cp1 == '\0')
            return 0;
    return ((toupper(*cp1) < toupper(*cp2)) ? -1 : +1);
} 

请务必更改cmp为:

static int cmp(const void *p1, const void *p2){
    return mycmp(* (char * const *) p1, * (char * const *) p2);
}

忽略大小写的版本现在打印:

Before sort:
    bacon Bacon mIlk Milk spinach MILK milk eggs 
After sort:
    bacon Bacon eggs Milk MILK milk mIlk spinach 

这与使用 POSIX 函数strcasecmp获得的输出相同。

该函数mycmp首先按正常顺序按字典顺序进行比较[a|A]-[z|Z]。这意味着你会得到像字母一样的单词,但你可能会得到bacon, Bacon尽可能多的单词Bacon, bacon。这是因为 qsort 不是一个稳定的排序,'Bacon' 比较等于 'bacon'。

现在我们想要的是如果比较为 0 而忽略大小写(即,像“MILK”和“牛奶”这样的同一个词)现在比较包括大小写并颠倒顺序:

int mycmp(const char *a, const char *b) {
    const char *cp1 = a, *cp2 = b;
    int sccmp=1;

    for (; toupper(*cp1) == toupper(*cp2); cp1++, cp2++)
        if (*cp1 == '\0')
            sccmp = 0;
    if (sccmp) return ((toupper(*cp1) < toupper(*cp2)) ? -1 : +1);

    for (; *a == *b; a++, b++)
        if (*a == '\0')
            return 0;
    return ((*a < *b) ? +1 : -1);
}

最终版本打印:

Before sort:
    bacon Bacon mIlk Milk spinach MILK milk eggs 
After sort:
    bacon Bacon eggs milk mIlk Milk MILK spinach 

不幸的是,这种方法对于 UNICODE 来说变得笨拙。对于复杂排序,请考虑使用映射或多步排序和稳定排序

对于复杂和位置感知的字母排序规则,请考虑Unicode 排序规则。例如,在不同的位置,字母的字母顺序不同:

Swedish                      z < ö
                             y == w
German                       ö < z
Danish                       Z < Å
Lithuanian                   i < y < k
Tr German                    ä == æ
Tr Spanish                   c < ch < d   
German Dictionary Sort:      of < öf
German Phonebook Sort:       öf < of

这些区别的默认值在默认 Unicode 归类元素表 ( DUCET ) 中捕获,该表为 UNICODE 归类和字符串比较提供默认映射。您可以修改默认值以捕获字典排序和电话簿排序之间的区别、不同的位置或不同的大小写处理。在 Unicode 通用区域设置数据存储库 ( CLDR )中主动跟踪各个位置变化。

多级排序的推荐是分层的:

Level   Description           Examples
L1      Base characters       role < roles < rule
L2      Accents               role < rôle < roles
L3      Case/Variants         role < Role < rôle
L4      Punctuation           role < “role” < Role
Ln      Identical             role < ro□le < “role”

ICU 库中广泛使用了 Unicode 排序规则的实现。几个示例的默认 DUCET 排序规则是:

b < B < bad < Bad < bäd
c < C < cote < coté < côte < côté 

您可以使用ICU Explorer 浏览ICU 库并更改位置和目标

如果你想实现你自己的 DUCET 版本,你可以按照这个Python 脚本中使用的通用方法。这不是压倒性的,但也不是微不足道的。

于 2014-08-22T23:13:29.133 回答
4

在这里,如果我做对了,你想要一些我会描述如下的东西:

不区分大小写的排序,在平局下,将使用决胜局条件“小写优先”。

所以它就像:

  1. earlier_letter_in_the_alphabet < later_letter_in_the_alphabet无视案件

  2. lowercase < uppercase

  3. shorter_word < wider_word
    • 这个没有提到,我是从字典顺序借来的,字典里的那个
    • 可以简单地通过'\0'在比较中取最低值来使用

仅当 1 没有区分任何内容时才执行第 2 步。步骤 3 已经用 1 进行检查。所有这些都将逐个字母地完成,这意味着您应该在相应字符之间获得平局时立即切换到 2,而不仅仅是当整个字符串平局时。


假设这是正确的,我们现在需要做的就是编写一个函数,为我们对任何给定的两个字符串进行比较。

#include <ctype.h>  // for tolower and islower

int my_character_compare(const char a, const char b)
{
    int my_result;

    my_result = tolower(a) - tolower(b);
    // unless it is zero, my_result is definitely the result here
    // Note: if any one of them was 0, result will also properly favour that one


    if (my_result == 0 && a != b)
    // if (could not be distinguished with #1, but are different)
    {
        // means that they are case-insensitively same
        // but different...
        // means that one of them are lowercase, the other one is upper
        if (islower(a))
            return -1;  // favour a
        else
            return 1;   // favour b
    }


    // regardless if zero or not, my_result is definitely just the result
    return my_result;
}

int my_string_compare(const char * a, const char * b)
{
    int my_result;

    my_result = my_character_compare(*a, *b);
    // unless it is zero, my_result is definitely the result here

    while (my_result == 0 && *a != 0)
    // current characters deemed to be same
    // if they are not both just 0 we will have to check the next ones
    {
        my_result = my_character_compare(*++a, *++b);
    }

    // whatever the my_result has been:
    //   whether it became != zero on the way and broke out of the loop
    //   or it is still zero, but we have also reached the end of the road/strings
    return my_result;
}

根据约定/规则,比较函数应该返回负值以支持第一个参数在前面,负值表示支持第二个参数,如果无法区分它们,则返回零。只是一个附加信息,您可能已经通过使用strcmp.

就是这样!strcmp在您的代码中将其替换为my_string_compare此处,并将我们所做的这些定义放在上面应该提供正确的结果。实际上,它为有问题的示例输入提供了预期的结果。

当然,可以缩短定义,我把它们写得很长,以便更容易理解发生了什么。例如,我可以将其归结为以下内容:

#include <ctype.h>

int my_string_compare(const char * a, const char * b)
{
    int my_result;

    while (*a || *b)
    {
        if ((my_result = tolower(*a) - tolower(*b)))
            return my_result;
        if (*a != *b)
            return (islower(*a)) ? -1 : 1;
        a++;
        b++;
    }

    return 0;
}

与另一个基本相同,您可以使用任何您喜欢的;甚至更好,写一个。

于 2014-08-23T10:49:11.777 回答
4

OP 代码的关键是使用函数strcmp()来比较两个字符串。
因此,我将首先用另一个标准函数替换这个标准函数,如下所示:

  // We assume that the collating sequence satisfies the following rules:
  // 'A' < 'B' < 'C' < ...
  // 'a' < 'b' < 'c' < ...
  // We don't make any other assumptions.

  #include <ctype.h>      
  int my_strcmp(const char * s1, const char * s2)
  {
      const char *p1 = s1, *p2 = s2;
      while(*p1 == *p2 && *p1 != '\0' && *p2 != '\0')
          p1++, p2++;  /* keep searching... */

      if (*p1 == *p2)
         return 0;
      if (*p1 == '\0')
         return -1;
      if (*p2 == '\0')
         return +1;

      char c1 = tolower(*p1),      c2 = tolower(*p2);
      int  u1 = isupper(*p1) != 0, u2 = isupper(*p2) != 0;
      if (c1 != c2)
        return c1 - c2;  // <<--- Alphabetical order assumption is used here 
      if (c1 == c2)
        return u1 - u2;
  }

最后几行可以这样压缩:

     return (c1 != c2)? c1 - c2: u1 - u2;

现在,通过替换strcmp()my_strcmp()您将获得所需的结果。

在排序算法中,最好分别考虑它的三个主要方面:

  • 比较函数。
  • 我们将使用的抽象排序算法。
  • 当必须交换两个项目时,该数据的方式将在数组中“移动”。

这些方面可以独立优化。
因此,例如,一旦你很好地解决了比较函数,下一个优化步骤可能是用更有效的算法替换双精度排序算法,比如快速排序
特别是qsort()标准库的功能<stdlib.h>为你提供了这样的算法,所以你不需要关心编程。
最后,用于存储阵列信息的策略可能会对性能产生影响。
存储诸如“char 指针数组”而不是“char 数组数组”之类的字符串会更有效,因为交换指针比交换两个完整的字符数组更快。

指针数组

附加说明:三个 firstif()实际上是多余的,因为以下句子的逻辑暗示在*p1or*p2为 0 的情况下所需的结果。但是,通过保留这些if()'s,代码变得更具可读性。

于 2014-08-23T02:48:46.517 回答
4

我在这个讨论中迟到了,并且没有特别期望去参加并获得神话般的奖品,但没有看到使用我首先寻找的成语的解决方案,我想我会插话。

在阅读问题规范时,我的第一个想法是某种形式的自定义整理顺序,我基本上是在 @jxh 的使用整理表概念中找到的。我不认为不区分大小写是核心概念,只是奇怪的排序。

因此,我提供以下代码纯粹作为替代实现。它特定于 glibc -使用了 qsort_r(3) - 但感觉像是一种轻量级的方法,并且在运行时支持许多整理序列。但它经过了轻微的测试,我很可能会遗漏各种弱点。其中:我没有特别关注 Unicode 或一般的宽字符世界,并且为避免负数组下标而强制转换为 unsigned char 让人感到怀疑。

#include <stdio.h>
#include <limits.h>

/*
 * Initialize an index array associated with the collating
 * sequence in co. The affected array can subsequently be
 * passed in as the final client data pointer into qsort_r
 * to be used by collating_compare below.
 */
int
collating_init(const char *co, int *cv, size_t n)
{
    const unsigned char *uco = (const unsigned char *) co;
    const unsigned char *s;
    size_t i;

    if (n <= UCHAR_MAX) {
        return -1;
    }
    for (i = 0; i < n; i++) {
        /* default for chars not named in the sequence */
        cv[i] = UCHAR_MAX;
    }
    for (s = uco; *s; s++) {
        /*
         * the "collating value" for a character's own
         * character code is its ordinal (starting from
         * zero) in the collating sequence. I.e., we
         * compare the values of cv['a'] and cv['A'] -
         * rather than 'a' and 'A' - to determine order.
         */
        cv[*s] = (s - uco);
    }

    return 0;
}

static int
_collating_compare(const char *str1, const char *str2, int *ip)
{
    const unsigned char *s1 = (const unsigned char *) str1;
    const unsigned char *s2 = (const unsigned char *) str2;

    while (*s1 != '\0' && *s2 != '\0') {
        int cv1 = ip[*s1++];
        int cv2 = ip[*s2++];

        if (cv1 < cv2) return -1;
        if (cv1 > cv2) return 1;
    }

    if (*s1 == '\0' && *s2 == '\0') {
        return 0;
    } else {
        return *s1 == '\0' ? -1 : 1;
    }
}

int
collating_compare(const void *v1, const void *v2, void *p)
{
    return _collating_compare(*(const char **) v1,
                              *(const char **) v2,
                              (int *) p);
}

前者接近于可以放在单独的模块或库中的代码,但缺少自己的头文件(或头文件中的条目)。我自己的测试只是将上面和下面的代码连接到一个名为 custom_collat​​e_sort.c 的文件中,并使用

gcc -DMAIN_TEST -Wall -o custom_collate_sort custom_collate_sort.c

...编译它。

#if defined(MAIN_TEST)

/* qsort_r is a GNU-ey thing... */

#define __USE_GNU
#include <stdlib.h>
#include <string.h>

#define NELEM(x) (sizeof x / sizeof 0[x])

static int
cmp(const void *v1, const void *v2)
{
    return strcmp(*(const char **) v1, *(const char **) v2);
}

static int
casecmp(const void *v1, const void *v2)
{
    return strcasecmp(*(const char **) v1, *(const char **) v2);
}

int
main(int ac, char *av[])
{
    size_t i;

    int cval[256], ret;
    int cval_rev[256], rret;

    char *tosort[] = {
        "cheeSE", "eggs", "Milk", "potatoes", "cheese", "spaghetti",
        "eggs", "milk", "spinach", "bacon", "egg", "apple", "PEAR",
        "pea", "berry"
    };

    ret = collating_init("aAbBcCdDeEfFgGhHiIjJkKlLmMnNoOpPqQrRsStTuUvVxXyYzZ",
                         cval, NELEM(cval));

    rret = collating_init("ZzYyXxVvUuTtSsRrQqPpOoNnMmLlKkJjIiHhGgFfEeDdCcBbAa",
                          cval_rev, NELEM(cval_rev));

    if (ret == -1 || rret == -1) {
        fputs("collating value array must accomodate an index of UCHAR_MAX\n", stderr);
        return 1;
    }

    puts("Unsorted:");
    for (i = 0; i < NELEM(tosort); i++) {
        printf("  %s\n", tosort[i]);
    }

    qsort((void *) tosort, NELEM(tosort), sizeof tosort[0], cmp);

    puts("Sorted w/ strcmp:");
    for (i = 0; i < NELEM(tosort); i++) {
        printf("  %s\n", tosort[i]);
    }

    qsort((void *) tosort, NELEM(tosort), sizeof tosort[0], casecmp);

    puts("Sorted w/ strcasecmp:");
    for (i = 0; i < NELEM(tosort); i++) {
        printf("  %s\n", tosort[i]);
    }

    qsort_r((void *) tosort, NELEM(tosort), sizeof tosort[0],
            collating_compare, (void *) cval);

    puts("Sorted w/ collating sequence:");
    for (i = 0; i < NELEM(tosort); i++) {
        printf("  %s\n", tosort[i]);
    }

    qsort_r((void *) tosort, NELEM(tosort), sizeof tosort[0],
            collating_compare, (void *) cval_rev);

    puts("Sorted w/ reversed collating sequence:");
    for (i = 0; i < NELEM(tosort); i++) {
        printf("  %s\n", tosort[i]);
    }

    return 0;
}

#endif /* MAIN_TEST */
于 2014-08-28T17:13:06.403 回答
2

程序所需的标准头文件:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

主程序从这里开始:

//Global Variables Required
//=========
const char *tgt[]={"bacon", "Bacon", "mIlk", 
        "Milk", "spinach", "MILK", "milk", "eggs"};    //Array for sorting
int tgt_size=8;                                        //Total Number of Records
int     SortLookupTable[128];                          //Custom sorting table
typedef int      cmp_t (const void *, const void *);




int main(int argc, char *argv[]) {
    printf("Before sort:\n\n");
    int n=0;
    for(n=0; n<tgt_size; n++)
        printf("%s\n", tgt[n]);

    CreateSortTable();

    myQsort(tgt, tgt_size, sizeof(char *), &compare);
    printf("\n\n====\n\n");
    for(n=0; n<tgt_size; n++)
        printf("%s\n", tgt[n]);

    return 0;
}

根据需要自定义排序表:

void CreateSortTable(void){
    int     i;
    for (i = 0; i < 128; i++){
        SortLookupTable[i] = 0;
    }
    char *s;
    s=(char *)malloc(64);
    memset(s, 0, 64);
    strcpy(s, "aAbBcCdDeEfFgGhHiIjJkKlLmMnNoOpPqQrRsStTuUvVwWxXyYzZ");

    i=1;
    for (; *s; s++){
        SortLookupTable[(int) ((unsigned char) *s)]=i;
        i++;
    }
}

快速排序算法,您还可以使用提供的标准库:

//Some important Definations required by Quick Sort
=======
#define SWAPINIT(a, es) swaptype = ((char *)a - (char *)0) % sizeof(long) || \
    es % sizeof(long) ? 2 : es == sizeof(long)? 0 : 1;

#define swap(a, b)                          \
    if (swaptype == 0) {                    \
        long t = *(long *)(a);              \
        *(long *)(a) = *(long *)(b);        \
        *(long *)(b) = t;                   \
    } else                                  \
        swapfunc(a, b, es, swaptype)

#define vecswap(a, b, n)    if ((n) > 0) swapfunc(a, b, n, swaptype)


#define swapcode(TYPE, parmi, parmj, n) {       \
    long i = (n) / sizeof (TYPE);               \
    register TYPE *pi = (TYPE *) (parmi);       \
    register TYPE *pj = (TYPE *) (parmj);       \
    do {                                        \
        register TYPE   t = *pi;                \
        *pi++ = *pj;                            \
        *pj++ = t;                              \
        } while (--i > 0);                      \
}

#define min(a, b)   (a) < (b) ? a : b


//Other important function
void swapfunc(char *a, char *b, int n, int swaptype){
    if(swaptype <= 1)
        swapcode(long, a, b, n)
    else
        swapcode(char, a, b, n)
}

char * med3(char *a, char *b, char *c, cmp_t *cmp){
    if ( cmp(a, b) < 0){
        if (cmp(b, c) < 0){
            return b;
        }else{
            if ( cmp(a, c) < 0){

                return c;
            }else{
                return a;
            }
        }
    }else{
        if (cmp(b, c) < 0){
            return b;
        }else{
            if ( cmp(a, c) < 0){
                return a;
            }else{
                return c;
            }
        }
    }
}

//Custom Quick Sort

void myQsort(void *a, unsigned int n, unsigned int es, cmp_t *cmp){
    char *pa, *pb, *pc, *pd, *pl, *pm, *pn;
    int d, r, swaptype, swap_cnt;

loop:   SWAPINIT(a, es);
    swap_cnt = 0;
    if (n < 7) {
        for (pm = (char *)a + es; pm < (char *)a + n * es; pm += es)
            for (pl = pm; pl > (char *)a && cmp(pl - es, pl) > 0; pl -= es){
                swap(pl, pl - es);
            }
        return;
    }
    pm = (char *)a + (n / 2) * es;
    if (n > 7) {
        pl = a;
        pn = (char *)a + (n - 1) * es;
        if (n > 40) {
            d = (n / 8) * es;
            pl = med3(pl, pl + d, pl + 2 * d, cmp);
            pm = med3(pm - d, pm, pm + d, cmp);
            pn = med3(pn - 2 * d, pn - d, pn, cmp);
        }
        pm = med3(pl, pm, pn, cmp);
    }
    swap(a, pm);
    pa = pb = (char *)a + es;

    pc = pd = (char *)a + (n - 1) * es;
    for (;;) {
        while (pb <= pc && (r = cmp(pb, a)) <= 0) {
            if (r == 0) {
                swap_cnt = 1;
                swap(pa, pb);
                pa += es;
            }
            pb += es;
        }
        while (pb <= pc && (r = cmp(pc, a)) >= 0) {
            if (r == 0) {
                swap_cnt = 1;
                swap(pc, pd);
                pd -= es;
            }
            pc -= es;
        }
        if (pb > pc)
            break;
        swap(pb, pc);
        swap_cnt = 1;
        pb += es;
        pc -= es;
    }
    if (swap_cnt == 0) {  /* Switch to insertion sort */
        for (pm = (char *)a + es; pm < (char *)a + n * es; pm += es)
            for (pl = pm; pl > (char *)a && cmp(pl - es, pl) > 0;
                 pl -= es)
                swap(pl, pl - es);
        return;
    }

    pn = (char *)a + n * es;
    r = min(pa - (char *)a, pb - pa);
    vecswap(a, pb - r, r);
    r = min(pd - pc, pn - pd - es);
    vecswap(pb, pn - r, r);
    if ((r = pb - pa) > es)
        myQsort(a, r / es, es, cmp);
    if ((r = pd - pc) > es) {
        /* Iterate rather than recurse to save stack space */
        a = pn - r;
        n = r / es;
        goto loop;
    }
}

其中两个最重要的功能是:

unsigned char Change(char a){
    return (unsigned char ) SortLookupTable[(int)a];
}


int compare (const void *a, const void *b){
    char *s1= *(char **)a;
    char *s2= *(char **)b;
    int ret, len, i;
    ret=0;

    if (strlen((void*)s1) > strlen((void*)s2)){
        len=strlen((void*)s1);
    }else{
        len=strlen((void*)s2) ;
    }

    for(i=0; i<len; i++){
        if ( s1[i] != s2[i]){
            if ( Change(s1[i]) < Change(s2[i]) ){
                ret=0;
                break;
            }else{
                ret=1;
                break;
            }
        }
    }

    return ret;
}
于 2014-08-25T12:36:24.207 回答