2

作为提高我的编程技能计划的一部分,我决定尝试按字典顺序对大量字符串进行排序(稍后我计划使用线程来这样做)。我研究了不同的排序算法,并尝试根据我的理解实现一个合并排序。现在我打算对一些简单的字符串进行排序。

我正在输入以下要在以下方法中排序的字符串:

 string[] stringsArray = new string[] { "cixymn", "adfxij", "adxhxy", "abcdef", "iejfyq", "uqbzxo", "aaaaaa" };
 string[] stringSorted = MergeSort(stringsArray);

 // For display purposes
 foreach (string s in stringSorted)
        {
            Console.WriteLine("Item at index " + Array.IndexOf(stringSorted, s) + " is " + s);
        }

我得到的结果如下:

Item at index 0 is aaaaaa
Item at index 1 is abcdef
Item at index 2 is adfxij
Item at index 3 is uqbzxo
Item at index 4 is 
Item at index 4 is 
Item at index 4 is 

由于要实现归并排序,必须先将数组一分为二,我很容易理解,在这种情况下,左边部分排序成功,右边部分被忽略。我的印象是这种情况正在发生,因为我在每次递归中比较数组左侧的每个字符串的字符(因此可能忽略右侧)。所以我想我真的明白问题可能出在哪里。但是,我不太清楚该怎么做。任何帮助将不胜感激。

下面是 MergeSort 方法的代码。

  private static string[] MergeSort(string[] stringsArray)
    {
        if (stringsArray.Length == 1)
        {
            return stringsArray;
        }

        int middle = stringsArray.Length / 2;

        string[] left = new string[middle];
        string[] right = new string[stringsArray.Length - middle];

        for (int i = 0; i < middle; i++)
        {
            left[i] = stringsArray[i];
        }

        for (int i = 0; i < stringsArray.Length - middle; i++)
        {
            right[i] = stringsArray[i + middle];
        }

        left = MergeSort(left);
        right = MergeSort(right);

        int leftPointer = 0;
        int rightPointer = 0;

        string[] sorted = new string[stringsArray.Length];

        for (int k = 0; k < stringsArray.Length; k++)
        {
            if (k == left.Length)
            {
                break;
            }

            for (int i = 0; i < left[leftPointer].Count(); i++)
            {
                var leftChar = left[leftPointer][i];

                if (i == right[rightPointer].Count())
                {
                    continue;
                }

                var rightChar = right[rightPointer][i];

                if ((rightPointer == right.Length || leftPointer < left.Length) && leftChar < rightChar)
                {
                    sorted[k] = left[leftPointer];
                    sorted[k + 1] = right[rightPointer];
                    leftPointer++;

                    break;
                }

                if ((leftPointer == left.Length || rightPointer < right.Length) && (rightChar < leftChar))
                {
                    sorted[k] = right[rightPointer];
                    sorted[k + 1] = left[leftPointer];
                    rightPointer++;
                    break;
                }
            }
        }

问题 #2:您会如何建议优化代码以便能够使用线程?

4

2 回答 2

1

这是我从这里抄袭、概括和更新的答案

public static IList<T> MergeSort<T>(
    this IList<T> unsorted,
    IComparer<T> comparer = null)
{
    if (unsorted == null ||  unsorted.Count < 2)
    {
        return unsorted;
    }

    if (comparer == null)
    {
        comparer = Comparer<T>.Default;
    }

    IList<T> sorted = new List<T>();
    int middle = (int)unsorted.Count/2;
    Ilist<T> left = unsorted.GetRange(0, middle);
    IList<T> right = unsorted.GetRange(middle, unsorted.Count - middle);

    var sortLeft = Task<IList<T>>.Factory.StartNew(
        () => left.MergeSort(comparer));

    var sortRight = Task<IList<T>>.Factory.StartNew(
        () => right.MergeSort(comparer));

    left = sortLeft.Result;
    right = sortRight.Result;

    int leftPtr = 0;
    int rightPtr = 0;
    for (int i = 0; i < left.Count + right.Count; i++)
    {
        if (leftPtr == left.Count)
        {
            sorted.Add(right[rightPtr]);
            rightPtr++;
        }
        else if (rightPtr == right.Count)
        {
            sorted.Add(left[leftPtr]);
            leftPtr++;
        }
        else if (comparer.Compare(left[leftPtr], right[rightPtr]) < 0)
        {
            sorted.Add(left[leftPtr]);
            leftPtr++;
        }
        else
        {
            sorted.Add(right[rightPtr]);
            rightPtr++;
        }
    }

    return sorted;
}

IComparer<T>除非您传递自己的代码,否则此代码将使用默认值。

正如您所看到的,这段代码在未排序数组的每一半上进行自我迭代,我添加了一些代码,使用TaskTPL 类在单独的线程上异步运行这些调用。

你可以使用这样的代码,

var strings = new List<string>
    {
        "cixymn", 
        "adfxij",
        "adxhxy",
        "abcdef",
        "iejfyq",
        "uqbzxo",
        "aaaaaa" 
    };

var sortedStrings = strings.MergeSort();          

如果默认字符串比较器对您来说字典顺序不够,您可以实例化并传递您的 a selected StringComparer,也许像这样,

var sortedStrings = strings.MergeSort(StringComparer.OrdinalIgnoreCase);

万一没有一个StringComparers 满足您的要求,您可以编写自己的实现IComparer<string>并将其传递给MergeSort函数。

在任何情况下,保持归并排序对所有类型都是通用的和可重用的,并将特化传递给函数是有意义的。

于 2012-08-10T09:00:28.943 回答
1

这是一个有效的解决方案。MergeSort 是最基本的版本,ThreadedMergeSort 使用任务和优化琐碎案例。简单版本比我机器上的 .Sort() 方法(我认为是快速排序)慢大约 30%,而线程版本大约快两倍。

    static List<T> MergeSort<T>(List<T> input) where T: IComparable
    {
        var length = input.Count;

        if (length < 2)
            return input;

        var left = MergeSort(input.GetRange(0, length / 2));
        var right = MergeSort(input.GetRange(length / 2, length - length / 2));
        var result = new List<T>();
        for (int leftIndex = 0, leftLength = left.Count, rightLength = right.Count, rightIndex = 0; leftIndex + rightIndex < length;)
        {
            if (rightIndex >= rightLength || leftIndex < leftLength && left[leftIndex].CompareTo(right[rightIndex]) <= 0)
                result.Add(left[leftIndex++]);
            else
                result.Add(right[rightIndex++]);
        }

        return result;
    }

    static List<T> ThreadedMergeSort<T>(List<T> input) where T : IComparable
    {
        var length = input.Count;

        if (length < 2)
            return input;

        // this next part can be commented out if you want a "pure" mergesort, but it
        // doesn't make sense to merge sort very short sublists.
        if (length < 10)
        {
            for (int i = 0; i < length - 1; i++)
                for (int j = i + 1; j < length; j++)
                    if (input[i].CompareTo(input[j]) > 0)
                    {
                        var tmp = input[i];
                        input[i] = input[j];
                        input[j] = tmp;
                    }
            return input;
        }

        List<T> left, right;
        if (length > 10000)
        {
            var taskLeft = Task<List<T>>.Factory.StartNew(() => { return ThreadedMergeSort(input.GetRange(0, length / 2)); });
            var taskRight = Task<List<T>>.Factory.StartNew(() => { return ThreadedMergeSort(input.GetRange(length / 2, length - length / 2)); });
            taskLeft.Wait();
            taskRight.Wait();
            left = taskLeft.Result;
            right = taskRight.Result;
        }
        else
        {
            left = ThreadedMergeSort(input.GetRange(0, length / 2));
            right = ThreadedMergeSort(input.GetRange(length / 2, length - length / 2));
        }
        var result = new List<T>();
        for (int leftIndex = 0, leftLength = left.Count, rightLength = right.Count, rightIndex = 0; leftIndex + rightIndex < length; )
        {
            if (rightIndex >= rightLength || leftIndex < leftLength && left[leftIndex].CompareTo(right[rightIndex]) <= 0)
                result.Add(left[leftIndex++]);
            else
                result.Add(right[rightIndex++]);
        }

        return result;
    }
于 2012-08-10T09:42:25.617 回答