5

我注意到一些 关于在字符串中查找第 n 个字符的问题。 由于我很好奇(并且在应用程序中有多种用途,但主要是出于好奇),我在 Visual Studio 2010 中对其中两种方法进行了编码和基准测试,我想知道为什么方法 1 ( ) 比方法慢得多2 ( )。我能想到的唯一原因是:FindNthOccurrenceIndexOfNth

  1. 我的基准测试代码有问题
  2. 我的算法有问题
  3. indexOf是一种内置的 .NET 方法,因此已经过优化

我倾向于#2,但我仍然想知道。这是相关的代码。

代码

class Program
    {
        static void Main(string[] args)
        {
            char searchChar = 'a';
            Random r = new Random(UnixTimestamp());

            // Generate sample data
            int numSearches = 100000, inputLength = 100;
            List<String> inputs = new List<String>(numSearches);
            List<int> nth = new List<int>(numSearches);
            List<int> occurrences = new List<int>(numSearches);
            for (int i = 0; i < numSearches; i++)
            {
                inputs.Add(GenerateRandomString(inputLength, "abcdefghijklmnopqrstuvwxyz"));
                nth.Add(r.Next(1, 4));
            }

            // Timing of FindNthOccurrence
            Stopwatch timeFindNth = Stopwatch.StartNew();
            for (int i = 0; i < numSearches; i++)
                occurrences.Add(FindNthOccurrence(inputs[i], searchChar, nth[i]));
            timeFindNth.Stop();

            Console.WriteLine(String.Format("FindNthOccurrence: {0} / {1}",
                                            timeFindNth.ElapsedMilliseconds, timeFindNth.ElapsedTicks));

            // Cleanup
            occurrences.Clear();

            // Timing of IndexOfNth
            Stopwatch timeIndexOf = Stopwatch.StartNew();
            for (int i = 0; i < numSearches; i++)
                occurrences.Add(IndexOfNth(inputs[i], searchChar, nth[i]));
            timeIndexOf.Stop();
            Console.WriteLine(String.Format("IndexOfNth: {0} / {1}",
                                            timeIndexOf.ElapsedMilliseconds, timeIndexOf.ElapsedTicks));

            Console.Read();
        }

        static int FindNthOccurrence(String input, char c, int n)
        {
            int len = input.Length;
            int occurrences = 0;
            for (int i = 0; i < len; i++)
            {
                if (input[i] == c)
                {
                    occurrences++;
                    if (occurrences == n)
                        return i;
                }
            }
            return -1;
        }

        static int IndexOfNth(String input, char c, int n)
        {
            int occurrence = 0;
            int pos = input.IndexOf(c, 0);
            while (pos != -1)
            {
                occurrence++;
                if (occurrence == n)
                    return pos;
                pos = input.IndexOf(c, pos + 1);
            }
            return -1;
        }

            // Helper methods
        static String GenerateRandomString(int length, String legalCharacters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789")
        {
            if (length < 0) throw new ArgumentOutOfRangeException("length", "length cannot be less than zero.");
            if (string.IsNullOrEmpty(legalCharacters))
                throw new ArgumentException("allowedChars may not be empty.");

            const int byteSize = 0x100;
            var legalCharSet = new HashSet<char>(legalCharacters).ToArray();
            if (byteSize < legalCharSet.Length)
                throw new ArgumentException(String.Format("allowedChars may contain no more than {0} characters.", byteSize));

            // Guid.NewGuid and System.Random are not particularly random. By using a
            // cryptographically-secure random number generator, the caller is always
            // protected, regardless of use.
            using (var rng = new System.Security.Cryptography.RNGCryptoServiceProvider())
            {
                StringBuilder result = new StringBuilder();
                var buf = new byte[128];
                while (result.Length < length)
                {
                    rng.GetBytes(buf);
                    for (var i = 0; i < buf.Length && result.Length < length; ++i)
                    {
                        // Divide the byte into legalCharSet-sized groups. If the
                        // random value falls into the last group and the last group is
                        // too small to choose from the entire legalCharSet, ignore
                        // the value in order to avoid biasing the result.
                        var outOfRangeStart = byteSize - (byteSize % legalCharSet.Length);
                        if (outOfRangeStart <= buf[i]) continue;
                        result.Append(legalCharSet[buf[i] % legalCharSet.Length]);
                    }
                }
                return result.ToString();
            }
        }

        static int UnixTimestamp()
        {
            TimeSpan ts = (System.DateTime.UtcNow - new System.DateTime(1970, 1, 1, 0, 0, 0));
            return (int)ts.TotalSeconds;
        }
    }

样本输出

每个结果输出与此类似的时间(毫秒/经过的滴答声):

FindNthOccurrence: 27 / 79716
IndexOfNth: 12 / 36492
4

2 回答 2

1

我确定您运行的是调试版本。切换到发布版本。两种方法都需要大约相同的时间。

于 2012-07-24T16:33:29.560 回答
1

通过使用 Reflector 的源代码浏览System.String,似乎IndexOf调用了一个方法,该方法定义为:

public extern int IndexOf(char value, int startIndex, int count);

所以它调用了一些内部非托管代码,这可能提供了观察到的速度提升。使用托管代码不太可能更快。

于 2012-07-24T18:53:13.477 回答