1

今天对D语言有点好奇,于是翻了一下官网,在官网上看到了如下的wc实现:

import std.stdio;
import std.stream;

/**
 * Word-count ("wc") sample: for every file named on the command line, counts
 * lines, words and bytes, prints one row per file, and records how often each
 * word occurs across all files.
 *
 * Params:
 *   args = command-line arguments; args[0] is skipped, every remaining entry
 *          is treated as a file path.
 *
 * Returns: always 0.
 *
 * A word starts at an ASCII letter and ends at the first character that is
 * neither an ASCII letter nor a digit. A digit never starts a word.
 *
 * NOTE(review): std.file is used by its fully qualified name, but no
 * `import std.file` is visible in this listing — confirm it is reachable.
 */
int main (string[] args)
{
    // Running totals across all files.
    int w_total;
    int l_total;
    ulong c_total;
    // Word -> number of occurrences, accumulated over every file processed.
    int[string] dictionary;

    writefln("   lines   words   bytes file");
    foreach (arg; args[1 .. args.length])
    {
        // Per-file counters; inword is true while the scan is inside a word.
        int w_cnt, l_cnt;
        bool inword;

        // The byte count is taken from the file size, not from the scan.
        auto c_cnt = std.file.getSize(arg);
        if (c_cnt < 10_000_000)
        {
            // Files under 10,000,000 bytes: read the whole file at once and
            // use slices of that single buffer as dictionary keys.
            size_t wstart;  // index in input where the current word began
            auto input = cast(string)std.file.read(arg);

            foreach (j, c; input)
            {
                if (c == '\n')
                ++l_cnt;
                if (c >= '0' && c <= '9')
                {
                    // Digit: neither starts nor ends a word, so a digit that
                    // follows letters stays inside the current word's slice.
                }
                else if (c >= 'a' && c <= 'z' ||
                    c >= 'A' && c <= 'Z')
                {
                    if (!inword)
                    {
                        // First letter of a new word.
                        wstart = j;
                        inword = true;
                        ++w_cnt;
                    }
                }
                else if (inword)
                {   
                    // Any other character ends the current word.
                    auto word = input[wstart .. j];

                    dictionary[word]++;
                    inword = false;
                }
            }
            // The file ended in the middle of a word: record that last word.
            if (inword)
            {   
                auto w = input[wstart .. input.length];
                dictionary[w]++;
            }
        }
        else
        {
            // Larger files: read one character at a time through a
            // BufferedFile and build each word in buf.
            // NOTE(review): f is never closed explicitly in this function.
            auto f = new BufferedFile(arg);
            string buf;

            while (!f.eof())
            {   
                char c;

                f.read(c);
                if (c == '\n')
                ++l_cnt;
                if (c >= '0' && c <= '9')
                {
                    // A digit is kept only when it continues a word.
                    if (inword)
                    buf ~= c;
                }
                else if (c >= 'a' && c <= 'z' ||
                    c >= 'A' && c <= 'Z')
                {
                    if (!inword)
                    {
                        // Start a new word: reset buf to empty, then append.
                        buf.length = 0;
                        buf ~= c;
                        inword = 1;
                        ++w_cnt;
                    }
                    else
                        buf ~= c;
                }
                else if (inword)
                {
                    // End of word: bump its count. A resulting count of 1
                    // means buf has just been inserted as a new key.
                    // NOTE(review): buf is then set to null, presumably so the
                    // next word is built in a fresh array instead of writing
                    // over the storage the new key refers to — confirm for
                    // the D version in use.
                    if (++dictionary[buf] == 1)
                        buf = null;
                    inword = 0;
                }
            }
            // The file ended in the middle of a word: record that last word.
            if (inword)
            {
                dictionary[buf]++;
            }
        }
        // Per-file row; the format string carries its own "\n" as well.
        writefln("%8s%8s%8s %s\n", l_cnt, w_cnt, c_cnt, arg);
        l_total += l_cnt;
        w_total += w_cnt;
        c_total += c_cnt;
    }

    // Print the totals row only when more than one file was given.
    if (args.length > 2)
    {
        writefln("--------------------------------------\n%8s%8s%8s total",
        l_total, w_total, c_total);
    }

    writefln("--------------------------------------");

    // Print every recorded word with its count, iterating the sorted keys.
    foreach (word1; dictionary.keys.sort)
    {
        writefln("%3s %s", dictionary[word1], word1);
    }
    return 0;
}

在处理大文件的 else 分支中(原代码第 86 行),当某个单词第一次被加入字典时,代码会把 buf 设置为 null:

             if (++dictionary[buf] == 1)
                buf = null;
             inword = 0;
            }

这样做有什么好处?我试着去掉这部分代码,得到的结果是一样的。

4

1 回答 1

0

I'm just guessing here. But, that's pretty old code so the reason probably has to do with immutable. In older versions of D, there was no immutable in the type system, so a string was just an alias for plain char[].

In an associative array, changing a key after it has been inserted can break the table, because the stored hash no longer matches the key's contents. You could end up with two entries in the tree where there should be only one, along with other hard-to-find bugs. This is why newer D versions complain that the key must be immutable if you try int[char[]].

Changing length to zero, which the code does for a new word, might reuse the existing buffer. I'm pretty sure it wouldn't now, but maybe it did at the time. This could overwrite an existing entry in the hash table. Setting it to null ensures that there's actually a new buffer allocated.

Bottom line: it probably would randomly not work without that line at the time it was written.

于 2013-01-07T17:06:55.793 回答