algorithm - 为什么这些小 D 程序的行为不同？

Question

我参照 http://www.dwheeler.com/essays/filenames-in-shell.html 用 D 实现了其中的 nul2pfb 实用程序，因为原文中的源代码链接已失效，而且我也想借此学习 D。我注意到它相当慢（几乎跟不上向它输送数据的 find -print0，而它本应快得多，因为它需要进行的系统调用远没有那么多）。

第一个实现工作正常（已用 zsh 和 bash 的内置 printf 以及 /usr/bin/printf 测试过）。第二个虽然快得多（可能是因为对 write() 的调用少得多），但它会多次重复输出的开头部分，并且不会输出其余部分。是什么导致了这种差异？我是 D 的新手，不明白原因。

可以正常工作的代码：

import std.stdio;
import std.conv;

// Program entry point: consumes standard input in blocks of up to 4096
// bytes and hands each block to encodebuff.
void main()
{
  enum size_t blockSize = 4096;

  foreach (ubyte[] block; stdin.chunks(blockSize))
    encodebuff(block);
}

// Encodes every byte of mybuff and emits the result through the writeChar,
// writeString and writeOctal helpers, one call per input byte:
//   - a NUL byte is emitted as a newline character;
//   - backslash and the control characters \t \n \r \f \v \a \b are emitted
//     as two-character backslash escape sequences;
//   - ASCII letters, digits and the characters / . _ : are emitted unchanged;
//   - every other byte is emitted through writeOctal.
@safe void encodebuff (ubyte[] mybuff) {
  foreach (immutable ubyte octet; mybuff) {
    immutable char asChar = to!char(octet);

    switch (octet) {
      // NUL becomes a newline in the output.
      case 0:
        writeChar('\n');
        break;

      // Bytes that have a named backslash escape.
      case '\a': writeString(`\a`); break;
      case '\b': writeString(`\b`); break;
      case '\t': writeString(`\t`); break;
      case '\n': writeString(`\n`); break;
      case '\v': writeString(`\v`); break;
      case '\f': writeString(`\f`); break;
      case '\r': writeString(`\r`); break;
      case '\\': writeString(`\\`); break;

      // Bytes that are passed through unchanged.
      case '0': .. case '9':
      case 'A': .. case 'Z':
      case 'a': .. case 'z':
      case '.', '/', ':', '_':
        writeChar(asChar);
        break;

      // Everything else is written as an octal escape.
      default:
        writeOctal(asChar);
        break;
    }
  }
}

// Writes the string a to standard output using std.stdio.write.
// Being @trusted, it can be called from the @safe encodebuff.
@trusted void writeString (string a)
{
  write (a);
}

// Writes a to standard output as a backslash followed by its octal value,
// zero-padded to at least four digits (format string "\\%.4o").
// Being @trusted, it can be called from the @safe encodebuff.
@trusted void writeOctal (int a)
{
  writef ("\\%.4o", a); // leading 0 needed for zsh printf '%b'
}

// Writes the single character a to standard output using std.stdio.write.
// Being @trusted, it can be called from the @safe encodebuff.
@trusted void writeChar (char a)
{
  write (a);
}

有问题的版本：

import std.stdio;
import std.conv;
import std.string;

// Program entry point: reads standard input in blocks of up to 4096 bytes
// and passes every block to encodebuff.
void main()
{
  enum size_t blockSize = 4096;

  foreach (ubyte[] block; stdin.chunks(blockSize))
    encodebuff(block);
}

// Encodes the bytes of mybuff into the local array outstring and writes it
// to standard output through writeString.
// NOTE(review): this is the listing the question reports as misbehaving; the
// code is left exactly as posted and the comments below only point at the
// lines involved.
@safe void encodebuff (ubyte[] mybuff) {
  char[] outstring; // accumulates the encoded output; never cleared in this function
  foreach (ubyte i; mybuff) {
    switch (i) {
      // Letters, digits and / . _ : are appended unchanged.
      case 'a': .. case 'z':
      case 'A': .. case 'Z':
      case '0': .. case '9':
      case '/':
      case '.':
      case '_':
      case ':':  outstring ~= to!char(i); break;
      // A NUL byte is appended as a newline.
      case 0:    outstring ~= '\n'; break;
      // Any other byte is formatted as an octal escape into a fixed
      // 5-character scratch buffer.
      default: char[5] mystring;
        formatOctal(mystring, i);
        outstring ~= mystring; // appends all 5 characters of the scratch buffer
        break;
      // Backslash and control characters are appended as two-character
      // backslash escape sequences.
      case '\\': outstring ~= `\\`; break;
      case '\t': outstring ~= `\t`; break;
      case '\n': outstring ~= `\n`; break;
      case '\r': outstring ~= `\r`; break;
      case '\f': outstring ~= `\f`; break;
      case '\v': outstring ~= `\v`; break;
      case '\a': outstring ~= `\a`; break;
      case '\b': outstring ~= `\b`; break;
    }
    // This call is inside the foreach, so the whole accumulated outstring is
    // written again after every input byte.
    writeString (outstring);
  }
}

// Writes the character array a to standard output using std.stdio.write.
// Being @trusted, it can be called from the @safe encodebuff.
@trusted void writeString (char[] a)
{
  write (a);
}

// Formats a into the caller-supplied buffer b as a backslash followed by its
// octal value, zero-padded to at least four digits (format string "\\%.4o").
// The value returned by sformat is discarded.
@trusted void formatOctal (char[] b, ubyte a)
{
  sformat (b, "\\%.4o", a); // leading 0 needed for zsh printf '%b'
}

测试：（请注意，filelist.txt 是在我的主目录上运行 find -print0 生成的以 NUL 分隔的文件列表，而 filelist2.txt 是通过 sed -e 's/\x0/\n/g' < filelist.txt > filelist2.txt 由 filelist.txt 生成的，因此是相应的以换行符分隔的文件名列表）。

# Test 1: decode the encoder output with /usr/bin/printf via xargs and compare
# it with the newline-delimited list.
# the sed script escapes the backslashes so xargs does not clobber them
diff filelist2.txt <(<filelist.txt char2code2 | sed -e 's/\\/\\\\/g' | xargs /usr/bin/printf  "%b\n") 
# Test 2: decode the whole encoder output with a single bash printf call.
# from within zsh
bash -c 'diff filelist2.txt <(for i in "$(<filelist.txt char2code)"; do printf "%b\n" "$i"; done)' 
# Test 3: decode word by word, re-append the NUL delimiter, and compare with
# the NUL-delimited list (process substitution form).
# from within zsh and bash
diff filelist.txt <(for i in $(char2code <filelist.txt); do printf '%b\0' "$i"; done)
# Test 4: same comparison as test 3, using a pipe instead of process substitution.
# from within zsh, bash, and dash
for i in $(char2code <filelist.txt); do printf '%b\0' "$i"; done | diff - filelist.txt

我编写的用作严格测试（acid test）的脚本：

#!/bin/bash
# Acid test for an encoder command given as this script's arguments ("$@").
# this creates a completely random list of NUL-delimited strings
a=''
# Remove the temporary file named by $a when the script exits.
trap 'rm -f "$a"' EXIT
a="$(mktemp)";
# Read random bytes, replace each pair of consecutive NUL bytes with a single
# NUL, and let dd store 2048 blocks of the result in the temporary file.
</dev/urandom sed -e 's/\x0\x0/\x0/g' | dd count=2048 of="$a"
# Abort if the temporary file is missing or empty.
test -s "$a" || exit 1
# Append a NUL byte to the end of the data.
printf '\0' >> "$a"
# Run the encoder on the data, expand each resulting word with printf '%b',
# append a NUL after it, and compare the rebuilt stream with the original file.
for i in $("$@" < "$a")
do
    printf '%b\0' "$i"
done | diff - "$a"

差异的原因是什么？

编辑：我已经按照 @yaz 和 @MichalMinich 的建议做了修改，但结果仍然不对。具体来说，在我的主目录中运行 find -print0 | char2code2（char2code2 是该程序的名称，位于我的 $PATH 中）会以退出状态 1 结束，并且没有任何输出。不过，在条目少得多的子目录中它可以正常工作。修改后的源代码如下：

import std.stdio;
import std.conv;
import std.format;
import std.array;

// Program entry point: reads standard input in blocks of up to 4096 bytes,
// passes every block to encodebuff, and finally writes one newline to
// standard output.
void main()
{
  enum size_t blockSize = 4096;

  foreach (ubyte[] block; stdin.chunks(blockSize))
    encodebuff(block);

  writeln();
}

// Encodes one block of input bytes into an Appender and writes the encoded
// text with a single writeString call once the whole block is processed:
//   - a NUL byte becomes a newline character;
//   - backslash and the control characters \t \n \r \f \v \a \b become
//     two-character backslash escape sequences;
//   - ASCII letters, digits and the characters / . _ : are copied unchanged;
//   - every other byte is formatted by formatOctal.
void encodebuff (ubyte[] mybuff) {
  auto encoded = appender!string();

  foreach (immutable ubyte octet; mybuff) {
    switch (octet) {
      // NUL becomes a newline in the output.
      case 0:
        encoded.put('\n');
        break;

      // Bytes that have a named backslash escape.
      case '\a': encoded.put(`\a`); break;
      case '\b': encoded.put(`\b`); break;
      case '\t': encoded.put(`\t`); break;
      case '\n': encoded.put(`\n`); break;
      case '\v': encoded.put(`\v`); break;
      case '\f': encoded.put(`\f`); break;
      case '\r': encoded.put(`\r`); break;
      case '\\': encoded.put(`\\`); break;

      // Bytes that are copied unchanged.
      case '0': .. case '9':
      case 'A': .. case 'Z':
      case 'a': .. case 'z':
      case '.', '/', ':', '_':
        encoded.put(to!char(octet));
        break;

      // Everything else is written as an octal escape.
      default:
        formatOctal(encoded, octet);
        break;
    }
  }

  // One write per block.
  writeString(encoded.data);
}

// Writes the string a to standard output using std.stdio.write.
@trusted void writeString (string a)
{
  write (a);
}

// Appends a to the output sink w as a backslash followed by its octal value,
// zero-padded to at least four digits (format string "\\%.4o").
// Writer is a template parameter; encodebuff passes its Appender here.
@trusted void formatOctal(Writer)(Writer w, ubyte a)
{
  formattedWrite(w, "\\%.4o", a); // leading 0 needed for zsh printf '%b'
}

score 3 · Accepted Answer

你需要把 writeString 移到 encodebuff 中的 foreach 循环之外。目前，你在每次循环中都会输出 outstring，却从不清空它。@Michal Minich 指出的问题也同样成立。

score 2 · Accepted Answer

一个原因可能是你总是把整个 5 个字符的 char[5] mystring 追加进去。formatOctal 中的 sformat 函数会返回格式化后的字符串，它可能少于 5 个字符（可能只是缓冲区的一个切片），你应该把这个返回的字符串追加到 outstring。

性能建议：在构建字符串时使用Appender而不是 ~= 以获得更好的性能。

score 1 · Accepted Answer

真正的问题出在我的 LDC 安装上。它使用了共享库，它的 druntime 版本不支持这些库。

将 LDC 重新编译为使用静态库之后，问题就解决了。

algorithm - 为什么这些小 D 程序的行为不同？

3 回答 3

Related

Reference