c - "#include" C 程序中的文本文件作为 char[]

Question

有没有办法在编译时将整个文本文件作为字符串包含在 C 程序中？

就像是：

文件.txt：
```
This is
a little
text file
```

主.c：

#include <stdio.h>
int main(void) {
   #blackmagicinclude("file.txt", content)
   /*
   equiv: char[] content = "This is\na little\ntext file";
   */
   printf("%s", content);
}

得到一个在标准输出上打印 “This is a little text file”（即 file.txt 的内容）的小程序。

目前我使用了一个 hackish python 脚本，但它很丑陋，并且仅限于一个变量名，你能告诉我另一种方法吗？

score 148 · Accepted Answer

我建议为此使用 (unix util) xxd。你可以像这样使用它

$ echo hello world > a
$ xxd -i a

输出：

unsigned char a[] = {
  0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f, 0x72, 0x6c, 0x64, 0x0a
};
unsigned int a_len = 12;

score 124 · Accepted Answer

问题是关于 C 的，但如果有人试图用 C++11 来做，那么由于新的原始字符串文字，只需对包含的文本文件进行少量更改即可完成：

在 C++ 中这样做：

const char *s =
#include "test.txt"
;

在文本文件中执行以下操作：

R"(Line 1
Line 2
Line 3
Line 4
Line 5
Line 6)"

所以文件的顶部只需要一个前缀，文件末尾只需要一个后缀。在两者之间你可以随意书写，只要文本中不出现字符序列 )"，就不需要任何特殊的转义。而且，如果您指定自己的自定义分隔符，即使出现该序列也可以正常工作：

R"=====(Line 1
Line 2
Line 3
Now you can use "( and )" in the text file, too.
Line 5
Line 6)====="

score 15 · Accepted Answer

我喜欢kayahr的回答。但是，如果您不想触摸输入文件，并且使用的是CMake，则可以在文件上添加分隔符字符序列。例如，以下 CMake 代码复制输入文件并相应地包装其内容：

# make_includable(<input_file> <output_file>)
#
# Copies <input_file> to <output_file>, wrapping the contents in a C++11 raw
# string literal so that the result can be pulled into a C++ source file with
# #include and used as a string initializer.
#
#   input_file  - text file to wrap; it is only read, never modified.
#   output_file - file that is (over)written with the wrapped text.
#
# A newline is written right after the opening parenthesis, so the resulting
# C++ string starts with "\n". The input must not contain the closing
# sequence (right parenthesis, the delimiter below, double quote), because
# that would terminate the literal early.
function(make_includable input_file output_file)
    # Paths are quoted so an empty or list-valued argument cannot shift the
    # argument positions of file().
    file(READ "${input_file}" content)
    # Custom raw-string delimiter (C++ allows at most 16 characters).
    set(delim "for_c++_include")
    set(content "R\"${delim}(\n${content})${delim}\"")
    file(WRITE "${output_file}" "${content}")
    # The wrapping happens at configure time; registering the input makes the
    # build re-run CMake when it changes, so the generated copy is not stale.
    set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS "${input_file}")
endfunction()

# Use like
make_includable(external/shaders/cool.frag generated/cool.frag)

然后像这样包含在c ++中：

// The included file expands to a raw string literal. A string literal is an
// array of const char, so the pointee has to be const: C++11 does not allow
// initialising a plain char* from a string literal.
constexpr const char *test =
#include "generated/cool.frag"
;

score 14 · Accepted Answer

你有两种可能：

利用编译器/链接器扩展将文件转换为二进制文件，并使用适当的符号指向二进制数据的开头和结尾。请参阅此答案：Include binary file with GNU ld linker script。
将您的文件转换为可以初始化数组的字符常量序列。请注意，您不能只执行 "" 并跨越多行。您将需要一个续行字符 ( \)、转义"字符和其他字符来完成这项工作。更容易编写一个小程序来将字节转换为类似的序列'\xFF', '\xAB', ...., '\0'（或者使用另一个答案描述的 unix 工具xxd，如果你有它可用的话！）：

代码：

#include <stdio.h>

/* Reads bytes from stdin until EOF and prints each one as a '\xNN', character
 * constant (upper-case hex, no zero padding), followed by a final '\0'. */
int main() {
    for (;;) {
        int byte = fgetc(stdin);
        if (byte == EOF) {
            break;
        }
        printf("'\\x%X',", (unsigned)byte);
    }
    /* terminating zero, so the array can be used as a C string */
    fputs("'\\0'", stdout);
}

（未测试）。然后做：

char my_file[] = {
#include "data.h"
};

其中 data.h 由

cat file.bin | ./bin2c > data.h

score 9 · Accepted Answer

好的，受Daemin帖子的启发，我测试了以下简单示例：

a.数据：

"this is test\n file\n"

测试.c：

int main(void)
{
    char *test = 
#include "a.data"
    ;
    return 0;
}

gcc -E test.c 输出：

# 1 "test.c"
# 1 "<built-in>"
# 1 "<command line>"
# 1 "test.c"

int main(void)
{
    char *test =
# 1 "a.data" 1
"this is test\n file\n"
# 6 "test.c" 2
    ;
    return 0;
}

所以它可以工作，但需要用引号括起来的数据。

score 8 · Accepted Answer

您可以使用 objcopy 执行此操作：

objcopy --input binary --output elf64-x86-64 myfile.txt myfile.o

现在你有了一个可以链接到可执行文件中的目标文件，其中包含指向 myfile.txt 内容的开始、结束和大小的符号。

score 6 · Accepted Answer

如果您愿意使用一些肮脏的技巧，您可以使用原始字符串文字和#include某些类型的文件来发挥创意。

例如，假设我想在我的项目中包含一些 SQLite 的 SQL 脚本，并且我想获得语法高亮但不想要任何特殊的构建基础设施。我可以拥有这个test.sql对 SQLite 有效的 SQL 文件，其中--开始注释：

--x, R"(--
SELECT * from TestTable
WHERE field = 5
--)"

然后在我的 C++ 代码中，我可以拥有：

int main()
{
    auto x = 0;
    const char* mysql = (
#include "test.sql"
    );

    cout << mysql << endl;
}

输出是：

--
SELECT * from TestTable
WHERE field = 5
--

或者从test.py一个有效的 Python 脚本文件中包含一些 Python 代码（因为#在 Python 中开始注释并且pass是无操作的）：

#define pass R"(
pass
def myfunc():
    print("Some Python code")

myfunc()
#undef pass
#define pass )"
pass

然后在 C++ 代码中：

int main()
{
    const char* mypython = (
#include "test.py"
    );

    cout << mypython << endl;
}

这将输出：

pass
def myfunc():
    print("Some Python code")

myfunc()
#undef pass
#define pass

对于您可能希望包含为字符串的各种其他类型的代码，应该可以使用类似的技巧。我不确定这是否是个好主意。这是一种巧妙的技巧，但可能不是您在实际生产代码中想要的东西。不过对于周末的黑客项目来说可能没问题。

score 2 · Accepted Answer

您需要我的 xtr 实用程序，不过其余部分用一个 bash 脚本就可以完成。这是我称为 bin2inc 的脚本。第一个参数是生成的 char[] 变量的名称，第二个参数是文件的名称。输出是一个 C include 文件，文件内容以小写十六进制编码，存放在给定名称的变量中。该 char 数组以零结尾（zero terminated），数据的长度存放在 $variableName_length 中。

#!/bin/bash
#
# bin2inc - print a C include file that embeds a file as a char array.
#
# Usage: bin2inc VARIABLE_NAME FILE
#
# Writes to stdout:
#   unsigned char VARIABLE_NAME[] = { <hex bytes of FILE>, 0x00 };
#   unsigned long int VARIABLE_NAME_length = <size of FILE in bytes>;
# The length does not count the trailing 0x00 terminator.
# Requires the xtr utility as ./xtr in the current directory.

if [ $# -ne 2 ] || [ ! -r "$2" ]; then
    echo "usage: $0 VARIABLE_NAME FILE" >&2
    exit 1
fi

# Print the size in bytes of file $1; print nothing if it does not exist.
fileSize ()
{
    [ -e "$1" ] && {
        # Count bytes with wc instead of picking a column out of `ls -l`,
        # whose layout is not stable. The arithmetic expansion strips the
        # padding some wc implementations print around the number.
        echo $(( $(wc -c < "$1") ))
    }
}

echo "unsigned char $1[] = {"
./xtr -fhex -p 0x -s ', ' < "$2"
echo '0x00'
echo '};'
echo ''
echo "unsigned long int ${1}_length = $(fileSize "$2");"

您可以在此处获取 xtr。xtr（character eXTRapolator）以 GPLv3 许可发布。

score 2 · Accepted Answer

为什么不将文本链接到程序中并将其用作全局变量！这是一个例子。我正在考虑使用它在可执行文件中包含 Open GL 着色器文件，因为需要在运行时为 GPU 编译 GL 着色器。

score 2 · Accepted Answer

我在 python3 中重新实现了 xxd，修复了 xxd 的所有烦恼：

常量正确性
字符串长度数据类型：int → size_t
空终止（如果您可能需要）
C 字符串兼容：unsigned放在数组上。
更小的，可读的输出，就像你写的那样：Printable ascii is output as-is; 其他字节是十六进制编码的。

这是脚本，它自己过滤，所以你可以看到它的作用：

pyxxd.c

#include <stddef.h>

extern const char pyxxd[];
extern const size_t pyxxd_len;

/*
 * pyxxd: source text of a Python 3 replacement for `xxd -i`, embedded as a
 * C string. The script takes `-i infile outfile` and writes a C file that
 * defines `const char <identifier>[]` plus `<identifier>_len`.
 *
 * Non-printable bytes are written as fixed-width three-digit octal escapes
 * rather than \xNN. A C hex escape has no length limit, so "\x01" followed by
 * a literal 'a' would be parsed as the single escape \x01a; an octal escape
 * stops after three digits and cannot absorb the character that follows.
 * Upper-case letters in the input file name are kept in the identifier.
 */
const char pyxxd[] =
"#!/usr/bin/env python3\n"
"\n"
"import sys\n"
"import re\n"
"\n"
"def is_printable_ascii(byte):\n"
"    return byte >= ord(' ') and byte <= ord('~')\n"
"\n"
"def needs_escaping(byte):\n"
"    return byte == ord('\\\"') or byte == ord('\\\\')\n"
"\n"
"def write_byte(of, byte):\n"
"    if is_printable_ascii(byte):\n"
"        if needs_escaping(byte):\n"
"            of.write('\\\\')\n"
"        of.write(chr(byte))\n"
"    elif byte == ord('\\n'):\n"
"        of.write('\\\\n\"\\n\"')\n"
"    else:\n"
"        # 3-digit octal escape: unlike hex, it cannot absorb following digits\n"
"        of.write('\\\\{:03o}'.format(byte))\n"
"\n"
"def mk_valid_identifier(s):\n"
"    s = re.sub('^[^_a-zA-Z]', '_', s)\n"
"    s = re.sub('[^_a-zA-Z0-9]', '_', s)\n"
"    return s\n"
"\n"
"def main():\n"
"    # `xxd -i` compatibility\n"
"    if len(sys.argv) != 4 or sys.argv[1] != \"-i\":\n"
"        print(\"Usage: xxd -i infile outfile\")\n"
"        exit(2)\n"
"\n"
"    with open(sys.argv[2], \"rb\") as infile:\n"
"        with open(sys.argv[3], \"w\") as outfile:\n"
"\n"
"            identifier = mk_valid_identifier(sys.argv[2]);\n"
"            outfile.write('#include <stddef.h>\\n\\n');\n"
"            outfile.write('extern const char {}[];\\n'.format(identifier));\n"
"            outfile.write('extern const size_t {}_len;\\n\\n'.format(identifier));\n"
"            outfile.write('const char {}[] =\\n\"'.format(identifier));\n"
"\n"
"            while True:\n"
"                byte = infile.read(1)\n"
"                if byte == b\"\":\n"
"                    break\n"
"                write_byte(outfile, ord(byte))\n"
"\n"
"            outfile.write('\";\\n\\n');\n"
"            outfile.write('const size_t {}_len = sizeof({}) - 1;\\n'.format(identifier, identifier));\n"
"\n"
"if __name__ == '__main__':\n"
"    main()\n"
"";

const size_t pyxxd_len = sizeof(pyxxd) - 1;

用法（这会提取脚本）：

#include <stdio.h>

extern const char pyxxd[];
extern const size_t pyxxd_len;

int main()
{
    fwrite(pyxxd, 1, pyxxd_len, stdout);
}

score 1 · Accepted Answer

这是我用于 Visual C++ 的 hack。我添加了以下 Pre-Build Event（其中 file.txt 是输入，file_txt.h 是输出）：

@(
  echo const char text[] = R"***(
  type file.txt
  echo ^^^)***";
) > file_txt.h

然后我在需要的地方包含 file_txt.h。

这并不完美，因为它会在开头添加 \n，并在结尾添加 \n^，但这处理起来不成问题，而且我喜欢这个解决方案的简单性。如果有人能改进它以消除多余的字符，那就太好了。

score 0 · Accepted Answer

如果您执行以下操作，可能会起作用：

int main()
{
    const char* text = "
#include "file.txt"
";
    printf("%s", text);
    return 0;
}

当然，您必须小心文件中的实际内容，确保没有双引号，所有适当的字符都被转义等。

因此，如果您只是在运行时从文件中加载文本，或者将文本直接嵌入到代码中，可能会更容易。

如果你仍然想要另一个文件中的文本，你可以把它放在那里，但它必须在那里表示为一个字符串。您将使用上面的代码，但其中没有双引号。例如：

文件.txt

"Something evil\n"\
"this way comes!"

主文件

int main()
{
    const char* text =
#include "file.txt"
;
    printf("%s", text);
    return 0;
}

所以基本上在你包含的文本文件中有一个 C 或 C++ 样式的字符串。它会使代码更整洁，因为文件开头没有这么多的文本。

score 0 · Accepted Answer

即使它可以在编译时完成（我认为一般来说不能），文本也可能是预处理的标题而不是文件内容。我希望您必须在运行时从文件中加载文本或执行令人讨厌的剪切粘贴工作。

score 0 · Accepted Answer

Hasturkun 使用 xxd -i 选项的答案非常好。如果您想将转换过程（文本 -> 十六进制包含文件）直接合并到您的构建中，则 hexdump.c 工具/库最近添加了类似于 xxd 的 -i 选项的功能（它不会为您提供完整的标题 - 您需要提供 char 数组定义 - 但这样做的好处是可以让您选择 char 数组的名称）：

http://25thandclement.com/~william/projects/hexdump.c.html

它的许可证比 xxd 更“标准”并且非常自由 - 使用它在程序中嵌入 init 文件的示例可以在此处的 CMakeLists.txt 和 scheme.c 文件中看到：

https://github.com/starseeker/tinyscheme-cmake

将生成的文件包含在源代码树和捆绑实用程序中都有利有弊 - 如何处理它取决于项目的特定目标和需求。hexdump.c 为这个应用程序打开了捆绑选项。

score 0 · Accepted Answer

我认为单独使用编译器和预处理器是不可能的。gcc 允许这样做：

#define _STRGF(x) # x
#define STRGF(x) _STRGF(x)

    printk ( MODULE_NAME " built " __DATE__ " at " __TIME__ " on host "
            STRGF(
#               define hostname my_dear_hostname
                hostname
            )
            "\n" );

但不幸的是不是这样：

#define _STRGF(x) # x
#define STRGF(x) _STRGF(x)

    printk ( MODULE_NAME " built " __DATE__ " at " __TIME__ " on host "
            STRGF(
#               include "/etc/hostname"
            )
            "\n" );

错误是：

/etc/hostname: In function ‘init_module’:
/etc/hostname:1:0: error: unterminated argument list invoking macro "STRGF"

score 0 · Accepted Answer

我有类似的问题，对于小文件，上述 Johannes Schaub 的解决方案对我来说就像一个魅力。

但是，对于稍大一些的文件，它会遇到编译器的字符数组限制问题。因此，我编写了一个小型编码器应用程序，将文件内容转换为大小相等的块（可能还有填充零）的二维字符数组。它生成带有二维数组数据的输出文本文件，如下所示：

const char main_js_file_data[8][4]= {
    {'\x69','\x73','\x20','\0'},
    {'\x69','\x73','\x20','\0'},
    {'\x61','\x20','\x74','\0'},
    {'\x65','\x73','\x74','\0'},
    {'\x20','\x66','\x6f','\0'},
    {'\x72','\x20','\x79','\0'},
    {'\x6f','\x75','\xd','\0'},
    {'\xa','\0','\0','\0'}};

其中 4 实际上是编码器中的变量 MAX_CHARS_PER_ARRAY。带有生成的 C 代码的文件（例如“main_js_file_data.h”）可以轻松内联到 C++ 应用程序中，例如：

#include "main_js_file_data.h"

这是编码器的源代码：

#include <fstream>
#include <iterator>
#include <vector>
#include <algorithm>


#define MAX_CHARS_PER_ARRAY 2048


// Encodes a file as C/C++ source text declaring a two-dimensional char array.
//
// Arguments: argv[1] input filename, argv[2] output filename, argv[3] name of
// the generated variable. The output has the form
//     const char <name>[<chunks>][MAX_CHARS_PER_ARRAY]= { {...}, {...} };
// where every row holds up to MAX_CHARS_PER_ARRAY - 1 input bytes written as
// '\xNN' character constants, is padded with '\0' and always ends in '\0'.
//
// Returns 0 on success and 1 on a usage error, when either file cannot be
// opened, or when writing the output fails.
int main(int argc, char * argv[])
{
    // three parameters: input filename, output filename, variable name
    if (argc < 4)
    {
        return 1;
    }

    // Payload bytes per row; the last slot of each row is the '\0' terminator.
    const std::size_t payloadPerChunk = MAX_CHARS_PER_ARRAY - 1;

    // Read the whole input file, in binary mode.
    std::vector<char> bufferedData;
    {
        std::ifstream fStr(argv[1], std::ios::binary);
        if (!fStr.is_open())
        {
            return 1;
        }

        bufferedData.assign(std::istreambuf_iterator<char>(fStr),
                            std::istreambuf_iterator<char>());
    }

    // Write the output text file containing the array definition.
    {
        std::ofstream fStr(argv[2]);
        if (!fStr.is_open())
        {
            return 1;
        }

        // Integer ceiling division (no floating point, no <cmath> needed).
        std::size_t numChunks = (bufferedData.size() + payloadPerChunk - 1) / payloadPerChunk;
        if (numChunks == 0)
        {
            // Empty input: still emit one all-zero row, because a zero-length
            // array with an empty initializer is not valid C++.
            numChunks = 1;
        }

        // The dimensions are written before switching the stream to hex.
        fStr << "const char " << argv[3] << "[" << numChunks           << "]"
                                         << "[" << MAX_CHARS_PER_ARRAY << "]= {" << std::endl;
        fStr << std::hex;

        std::size_t count = 0;
        for (std::size_t chunk = 0; chunk < numChunks; ++chunk)
        {
            std::size_t n = 0;
            fStr << "{";
            for (; n < payloadPerChunk && count < bufferedData.size(); ++n)
            {
                // Go through unsigned char first so bytes >= 0x80 are not
                // sign-extended into negative ints.
                fStr << "'\\x"
                     << static_cast<int>(static_cast<unsigned char>(bufferedData[count++]))
                     << "',";
            }
            // fill missing part to reach fixed chunk size with zero entries
            for (std::size_t j = n; j < payloadPerChunk; ++j)
            {
                fStr << "'\\0',";
            }
            fStr << "'\\0'}";
            if (chunk + 1 < numChunks)
            {
                fStr << ",\n";
            }
        }
        fStr << "};\n";

        // Report a failed write (for example a full disk) instead of
        // returning success for a truncated file.
        fStr.flush();
        if (!fStr)
        {
            return 1;
        }
    }

    return 0;
}

score 0 · Accepted Answer

这个问题让我很恼火，并且 xxd 不适用于我的用例，因为当我尝试编写脚本时它使变量称为 __home_myname_build_prog_cmakelists_src_autogen 之类的东西，所以我制作了一个实用程序来解决这个确切的问题：

https://github.com/Exaeta/brcc

它生成一个源文件和头文件，并允许您显式设置每个变量的名称，以便您可以通过 std::begin(arrayname) 和 std::end(arrayname) 使用它们。

我将它合并到我的 cmake 项目中，如下所示：

# Build rule that runs brcc to produce binary_resources.hpp/.cpp in the
# current binary directory.
# NOTE(review): the input path passed to brcc is under
# CMAKE_CURRENT_BINARY_DIR, while DEPENDS names the relative path
# src/vertex_shader1.glsl - confirm both refer to the same file, otherwise
# editing the shader may not trigger regeneration.
add_custom_command(
  OUTPUT
    "${CMAKE_CURRENT_BINARY_DIR}/binary_resources.hpp"
    "${CMAKE_CURRENT_BINARY_DIR}/binary_resources.cpp"
  COMMAND brcc
    "${CMAKE_CURRENT_BINARY_DIR}/binary_resources"
    RGAME_BINARY_RESOURCES_HH
    txt_vertex_shader
    "${CMAKE_CURRENT_BINARY_DIR}/src/vertex_shader1.glsl"
  DEPENDS src/vertex_shader1.glsl
  # VERBATIM keeps argument escaping identical across platforms and generators.
  VERBATIM)

通过小的调整，我想它也可以适用于 C。

score 0 · Accepted Answer

如果您正在使用CMake，您可能对编写CMake如下预处理脚本感兴趣：

cmake/ConvertLayout.cmake

# convert_layout(<file> <include_dir>)
#
# Turns the text file <file> into a C/C++ header that defines one macro whose
# value is the file's contents as a string literal.
#
#   file        - input file, <dir>/<name>.<ext>.
#   include_dir - root of the generated headers.
#
# The header is written to <include_dir>/<dir>/<name>.h and the macro is
# named <DIR>_<NAME> (upper-cased). The header is rewritten only when the
# input is newer than it or it does not exist yet.
function(convert_layout file include_dir)
    get_filename_component(name "${file}" NAME_WE)
    get_filename_component(directory "${file}" DIRECTORY)
    get_filename_component(directory "${directory}" NAME)

    # Upper-case the macro name and force it to be a valid C identifier, so a
    # file or directory such as "main-menu" still yields a usable macro.
    string(TOUPPER "${directory}_${name}" macro_name)
    string(MAKE_C_IDENTIFIER "${macro_name}" macro_name)

    set(new_file "${include_dir}/${directory}/${name}.h")

    # Re-run CMake when the layout changes; the conversion only happens at
    # configure time.
    set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS "${file}")

    if("${file}" IS_NEWER_THAN "${new_file}")
        file(READ "${file}" content)

        # Escape backslashes before quotes, so the backslashes added for the
        # quotes are not themselves escaped a second time.
        string(REPLACE "\\" "\\\\" content "${content}")
        string(REPLACE "\"" "\\\"" content "${content}")
        # Normalise CRLF and lone CR to LF so each line break yields one \n.
        string(REPLACE "\r\n" "\n" content "${content}")
        string(REPLACE "\r" "\n" content "${content}")
        # Per line break: emit an escaped \n, close the literal, continue the
        # #define with a trailing backslash and reopen the literal below.
        string(REPLACE "\n" "\\n\"\\\n\"" content "${content}")
        set(content "\"${content}\"")
        set(content "#ifndef ${macro_name}\n#define ${macro_name} ${content} \n#endif")

        file(WRITE "${new_file}" "${content}")

        message(STATUS "Generated layout include file ${new_file} from ${file}")
    endif()
endfunction()

# convert_layout_directory(<layout_dir> <include_dir>)
#
# Calls convert_layout() for every file directly inside <layout_dir>, passing
# <include_dir> through as the output root.
function(convert_layout_directory layout_dir include_dir)
    # LIST_DIRECTORIES false: sub-directories would match the glob as well,
    # and convert_layout() cannot file(READ) a directory.
    file(GLOB layouts LIST_DIRECTORIES false "${layout_dir}/*")
    foreach(layout IN LISTS layouts)
        convert_layout("${layout}" "${include_dir}")
    endforeach()
endfunction()

你的 CMakeLists.txt

include(cmake/ConvertLayout.cmake)
convert_layout_directory(layout ${CMAKE_BINARY_DIR}/include)
include_directories(${CMAKE_BINARY_DIR}/include)

在 C++ 中的某个地方

#include "layout/menu.h"
Glib::ustring ui_info = LAYOUT_MENU;

score 0 · Accepted Answer

我喜欢@Martin R. 的回答，因为正如它所说，它不会触及输入文件并使过程自动化。为了改进这一点，我添加了自动拆分超出编译器限制的大文件的功能。输出文件被写成一个较小的字符串数组，然后可以在代码中重新组合。生成的脚本基于@Martin R. 的版本，此处包含一个示例：

https://github.com/skillcheck/cmaketools.git

c - "#include" C 程序中的文本文件作为 char[]

21 回答 21

Related

Reference