c - 用逗号分隔的数据的 C 解析器

Question

用 C 语言为以下格式的数据编写解析器的最有效方法是什么：

atr#1,atr#2,...,atr#n

btr#1,btr#2,...,btr#n

...

每条记录都在新行中，属性用逗号分隔。

应该使用哪些函数？能给出一些示例吗？

score 2 · Accepted Answer

下面是一段示例代码：它逐行读取以换行符分隔记录的文件，然后按逗号拆分每一行的字段并将它们打印出来（您可以很容易地对其进行修改，例如把结果解析成由 char * 数组构成的数组）：

#include <stdio.h>
#include <string.h>

/*
 * Read "file.txt" line by line and print every comma-separated field
 * on its own "Token: ..." line.
 *
 * Returns 0 on success, 1 if the file cannot be opened.
 *
 * Limitations:
 *  - strtok() treats a run of delimiters as one, so empty fields
 *    ("a,,b") are skipped instead of being reported as empty tokens.
 *  - A line longer than the buffer is read in several pieces, so a
 *    field that straddles the boundary is printed as two tokens.
 */
int main(void)
{
    FILE *f = fopen("file.txt", "r");
    char ptr[1024];
    char *token;

    /* Without this check a missing file leads to fgets(…, NULL). */
    if (f == NULL)
    {
        perror("file.txt");
        return 1;
    }

    while (fgets(ptr, sizeof ptr, f) != NULL)
    {
        /* fgets() keeps the line terminator; cut it off so it does not
           end up glued to the last token of the record. */
        ptr[strcspn(ptr, "\r\n")] = '\0';

        token = strtok(ptr, ",");
        while (token)
        {
            printf("Token: %s\n", token);
            token = strtok(NULL, ",");
        }
    }
    fclose(f);
    return 0;
}

score 0 · Accepted Answer

下面的代码可以完成这项工作：

/* You need the following includes and defines */
#include <stdio.h>
/* NOTE(review): <iostream.h> is a pre-standard C++ header, not a C header;
   a C compiler will normally fail to find it. parse() below only calls
   printf() and string routines, so it looks unnecessary -- confirm and
   drop it when building as C. */
#include <iostream.h>
#include <string.h>
/* String terminator used by parse() when building each output field. */
#define NULL_CHAR 0x0

/*
 * Split a comma-separated record into fields and print them to stdout.
 *
 * data: NUL-terminated record such as "a,b,c". The string is only read,
 *       never modified. At most the first LINE_SIZE characters are
 *       examined; anything beyond that is ignored.
 *
 * Output: one line with the field count, then one line per field.
 * At most MAX_FIELDS fields are kept (further fields are dropped) and a
 * field never holds more than LINE_SIZE - 1 characters.
 *
 * Returns 0 on success, -1 if data is NULL.
 */
int parse(char* data) {
    /* enum constants are true compile-time constants, so `output` is an
       ordinary fixed-size array rather than a variable-length array. */
    enum {
        LINE_SIZE  = 255, /* Should be long enough for your unparsed data */
        MAX_FIELDS = 99   /* Maximum number of fields */
    };
    char  output[MAX_FIELDS][LINE_SIZE];
    int   i;
    int   output_field_count = 0;
    int   output_char_idx    = 0;

    if (data == NULL) {
        return -1;
    }

    /* Fields are terminated lazily: only the first one needs it up front. */
    output[0][0] = '\0';

    /* Stop at the terminator: bytes after it are not part of the record
       (they may be stale, uninitialised, or outside the caller's buffer). */
    for (i = 0; i < LINE_SIZE && data[i] != '\0'; i++) {
        if (data[i] == ',') {
            /* No slot left for another field: drop the rest of the record
               instead of indexing past the end of `output`. */
            if (output_field_count + 1 >= MAX_FIELDS) {
                break;
            }
            output_field_count++;
            output_char_idx = 0;
            output[output_field_count][0] = '\0';
        }
        else if ((output_char_idx + 1) < LINE_SIZE) {
            /* Append the character and keep the field terminated. */
            output[output_field_count][output_char_idx++] = data[i];
            output[output_field_count][output_char_idx]   = '\0';
        }
    }
    /* Convert "index of last field" into "number of fields". */
    output_field_count++;

    printf("OUTPUT FIELD COUNT IS: %d\n", output_field_count);
    for (i = 0; i < output_field_count; i++) {
        printf("FIELD %i IS: %s\n", i, output[i]);
    }
    return 0;
}

这可以如下调用：

char data[500]; /* Should be long enough for your unparsed data */
strcpy(data, "atr#1,atr#2,...,atr#n");
parse(data);
strcpy(data, "btr#1,btr#2,...,btr#n");
parse(data);

score 0 · Accepted Answer

为工作选择合适的工具。用 Perl、Python 或者（最好是）awk 来做，大约只需要一行代码。如果您有必须使用 C 的充分理由，请在帖子中说明；否则我认为，任何人能给您的最明智的回答，就是建议您为这项工作选择正确的工具，而不是询问如何用一种并不擅长此类任务的语言去完成这种繁重的工作。

从命令行：

# Feed file.txt to tr on stdin; every ',' is written to stdout as a newline.
tr ',' '\n' < file.txt

它会把每个逗号替换为换行符。

c - 用逗号分隔的数据的 C 解析器

3 回答 3

Related

Reference