1

我有以下代码

/* Split the text at ptr into comma-separated fields, printing each one.
 * NOTE(review): ptr, field and n are declared outside this snippet; field
 * presumably holds at least 92 bytes to match the %91 width - confirm. */
while (*ptr != ',') {
    /* Copy up to 91 characters that are not commas into field; %n stores
     * the number of characters consumed so far in n. */
    int items_read = sscanf(ptr, "%91[^,]%n", field, &n);
    /* Printed on every pass, even when nothing was read (items_read != 1). */
    printf("field = \"%s\"\n", field);
    field[0]='\0'; /* reset field to an empty string for the next pass */
    if (items_read == 1)
        ptr += n; /* advance the pointer by the number of characters read */
    if ( *ptr != ',' ) {
        break; /* didn't find an expected delimiter, done? */
    }
    ++ptr; /* skip the delimiter */
}

它读取我的字符串，并为逗号之间的每一段文本创建一个新数组，但我的字符串中间还有一个空格。我要读入很多字符串，其中一些在中间那个位置之后的多处还有空格；我不想把那些空格也算进去，只想处理第一个空格。

我知道可以加上 `|| ' '`，和/或再加一行 while、一行 int、一行 if，但这样做仍然没有达到我需要的效果。

我听说使用 strtok 不好，或者至少有更好的方法可以做到这一点。我不知道如何添加额外的分隔符“标志”（类似我代码中的逗号），让代码跳过它们。

例如:
3,875,837 file1.ext
3,437,281 document two.txt

前半部分我已经实现了想要的效果：

field1 = 3  
field2 = 875  
field3 = 837

但是,我希望这 2 个输入上的其余字符串进入单独的 char 数组并打印如下:

field4 = file1  
field5 = ext  

field4 = document  
field5 = two  
field6 = txt

或者，我现在在想，有没有办法取出最后一次迭代得到的 char 数组 field，然后用另一个循环、以完全相同的代码再处理它一遍，只是把 `,` 换成 `' '`？

4

3 回答 3

1

我将如何做到这一点将取决于数据。我可能会读入一个字符串,可能在第一个空格用 strchr 将其解析为两个,然后用逗号进行第一个字符串拆分。

于 2012-04-19T20:21:44.453 回答
1

很大程度上取决于您要将数据用于什么目的。如何在代码等的其他地方使用它。

我想我在这类解析中通常会使用简单的逐字符迭代。如果您想使用 sscanf，下面的例子可能会有所帮助：

#include <stdio.h>

/*
 * Print the fields of one line of text and advance *ptr past that line.
 *
 * A field is a run of characters other than tab, newline, '.', ',' and
 * space, at most 127 bytes long.  Fields are separated by any run of
 * spaces, tabs, '.' and ','; a line ends with '\n'.
 *
 * ptr: in/out cursor into a NUL-terminated string.  On return it points
 *      just past the consumed text.
 *
 * Returns 1 when a complete line was printed and more text follows it,
 * 0 when the printed line was the last one or the input was malformed
 * (empty, not newline-terminated, leading separator, over-long field).
 */
int prnt_fields(char **ptr)
{
    char field[128];
    int n;
    int i = 0;

    printf("Item {\n");
    while (**ptr) {
        if (sscanf(*ptr, "%127[^\t\n., ]%n", field, &n) != 1)
            break;
        printf("  %d => '%s'\n", i++, field);
        (*ptr) += n;

        /*
         * Skip the run of separators after the field.  n is reset first
         * because sscanf leaves it untouched when nothing matches, and at
         * the end of the string sscanf returns EOF (non-zero), so neither
         * the old value of n nor the truthiness of the result can be
         * trusted to mean "separators were consumed".
         */
        n = 0;
        (void)sscanf(*ptr, "%*[ \t.,]%n", &n);
        (*ptr) += n;

        if (**ptr == '\n') {
            printf("} OK!\n");
            (*ptr)++;
            return **ptr != '\0';
        }
    }
    /* Only reached for invalid input: no terminating '\n', a field longer
     * than 127 bytes, or a line that starts with a separator. */
    printf("} something failed!\n");
    return 0;
}

/*
 * Demo driver: tokenises the sample text twice.  The first pass is a
 * deliberately naive inline loop, the second pass uses prnt_fields().
 */
int main(void)
{
    char *input = "3,875,837 file1.ext\n"
        "6,875,847 file2.ext\n"
        "3,437,281 document two.txt\n"
        "9,991,123\tdrei.txt\n"
        "4,494,123        vier    fünf .    txt\n"
        ;
    char token[128];
    int consumed;
    int index = 0;
    char *cursor;

    /*
     * Naive pass: after each token exactly one character is skipped as the
     * separator, without checking what it is.  To validate it instead,
     * advance by `consumed` only, stop unless the next character is ',',
     * ' ' or '\n', and then step over it.
     */
    for (cursor = input; *cursor != '\0'; cursor += consumed + 1) {
        if (sscanf(cursor, "%127[^, \n.]%n", token, &consumed) != 1)
            break;
        printf("field%d = '%s'\n", index, token);
        index++;
    }

    /* Second pass: start over and print the text line by line. */
    cursor = input;
    printf("\n---------------\n");
    while (prnt_fields(&cursor) > 0) {
        /* all the work happens in prnt_fields() */
    }

    return 0;
}

应该给出如下内容:

field0 = '3'
field1 = '875'
field2 = '837'
...
field18 = '123  drei' <= breaks here
field19 = 'txt'
field20 = '4'
field21 = '494'
field22 = '123'
Item {
  0 => '3'
  1 => '875'
  2 => '837'
  3 => 'file1'
  4 => 'ext'
} OK!
Item {
  0 => '6'
  1 => '875'
  2 => '847'
  3 => 'file2'
  4 => 'ext'
} OK!
Item {
  0 => '3'
  1 => '437'
  2 => '281'
  3 => 'document'
  4 => 'two'
  5 => 'txt'
} OK!
Item {
  0 => '9'
  1 => '991'
  2 => '123'
  3 => 'drei'
  4 => 'txt'
} OK!
Item {
  0 => '4'
  1 => '494'
  2 => '123'
  3 => 'vier'
  4 => 'fünf'
  5 => 'txt'
} OK!

编辑:

好的。这可以做得更干净、更漂亮,但你可能会有一个想法:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>

struct file_data {
    struct date {
        int y;
        int m;
        int d;
    } date;
    struct time {
        int h;
        int m;
    } time;
    int size_prt[8];    /* max 2^63 : 9223372036854775808 bytes */
    double size;
    char name[512];
    char ext[16];
    char ext_tr[16];
    char name_prt[32][128]; /* should be malloc or ptrs or done in func or.. */
};

/*
 * Default record, assigned to a struct file_data before a line is parsed:
 * date and time zeroed, size_prt[0] = -1 (remaining entries 0) so the list
 * of size groups starts out empty, size 0 and every string empty.
 */
static const struct file_data file_def = {
    {0,0,0},{0,0},{-1},0,{'\0'},{'\0'},{'\0'},{{'\0'}}
};

/*
 * Write a human-readable dump of one file record to fh.
 *
 * fh: output stream.
 * fd: record to print (passed by value).
 *
 * The size is shown in bytes and scaled by 2^10, 2^20 and 2^30.  The size
 * groups are printed up to the -1 marker and the name parts up to the
 * first empty string; both loops also stop at the end of their array so a
 * completely filled array is never overrun.
 */
void prnt_filedata(FILE *fh, struct file_data fd)
{
    const size_t size_cnt = sizeof fd.size_prt / sizeof fd.size_prt[0];
    const size_t name_cnt = sizeof fd.name_prt / sizeof fd.name_prt[0];
    size_t i;

    fprintf(fh,
        "File { \n"
        "  Date: %4d-%02d-%02d\n"
        "  Time: %02d:%02d\n"
        "  Size: %.f\n"
        "  Size: %.2f K\n"
        "  Size: %.2f M\n"
        "  Size: %.2f G\n"
        "  Name: '%s'\n"
        "  Ext : '%s'\n"
        "  ExtT: '%s'\n"
        "  Szpt: ",
        fd.date.y, fd.date.m, fd.date.d,
        fd.time.h, fd.time.m,
        fd.size,
        (fd.size / (1 << 10)),
        (fd.size / (1 << 20)),
        (fd.size / (1 << 30)),
        fd.name,
        fd.ext,
        fd.ext_tr
        );
    for (i = 0; i < size_cnt && fd.size_prt[i] != -1; i++)
        fprintf(fh, "%d ", fd.size_prt[i]);
    fprintf(fh, "\n  Fprt: ");
    for (i = 0; i < name_cnt && fd.name_prt[i][0] != '\0'; i++)
        fprintf(fh, "'%s' ", fd.name_prt[i]);
    fprintf(fh, "\n}\n");
}

/*
 * Parse one listing line of the form
 *
 *     YYYY-MM-DD HH:MM <size, groups separated by ','> <file name>
 *
 * starting at *ptr into *fd, and advance *ptr past the line (including its
 * '\n' when present).
 *
 * ptr: in/out cursor into a NUL-terminated string.
 * fd:  record to fill; it is always reset to file_def first.
 *
 * Returns 1 when a line was parsed and more text follows it.  Returns 0
 * when the input is empty, when the parsed line was the last one (fd is
 * still filled in that case), or when the line could not be parsed (a
 * message is written to stderr).
 */
int extr_file_data(char **ptr, struct file_data *fd)
{
    /* One slot of each array is reserved for its end marker. */
    const int max_size_parts =
        (int)(sizeof fd->size_prt / sizeof fd->size_prt[0]) - 1;
    const int max_name_parts =
        (int)(sizeof fd->name_prt / sizeof fd->name_prt[0]) - 1;
    int i;
    int n;
    char size[26];
    char name[512];
    char *p;

    *fd = file_def;

    if (**ptr == '\0')
        return 0;

    if (sscanf(*ptr,
        "%4d-%2d-%2d %d:%d %25[0123456789,] %511[^\n]%n",
        &fd->date.y, &fd->date.m, &fd->date.d,
        &fd->time.h, &fd->time.m,
        size, name, &n) != 7) {
        fprintf(stderr,
            " * ERR; Unable to extract from %s\n",
            *ptr);
        return 0;
    }
    (*ptr) += n;

    /* Size parts + total.  Base 10 is required: with base 0 a group such
     * as "024" would be read as octal. */
    p = size;
    i = 0;
    while (*p && i < max_size_parts) {
        char *end;
        long part = strtol(p, &end, 10);

        if (end != p) {     /* ignore empty groups such as ",," */
            fd->size_prt[i++] = (int)part;
            fd->size = fd->size * 1000 + part;
        }
        p = *end ? end + 1 : end;   /* step over the ',' */
    }
    fd->size_prt[i] = -1;   /* end marker after the last group */

    /* Split at the last '.'; snprintf always NUL-terminates and truncates
     * an extension that does not fit. */
    if ((p = strrchr(name, '.')) != NULL) {
        snprintf(fd->name, sizeof fd->name, "%.*s", (int)(p - name), name);
        snprintf(fd->ext, sizeof fd->ext, "%s", p + 1);
        /* trimmed ext */
        if (sscanf(fd->ext, "%15s", fd->ext_tr) != 1)
            *fd->ext_tr = '\0';
    } else {
        snprintf(fd->name, sizeof fd->name, "%s", name);
    }

    /* Whitespace-separated parts of the name.  The last array slot is kept
     * free so the list always ends with an empty string. */
    p = fd->name;
    i = 0;
    while (i < max_name_parts &&
           sscanf(p, "%127s%n", fd->name_prt[i], &n) == 1) {
        p += n;
        i++;
    }
    fd->name_prt[i][0] = '\0';

    if (**ptr == '\n')
        (*ptr)++;
    return **ptr != '\0';
}

int main(void)
{
    char *tst =
        "2012-04-18 13:28                  32 ein.ext\n"
        "2012-04-18 13:28       2,446,875,847 zwei.xt  \n"
        "2012-04-18 13:28                   0 drei  .xt\n"
        "2012-04-18 13:28 7,694,587,183,883,665 vier fünf.txt\n"
        "2012-04-18 13:28 9,991,123\t\tsechs\n"
        "2012-04-18 13:28 4,494,123        sieben     acht  .   txt\n"
        ;
    char *ptr = tst;
    struct file_data fd;

    while (extr_file_data(&ptr, &fd) > 0)
        prnt_filedata(stdout, fd);
    prnt_filedata(stdout, fd);

    return 0;
}

应该给:

...
File { 
  Date: 2012-04-18
  Time: 13:28
  Size: 2446875847
  Size: 2389527.19 K
  Size: 2333.52 M
  Size: 2.28 G
  Name: 'zwei'
  Ext : 'xt  '
  ExtT: 'xt'
  Szpt: 2 446 875 847 
  Fprt: 'zwei' 
}
...
File { 
  Date: 2012-04-18
  Time: 13:28
  Size: 4494123
  Size: 4388.79 K
  Size: 4.29 M
  Size: 0.00 G
  Name: 'sieben     acht  '
  Ext : '   txt'
  ExtT: 'txt'
  Szpt: 4 494 123 
  Fprt: 'sieben' 'acht' 
}
...

编辑(再次);抱歉,只有一个奇怪的转变,我在测试后忘记改变了。

于 2012-04-19T17:32:47.433 回答
1

你可以看看strcspn()方法。

作为一个很好的副作用,它可以实现多字节字符安全。

于 2012-04-19T17:50:46.250 回答