1

例如:

输入(字符串):foo $$ foo ## foo []

搜索(字符串):foo

输出(数组):$$ ,## ,[]

我尝试过这个:

/*
 * Asker's attempt: walk `str`, drop every occurrence of the delimiter `s`,
 * and collect pointers to the pieces in between in `buffer`
 * (terminated by a NULL entry), then print each piece.
 *
 * NOTE(review): this snippet is the code being asked about; it does NOT
 * produce one string per piece.  See the comments at the marked lines.
 */
char * str = "foo $$ foo ## foo []";
    char * s = "foo";

    int buf_len = 0;        /* bytes copied into tmpbuf since the last match */
    int len = strlen(s);
    int i = 0;              /* number of delimiter characters matched so far */

    /* Both buffers are MAX_BUFFER_SIZE bytes; buffer is used as an array
     * of char pointers, tmpbuf as the character storage for the pieces. */
    char ** buffer = malloc(MAX_BUFFER_SIZE);
    char * tmpbuf = malloc(MAX_BUFFER_SIZE);
    char * p = str;         /* read cursor in the input */
    char ** buf = buffer;   /* write cursor in the pointer array */
    char * tbuf = tmpbuf;   /* write cursor in the character storage */

    while(*p)
    {
        if(*p == *s) 
        {
            /* Advance over the input for as long as it keeps matching s. */
            while(*p == *(s + i)) 
            { 
                i++;
                p++;
            }

            /* i is only reset when the whole delimiter matched. */
            if(i == len) 
            {
                /* Stores the CURRENT write position inside tmpbuf; nothing
                 * is copied and no terminator is written between pieces. */
                *buf ++ = tbuf;
                /* Zeroes buf_len bytes starting at the write position,
                 * i.e. the bytes the next piece is about to overwrite. */
                memset(tbuf,0,buf_len);
                i = buf_len = 0;
            }
        }
        else 
        {
            /* Non-delimiter byte: append it to the character storage. */
            *tbuf ++= *p;
            buf_len ++;
        }

        /* Runs after a match as well, so the byte following the matched
         * characters is skipped too. */
        p++;
    }

    /* NULL entry marks the end of the pointer array for the loop below. */
    *buf ++= NULL;

    int x;
    for(x = 0; buffer[x]; x++)
    {
        /* Every entry points into the same tmpbuf, so each print runs on
         * through all later pieces. */
        printf("%s\n", buffer[x]);
    }

    free(buffer);
    free(tmpbuf);

显示以下输出:

$$ ## []
## []
[]

但预期是:

$$  
##  
[]

如何解决这个问题?

4

5 回答 5

5

这是一个将字符串拆分为字符串数组的函数:

#include <assert.h>
#include <string.h>

/*
 * Split @str on every occurrence of @delim.
 *
 * Each item is copied, NUL-terminated, into @bytes_out, and a pointer to
 * the start of each copy is stored in @pointers_out.  The caller must
 * supply buffers large enough for every item; no bounds are checked here.
 * @delim must not be empty.
 *
 * Returns the number of pointers stored in @pointers_out (always >= 1,
 * because the text after the last delimiter counts as an item even when
 * it is empty).
 */
size_t explode(const char *delim, const char *str,
               char **pointers_out, char *bytes_out)
{
    const size_t delim_length = strlen(delim);
    size_t       count        = 0;
    const char  *hit;

    assert(delim_length > 0);

    /* One pass per delimiter occurrence: copy the text in front of it. */
    while ((hit = strstr(str, delim)) != NULL) {
        const size_t item_length = (size_t)(hit - str);

        pointers_out[count++] = bytes_out;

        memcpy(bytes_out, str, item_length);
        bytes_out[item_length] = '\0';
        bytes_out += item_length + 1;

        /* Resume scanning right after the delimiter. */
        str = hit + delim_length;
    }

    /* Whatever remains after the last delimiter is the final item. */
    pointers_out[count++] = bytes_out;
    strcpy(bytes_out, str);

    return count;
}

用法:

#include <stdio.h>
/* ... */

#define BIG_ENOUGH 1000

/* Demo: split a sample string on "foo" and print every item in quotes. */
int main(void)
{
    char    *pieces[BIG_ENOUGH];   /* start of each item */
    char     storage[BIG_ENOUGH];  /* backing bytes for all items */
    size_t   total;
    size_t   idx = 0;

    total = explode("foo", "foo $$ foo ## foo []", pieces, storage);

    while (idx < total) {
        printf("\"%s\"\n", pieces[idx]);
        idx++;
    }

    return 0;
}

输出:

""
" $$ "
" ## "
" []"

这不会产生您要求的确切输出,因为我不确定您想如何处理周围的空格,以及项目分隔符(在您的示例中为 "foo")出现在字符串开头的情况。因此,我模仿了 PHP 的 explode 函数的行为。


我想指出,我的 explode 函数把内存管理的问题推给了调用者:由调用者确保缓冲区足够大。这对于快速脚本来说没有问题,但在更严肃的程序中可能会很烦人,因为您必须先做一些计算才能正确使用这个函数。我本可以编写一个更“健壮”的、自行分配内存的实现,但是:

  • 这会使实施混乱。

  • 它没有给调用者使用他们自己的内存分配器的选项。

所以,我实现 explode 的方式是“不好的”,因为它很难正确使用,更糟糕的是,很容易被错误使用。另一方面,它又是“好的”,因为它把功能与内存管理这两个关注点分离开来。

于 2012-04-03T21:09:30.250 回答
3

这是因为当您写下以下语句时,并没有把 tbuf 的内容复制到 buf 中:

*buf ++ = tbuf;

您所做的只是保存了一个指向 tbuf(或者说 tmpbuf)中当前位置的引用。

tmpbuf填充除分隔符以外的所有内容。

在循环结束时类似于:

          01234567 <- offset
tmpbuf = "$$ ## []"

buf[0] = tmpbuf+0;
buf[1] = tmpbuf+3;
buf[2] = tmpbuf+6;

或者非常简化的内存表:

        memory
       address        value   
tmpbuf -> 0x01       [   $] <- buffer[0] points here
          0x02       [   $]
          0x03       [    ]
          0x04       [   #] <- buffer[1] points here
          0x05       [   #]
          0x06       [    ]
          0x07       [   [] <- buffer[2] points here
          0x08       [   ]]
          0x09       [    ]
          ...
buffer -> 0x3A       [0x01]
          0x3B       [0x04]
          0x3C       [0x07]
          0x3D       [    ]
          0x3E       [    ]
          ...

编辑

纯粹出于好玩:下面是一个使用指针、动态分配内存、并且不使用 strstr() 的实现。

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Report whether `needle` occurs at the start of `hay`, given that the
 * caller has already checked that their first characters are equal
 * (the first character is not compared here).
 *
 * Returns non-zero when the whole of `needle` was matched.
 */
int is_needle(char *hay, char *needle)
{
    for (;;) {
        /* Stop at the end of the haystack. */
        if (*hay == '\0')
            break;

        /* Step both cursors, then compare the new pair of characters. */
        ++hay;
        ++needle;
        if (*hay != *needle)
            break;
    }

    /* A full match leaves `needle` on its terminator. */
    return *needle == '\0';
}

/*
 * Locate the first occurrence of `needle` in `hay`.
 *
 * Returns a pointer to the start of the match, or a pointer to the
 * terminating NUL of `hay` when there is no match (never NULL).
 */
char *find(char *hay, char *needle)
{
    for (; *hay != '\0'; ++hay) {
        /* Cheap first-character test before the full comparison. */
        if (*hay != *needle)
            continue;
        if (is_needle(hay, needle))
            return hay;
    }
    return hay;
}

/*
 * Append a copy of the first `slen` bytes of `val` to the NULL-terminated
 * string vector `*vs`, which currently holds `*vslen` strings.
 *
 * The copy is NUL-terminated, the vector is grown by one slot and its new
 * last slot is set to NULL.  `*vs` and `*vslen` are updated in place.
 * On allocation failure an error is printed with perror() and the process
 * exits with status 1.
 *
 * Returns the new number of strings in the vector.
 */
int pushstr(char ***vs, size_t *vslen, char *val, size_t slen)
{
    char  **vec  = *vs;
    size_t  used = *vslen;
    char   *copy;

    /* Resize whatever the current end slot holds to fit the new string. */
    copy = realloc(vec[used], slen + 1);
    vec[used] = copy;
    if (copy == NULL) {
        perror("pushstr.1"); exit(1);
    }

    memcpy(copy, val, slen);
    copy[slen] = '\0';

    /* Grow the vector: `used` strings plus one trailing NULL slot. */
    used += 1;
    *vslen = used;
    vec = realloc(vec, sizeof(char*) * (used + 1));
    *vs = vec;
    if (vec == NULL) {
        perror("pushstr.2"); exit(1);
    }
    vec[used] = NULL;

    return used;
}

/*
 * Demo: split a sample string on "foo", skipping empty pieces, then print
 * and release every piece.
 */
int main(void)
{
    char   *text          = "foo $$ foo ## foo [] fox @@ foo ??";
    char   *delimiter     = "foo";
    size_t  delimiter_len = strlen(delimiter);
    size_t  count         = 0;
    char  **items;
    char   *hit;
    size_t  k;

    /* Start with an empty vector: a single NULL terminator slot. */
    items = malloc(sizeof(char*));
    if (items == NULL) {
        perror("main");
        return 1;
    }
    items[0] = NULL;

    for (;;) {
        hit = find(text, delimiter);
        if (*hit == '\0')
            break;                      /* no further delimiter */

        /* Store the text in front of the delimiter unless it is empty. */
        if (hit != text)
            pushstr(&items, &count, text, hit - text);

        text = hit + delimiter_len;
    }

    /* `hit` now points at the end of the input; keep any trailing piece. */
    if (hit != text)
        pushstr(&items, &count, text, hit - text);

    for (k = 0; items[k] != NULL; k++)
        printf("V: '%s'\n", items[k]);

    for (k = 0; items[k] != NULL; k++)
        free(items[k]);
    free(items);

    return 0;
}
于 2012-04-03T21:58:46.200 回答
1

这是一个适合用 strstr() 来完成的任务。我稍微修改了您的代码来使用它。

/*
 * Copy the first `len` bytes of `str` into a newly allocated,
 * NUL-terminated string, store that string in the slot `*buf` points at,
 * and advance `*buf` to the next slot.
 *
 * Returns 1 when a string was stored.  Returns 0 and leaves `*buf`
 * untouched when `len` is zero or the allocation fails.  The stored
 * string is owned by the caller, who must free() it.
 */
int add_to_buf(char *str, size_t len, char ***buf)
{
  char *copy;

  if (len == 0) return 0;

  /* One extra byte: strncpy() does not terminate when it copies len bytes. */
  copy = malloc (len + 1);
  if (copy == NULL) return 0;

  strncpy (copy, str, len);
  copy[len] = '\0';

  **buf = copy;
  ++*buf;
  return 1;
}

/*
 * Demo: split a sample string on "foo" with strstr(), print every
 * non-empty piece on its own line, then release all allocations.
 */
int main()
{
  char *str = "foo $$ foo ## foo []";
  char *s = "foo";

  char **buffer = malloc (MAX_BUFFER_SIZE*sizeof(*buffer)), **buf = buffer;
  char *start, *end;

  size_t s_len = strlen (s);

  if (buffer == NULL) {
    perror ("malloc");
    return 1;
  }

  start = str;
  end = strstr (str, s);
  while (end) {
    /* Text between the previous delimiter and this one (may be empty). */
    add_to_buf (start, end-start, &buf);
    start = end + s_len;
    end = strstr (start, s);
  }
  /* Whatever follows the last delimiter. */
  add_to_buf (start, strlen (start), &buf);
  *buf = NULL;

  for (buf = buffer; *buf; ++buf)
      printf ("%s\n", *buf);

  /* Each stored piece was allocated by add_to_buf(); free them before
   * freeing the pointer array itself. */
  for (buf = buffer; *buf; ++buf)
      free (*buf);
  free (buffer);
  return 0;
}
于 2012-04-03T20:50:25.937 回答
1

您为一个简单的程序使用了太多的指针,而且您使用它们的方式使程序难以理解。我看到的一个明显错误是:您使用的 buffer 是 char **(字符串数组),但只为它分配了一个字符串的空间。您用这个字符串数组来存储各个令牌,这会在某处造成内存访问越界。

由于您只是要打印令牌,因此无需将它们存储在单独的数组中。这样做就可以了:

#include<stdio.h>
#include<string.h>

/*
 * Demo: tokenize a sample string on spaces and print every token that is
 * not equal to "foo".
 */
int main(int ac, char*argv[]) {
    char text[] = "foo $$ foo ## foo []";   /* writable: strtok() modifies it */
    char *skip = "foo";
    char *tok;

    for (tok = strtok(text, " "); tok != NULL; tok = strtok(NULL, " ")) {
        /* strcmp() is non-zero when the token differs from "foo". */
        if (strcmp(tok, skip) != 0)
            printf("%s\n", tok);
    }

    return 0;
}

请注意,这里的分隔符是空格,这使得这里更容易。

于 2012-04-03T20:56:01.767 回答
0

strtok函数是为此任务设计的:

#include <string.h>
...
char *token;
/*
 * strtok() overwrites delimiters in the scanned string with NUL bytes, so
 * the input must be a writable array, not a pointer to a string literal.
 */
char line[] = "LINE TO BE SEPARATED";
char *search = " ";


/* Token will point to "LINE". */
token = strtok(line, search);


/* Token will point to "TO". */
token = strtok(NULL, search);
于 2012-04-03T20:29:28.607 回答