8

在 C 中去除前导和尾随空格的最佳方法是什么?

4

16 回答 16

24

以下是 linux 内核如何进行修剪,称为 strstrip():

/**
 * strstrip - strip leading and trailing whitespace from a string, in place.
 * @s: writable, NUL-terminated string; must not be NULL.
 *
 * Trailing whitespace is removed by writing a NUL after the last
 * non-whitespace character. Leading whitespace is skipped rather than
 * moved, so the result may point into the middle of @s; keep the original
 * pointer if the buffer has to be freed later.
 *
 * Returns a pointer to the first non-whitespace character of @s (or to the
 * now-empty string at @s when @s held only whitespace).
 */
char *strstrip(char *s)
{
    size_t size;
    char *end;

    size = strlen(s);

    if (!size)
        return s;

    /*
     * 'end' is kept one past the last retained character so it never has
     * to step before 's', even when the whole string is whitespace.
     */
    end = s + size;
    /* <ctype.h> functions need a value in unsigned char range, not plain char. */
    while (end > s && isspace((unsigned char)end[-1]))
        end--;
    *end = '\0';

    while (isspace((unsigned char)*s))
        s++;

    return s;
}

它基本上是前一位回答者所说方法的格式更好且经过错误检查的版本。

于 2008-12-09T08:27:30.663 回答
9

这个问题看起来好像是一个家庭作业问题,所以我会斜着回答:查找 isspace(3) 和 strlen(3) 的手册页并使用指针算法。此外,根据手头的问题,您可能需要 malloc(3) 为结果保留空间。

不要忘记 C 字符串的表示包括一个尾随 0 字节,通常写为 '\0',它不计入字符串长度的一部分。

于 2008-12-09T08:03:24.173 回答
8

这是使用 isspace 的版本:

/*
 * Trim leading and trailing whitespace from a writable string, in place.
 *
 * c: NUL-terminated string; must not be NULL.
 *
 * Leading whitespace is skipped (not moved) and every trailing whitespace
 * character is overwritten with NUL. Returns a pointer to the first
 * non-whitespace character, which may lie inside the original buffer.
 */
char * trim(char *c) {
    char *e;

    /* Cast: <ctype.h> functions require a value in unsigned char range. */
    while (isspace((unsigned char)*c)) c++;

    /* e stays one past the last kept character, so it never precedes c
     * (the original computed c + strlen(c) - 1, which is c - 1 for ""). */
    e = c + strlen(c);
    while (e > c && isspace((unsigned char)e[-1])) *--e = '\0';
    return c;
}
于 2008-12-09T08:52:53.747 回答
5

您可以完全就地执行此操作。

 /*
  * Remove leading and trailing whitespace from string, entirely in place.
  *
  * string: writable, NUL-terminated buffer; must not be NULL (asserted).
  *
  * The trimmed text is moved to the start of the buffer, so the caller's
  * pointer stays valid and can still be passed to free().
  */
 void stripLeadingAndTrailingSpaces(char* string){

      assert(string);

      /* First remove leading spaces */

      const char* firstNonSpace = string;

      /* isspace() needs a value in unsigned char range; the loop stops at NUL. */
      while(isspace((unsigned char)*firstNonSpace))
      {
           ++firstNonSpace;
      }

      /* len counts characters only; the terminator is added for the move. */
      size_t len = strlen(firstNonSpace);

      memmove(string, firstNonSpace, len + 1);

      /* Now remove trailing spaces */

      /* endOfString points at the terminator; inspect the byte before it so
       * nothing past the string is read, even when the string is empty. */
      char* endOfString = string + len;

      while(string < endOfString && isspace((unsigned char)endOfString[-1]))
      {
           --endOfString;
      }

      *endOfString = '\0';

 }
于 2008-12-09T08:32:46.097 回答
3
/*
 * Strip leading and trailing whitespace from s, in place.
 *
 * s: writable, NUL-terminated string; must not be NULL.
 *
 * Leading whitespace is skipped (the returned pointer may point into the
 * middle of the buffer); trailing whitespace is cut off by writing a NUL.
 * Returns a pointer to the first non-whitespace character.
 */
char *strstrip(char *s)
{
    char *end;

    /* Cast: <ctype.h> functions require a value in unsigned char range. */
    while (isspace((unsigned char)*s))
        s++;

    /* Nothing but whitespace: s already points at the terminator. */
    if (!*s)
        return s;
    end = s;

    /* Walk forward to the terminator, then step back to the last character. */
    while (*end)
        end++;
    end--;

    /* *s is known to be non-space, so stopping at s is always correct. */
    while (end != s && isspace((unsigned char)*end))
        end--;
    *(end + 1) = '\0';

    return s;
}

它基本上是一个更优化的代码(在速度和代码大小方面)。

如果我们需要保留内存空间,

/*
 * Strip leading and trailing whitespace from s, in place, keeping the
 * result at the start of the buffer so the caller's pointer stays usable.
 *
 * s: writable, NUL-terminated string; must not be NULL.
 */
void strstrip(char *s)
{
    char *start;
    char *end;

    start = s;
    /* Cast: <ctype.h> functions require a value in unsigned char range. */
    while (isspace((unsigned char)*start))
        start++;

    /* Only whitespace (or empty): the result is the empty string. */
    if (!*start)
    {
        *s = '\0';
        return;
    }
    end = start;

    /* Walk forward to the terminator, then step back to the last character. */
    while (*end)
        end++;
    end--;

    while (end != start && isspace((unsigned char)*end))
        end--;
    *(end + 1) = '\0';

    /* +2: the kept characters [start, end] plus the terminator written above;
     * without it the old tail of the buffer would remain visible. */
    memmove(s, start, (size_t)(end - start) + 2);

    return;
}
于 2008-12-09T09:19:24.817 回答
3

这是 lakshmanaraj 的第一个函数的更简洁和更安全的版本:

#include <ctype.h>
/*
 * Trim leading and trailing whitespace from s, in place.
 *
 * s: writable, NUL-terminated string, or NULL.
 *
 * Returns NULL when s is NULL; otherwise a pointer to the first
 * non-whitespace character of s (which may lie inside the original buffer).
 * Trailing whitespace is cut off by writing a NUL terminator.
 */
char *mytrim(char *s)
{
    if(s) { /* Don't forget to check for NULL! */
        /* Cast: passing a negative plain char to isspace() is undefined. */
        while(isspace((unsigned char)*s))
            ++s;
        if(*s) {
            char *p = s;
            /* Find the terminator... */
            while(*p)
                ++p;
            /* ...then back up over trailing whitespace; *s is non-space,
             * so the loop always stops at s at the latest. */
            do {
                --p;
            } while((p != s) && isspace((unsigned char)*p));
            *(p + 1) = '\0';
        }
    }
    return(s);
}
于 2009-01-05T09:52:36.390 回答
2

对上面另一篇文章的改进。

// Strip leading and trailing whitespace from s, in place.
// The trimmed text always ends up at the start of the buffer, so the
// caller's pointer remains valid. A NULL argument is ignored.
void  strstrip( char *s )
{
  char *start;
  char *end;

  // Exit if param is NULL pointer
  if (s == NULL)
    return;

  // Skip over leading whitespace (the cast keeps isspace() well-defined
  // for characters with the high bit set; the loop stops at NUL)
  start = s;
  while (isspace((unsigned char)*start))
    start++;

  // Is string just whitespace?
  if (!(*start))
  {
    *s = 0x00; // Truncate entire string
    return;
  }

  // Find end of string
  end = start;
  while (*end)
    end++;

  // Step back from NUL
  end--;

  // Step backward until first non-whitespace
  while ((end != start) && isspace((unsigned char)*end))
    end--;

  // Chop off trailing whitespace
  *(end + 1) = 0x00;

  // If had leading whitespace, move the kept text back to the beginning.
  // +2 = characters [start, end] plus the terminator written above;
  // copying only end-start+1 bytes would leave the old tail in place.
  if (s != start)
    memmove(s, start, (size_t)(end - start) + 2);

  return;
}
于 2011-01-06T03:20:27.397 回答
2

对于那些希望看到递归解决方案的人,我提供:

// Recursively locate the first character of str that is neither a space
// nor a tab. Returns a pointer to it, or to the terminator when the string
// is empty or consists only of spaces and tabs.
static char* trim_left_ptr(char* str)
{
    const char head = *str;

    // Anything other than a space or tab (including the terminator)
    // marks the left edge, so the recursion ends here.
    if (head != ' ' && head != '\t')
        return str;

    // Still inside the leading blanks: look one character further.
    return trim_left_ptr(str + 1);
}


// Recursively locate the right edge of str: the position just past the last
// character that is neither a space nor a tab. Returns str itself when the
// string is empty or holds only spaces and tabs.
static char* trim_right_ptr(char* str)
{
    char* edge;
    char before;

    // Base case: reached the terminator.
    if (*str == '\0')
        return str;

    // Resolve the rest of the string first; the decision is made on the
    // way back out of the recursion.
    edge = trim_right_ptr(str + 1);

    // While the character just before the candidate edge is a blank, the
    // edge moves one step left; otherwise it is final.
    before = edge[-1];
    return (before == ' ' || before == '\t') ? edge - 1 : edge;
}



// Return a newly allocated copy of str with leading and trailing spaces and
// tabs removed. str itself is not modified.
//
// The caller owns the result and must free() it. Returns NULL if the
// allocation fails.
char* trim(char* str)
{
    char* L_Ptr = trim_left_ptr(str);

    // Search for the right edge starting at the left edge. Starting at str
    // would return str for an all-blank string, which lies before L_Ptr and
    // would make the length negative.
    char* R_Ptr = trim_right_ptr(L_Ptr);

    // calculate characters to store in new buffer (never negative now)
    size_t sz = (size_t)(R_Ptr - L_Ptr);

    // allocate buffer (+1 for the terminator)
    char* newstr = malloc(sz + 1);
    if (newstr == NULL)
        return NULL;

    // copy trimmed string
    memcpy(newstr, L_Ptr, sz);

    // terminate string
    newstr[sz] = '\0';

    return newstr;
}

当然,它不是唯一可能的递归解决方案。

于 2013-06-26T00:10:34.073 回答
2

为已经拥挤的领域添加另一个答案,但是……我相信有充分的理由。具体来说,AlfaZulu的(当前接受的)答案既没有去除尾随空白,也没有正确尊重数组的边界。 当源字符串为空字符串时,Valgrind会报告越界读取和写入。

下面的示例代码中,函数 stripLeadingAndTrailingSpaces() 逐字取自 AlfaZulu 的回答(包括其中的尾随空格)——只是按照我的偏好在它前面加上了 static。(我使用的编译器选项会阻止代码通过编译,除非函数有原型或者是静态函数。)另一个函数 str_strip() 是 AlfaZulu 函数的修正版本。测试程序会依次调用这两个函数。该代码假定环境足够接近 POSIX,因此可以用 strdup() 分配字符串的副本。

请注意,该名称str_strip()避免与标准 C 库的保留名称冲突:

7.31.13 字符串处理<string.h>

以 str、mem 或 wcs 加一个小写字母开头的函数名称,可以添加到 <string.h> 头文件的声明中。

#include <assert.h>
#include <stdlib.h>
#include <ctype.h>
#include <stdio.h>
#include <string.h>

/* Code verbatim from answer by AlfaZulu (except for static) — which has problems */
/*
 * Intended to strip leading and trailing whitespace from `string` in place.
 * The leading part works: the text after the leading whitespace is moved to
 * the start of the buffer. The trailing part does not, see the notes below.
 *
 * string: writable, NUL-terminated buffer; must not be NULL (asserted).
 */
static
void stripLeadingAndTrailingSpaces(char* string){

     assert(string);

     /* First remove leading spaces */

     const char* firstNonSpace = string;

     while(*firstNonSpace != '\0' && isspace(*firstNonSpace))
     {
          ++firstNonSpace;
     }

     /* len includes the terminating NUL (strlen + 1), so the memmove below
      * copies the terminator along with the text. */
     size_t len = strlen(firstNonSpace)+1;

     memmove(string, firstNonSpace, len);

     /* Now remove trailing spaces */

     /* NOTE: because len already counts the terminator, string + len is one
      * byte PAST the terminator, not the last character of the string. */
     char* endOfString = string + len;

     /* NOTE: this dereferences the byte after the terminator. The loop only
      * steps back if that byte happens to be whitespace, so the whitespace in
      * front of the terminator is never examined. */
     while(string < endOfString  && isspace(*endOfString))
     {
          --endOfString ;
     }

     /* NOTE: when the loop above did not run, this store lands one byte past
      * the terminator, outside a buffer sized exactly for the string. */
     *endOfString = '\0';

}

/*
 * Strip leading and trailing whitespace from `string`, in place.
 * The surviving text is shifted to the start of the buffer.
 *
 * string: writable, NUL-terminated buffer; must not be NULL (asserted).
 */
static void str_strip(char *string)
{
    assert(string);

    /* Count the leading whitespace; isspace() is false for NUL, so the
     * scan cannot run past the end of the string. */
    size_t lead = 0;
    while (isspace((unsigned char)string[lead]))
        lead++;

    /* Shift the remainder, terminator included, to the front. */
    size_t keep = strlen(string + lead);
    memmove(string, string + lead, keep + 1);

    /* Shrink `keep` while the last retained character is whitespace. */
    while (keep > 0 && isspace((unsigned char)string[keep - 1]))
        keep--;
    string[keep] = '\0';
}

/*
 * Run both stripping functions on private copies of `str` and print each
 * copy before and after, bracketed so leftover whitespace is visible.
 * stdout is flushed after each run.
 */
static void chk_stripper(const char *str)
{
    char *sample;

    /* V1: stripLeadingAndTrailingSpaces() */
    sample = strdup(str);
    printf("V1 Before: [%s]\n", sample);
    stripLeadingAndTrailingSpaces(sample);
    printf("V1 After:  [%s]\n", sample);
    free(sample);
    fflush(stdout);

    /* V2: str_strip() */
    sample = strdup(str);
    printf("V2 Before: [%s]\n", sample);
    str_strip(sample);
    printf("V2 After:  [%s]\n", sample);
    free(sample);
    fflush(stdout);
}

/* Feed a fixed set of sample strings through chk_stripper(), in order. */
int main(void)
{
    char *samples[] =
    {
        "    \t    ABC   DEF    \t  ",
        "    \t                 \t  ",
        " ",
        "",
    };
    const size_t count = sizeof(samples) / sizeof(samples[0]);
    size_t idx;

    for (idx = 0; idx < count; idx++)
        chk_stripper(samples[idx]);
    return 0;
}

在 Valgrind 下运行时,我得到输出:

$ valgrind --suppressions=etc/suppressions-macos-10.12.5 -- ./slts59
==26999== Memcheck, a memory error detector
==26999== Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al.
==26999== Using Valgrind-3.13.0.SVN and LibVEX; rerun with -h for copyright info
==26999== Command: ./slts59
==26999== 
V1 Before: [            ABC   DEF         ]
V1 After:  [ABC   DEF         ]
V2 Before: [            ABC   DEF         ]
V2 After:  [ABC   DEF]
V1 Before: [                              ]
V1 After:  []
V2 Before: [                              ]
V2 After:  []
V1 Before: [ ]
V1 After:  []
V2 Before: [ ]
V2 After:  []
==26999== Invalid read of size 1
==26999==    at 0x100000B81: stripLeadingAndTrailingSpaces (slts59.c:28)
==26999==    by 0x100000CB0: chk_stripper (slts59.c:67)
==26999==    by 0x100000DA2: main (slts59.c:91)
==26999==  Address 0x100b7df01 is 0 bytes after a block of size 1 alloc'd
==26999==    at 0x100096861: malloc (vg_replace_malloc.c:302)
==26999==    by 0x1002DC938: strdup (in /usr/lib/system/libsystem_c.dylib)
==26999==    by 0x100000C88: chk_stripper (slts59.c:65)
==26999==    by 0x100000DA2: main (slts59.c:91)
==26999== 
==26999== Invalid write of size 1
==26999==    at 0x100000B96: stripLeadingAndTrailingSpaces (slts59.c:33)
==26999==    by 0x100000CB0: chk_stripper (slts59.c:67)
==26999==    by 0x100000DA2: main (slts59.c:91)
==26999==  Address 0x100b7df01 is 0 bytes after a block of size 1 alloc'd
==26999==    at 0x100096861: malloc (vg_replace_malloc.c:302)
==26999==    by 0x1002DC938: strdup (in /usr/lib/system/libsystem_c.dylib)
==26999==    by 0x100000C88: chk_stripper (slts59.c:65)
==26999==    by 0x100000DA2: main (slts59.c:91)
==26999== 
V1 Before: []
V1 After:  []
V2 Before: []
V2 After:  []
==26999== 
==26999== HEAP SUMMARY:
==26999==     in use at exit: 34,572 bytes in 162 blocks
==26999==   total heap usage: 186 allocs, 24 frees, 40,826 bytes allocated
==26999== 
==26999== LEAK SUMMARY:
==26999==    definitely lost: 0 bytes in 0 blocks
==26999==    indirectly lost: 0 bytes in 0 blocks
==26999==      possibly lost: 0 bytes in 0 blocks
==26999==    still reachable: 0 bytes in 0 blocks
==26999==         suppressed: 34,572 bytes in 162 blocks
==26999== 
==26999== For counts of detected and suppressed errors, rerun with: -v
==26999== ERROR SUMMARY: 2 errors from 2 contexts (suppressed: 2 from 2)
$

上面的 str_strip() 函数工作正常。它相当于对 stripLeadingAndTrailingSpaces() 做了大致最小的改动,使其能够干净地运行(另外还有检查过程中使用、现已注释掉的调试代码——这些现在可以删掉了)。

我观察到,虽然语义略有不同,但 Tuminoid 的答案中基于 Linux 的 strstrip() 函数也很干净——没有内存访问错误,并且它会删除尾随空格(不移动第一个和最后一个非空格字符之间的那部分字符串)。

于 2017-08-10T17:41:17.880 回答
1
/* Overwrite trailing whitespace with NULs, walking back from the last
 * character. The i >= 0 test stops the scan at the start of the buffer,
 * so an empty or all-whitespace string never indexes s[-1]. */
int i = (int)strlen(s) - 1;
while (i >= 0 && isspace((unsigned char)s[i]))
    s[i--] = '\0';
/* Skip leading whitespace by advancing s; the original start of the buffer
 * is lost unless the caller kept another pointer to it. */
while (isspace((unsigned char)*s))
    s++;

只要您不关心疯狂地破坏字符串并且不关心内存泄漏,那应该可以解决问题!

于 2008-12-09T08:04:30.537 回答
1

您应该能够就地完成;剥离空格永远不会导致字符串增长。您可能无需先检查字符串的长度就可以做到这一点,但这样做可能是不必要的“聪明”。memmove()除了@Norman Ramsey 提到的功能之外,您还应该研究该功能。

于 2008-12-09T08:10:00.667 回答
1

如果您在 Linux/Windows 上并且将库 glib 链接到您的程序中,则可以使用例程g_strstrip().

于 2009-01-05T06:08:33.457 回答
1
/*
 * Strip leading and trailing blanks (space and tab, as defined by isblank())
 * from string, in place. Other whitespace such as '\n' is left alone.
 *
 * string: writable, NUL-terminated buffer; must not be NULL.
 *
 * Returns string, with the trimmed text moved to the start of the buffer.
 */
char *strip(char *string)
{
    char *start = string;
    /* Cast: <ctype.h> functions require a value in unsigned char range. */
    while(isblank((unsigned char)*start)) start++;

    /* Length of what is left after the leading blanks. */
    size_t end = strlen(start);
    if(start != string) {
        /* Move the text together with its terminator. */
        memmove(string, start, end + 1);
    }

    /* end > 0 keeps the scan inside the buffer when nothing is left;
     * without it string[-1] would be read for an empty result. */
    while(end > 0 && isblank((unsigned char)string[end - 1])) end--;
    string[end] = '\0';
    return string;
}
于 2014-01-17T19:26:43.000 回答
1

来自 fpsgamer 的更正算法(也适用于 ISO C90):

/*
 * Remove leading and trailing whitespace from string, in place.
 * Written to compile as ISO C90 (declarations first, block comments only).
 *
 * string: writable, NUL-terminated buffer; must not be NULL.
 */
void trimWhitespace(char *string) {
    const char* firstNonSpace = string;
    char* endOfString;
    size_t len;

    if (string[0] == '\0') {
        return;
    }

    /* First remove leading spaces (the cast keeps isspace() well-defined
     * for negative char values; the loop stops at the terminator) */
    while(isspace((unsigned char)*firstNonSpace)) {
        ++firstNonSpace;
    }
    /* len counts characters only; the terminator is added for the move */
    len = strlen(firstNonSpace);
    memmove(string, firstNonSpace, len + 1);

    /* Now remove trailing spaces */
    /* endOfString points AT the terminator and only the byte before it is
     * inspected, so nothing beyond the string is ever read or written */
    endOfString = string + len;

    while(string < endOfString && isspace((unsigned char)endOfString[-1])) {
        --endOfString;
    }

    *endOfString = '\0';
}
于 2014-04-22T23:22:55.790 回答
1

对于尾随空格,可以使用 strtok。将分隔符设置为 " ",它在运行时会丢弃分隔符字节,并返回指向标记(token)的 char *。

char *x;
/* A writable array, not a pointer to a string literal: strtok() overwrites
 * the delimiter it finds with '\0', and modifying a literal is undefined. */
char y[] = "somestring ";

/* x points at the first token in y, with the trailing space cut off. */
x = strtok(y," ");

结果:x 是指向 "somestring"(而不是带尾随空格的 "somestring ")的指针

于 2015-10-16T20:27:49.887 回答
1

编辑:根据最新版本的zString 库更新了代码。

这段代码不依赖库,只依赖指针算术和整数。共有三个功能:修剪,左修剪和右修剪。(我应该将所有这些函数添加到zString 库中:))

  • char *zstring_trim(char *s)删除前导和尾随空格

  • char *zstring_ltrim(char *s)删除前导空格

  • char *zstring_rtrim(char *s)删除尾随空格

所有这些函数都会修改原始字符串

/* trim */
/*
 * Remove leading and trailing white-space from str, in place.
 * White-space between words is preserved. Uses no library calls.
 *
 * str: writable, NUL-terminated string, or NULL.
 *
 * Returns str (NULL is returned unchanged).
 */
char *zstring_trim(char *str){
    char *src=str;  /* read cursor */
    char *dst=str;  /* write cursor */
    char *end=str;  /* one past the last non-space char written */
    char c;

    /* validate input */
    if (!str)
        return str;

    /* skip leading white-spaces; the terminator is not white-space,
     * so this cannot run past the end of the string */
    while (*src=='\t' || *src=='\v' || *src=='\f' ||
           *src=='\n' || *src=='\r' || *src==' ')
        ++src;

    /* shift the rest of the string down, remembering where the last
     * non-space char ended so trailing white-space can be cut off */
    while ((c=*src++)){
        *dst++ = c;
        if (!(c=='\t' || c=='\v' || c=='\f' || c=='\n' || c=='\r' || c==' '))
            end = dst;
    }

    /* terminate the string right after the last non-space char */
    *end='\0';

    return str;
}

/* right trim */
/*
 * Remove trailing white-space from str, in place. Leading and inner
 * white-space is left untouched. Uses no library calls.
 *
 * str: writable, NUL-terminated string, or NULL.
 *
 * Returns str (NULL is returned unchanged).
 */
char *zstring_rtrim(char *str){
    char *src;      /* scan cursor */
    char *end;      /* one past the last non-space char seen so far */
    char c;

    /* validate input */
    if (!str)
        return str;

    /* scan every char, including the first one, and remember where the
     * last non-space char ends */
    end = str;
    for (src=str; (c=*src) != '\0'; ++src){
        if (!(c=='\t' || c=='\v' || c=='\f' || c=='\n' || c=='\r' || c==' '))
            end = src+1;
    }

    /* terminate the string right after the last non-space char */
    *end='\0';

    return str;
}

/* left trim */
/*
 * Remove leading white-space from str, in place, by shifting the rest of
 * the string to the front. Uses no library calls.
 *
 * Returns str (NULL is returned unchanged).
 */
char *zstring_ltrim(char *str){
    char *from;     /* read cursor */
    char *to;       /* write cursor */

    /* validate input */
    if (!str)
        return str;

    /* advance the read cursor past the leading white-spaces */
    for (from=str;
         *from=='\t' || *from=='\v' || *from=='\f' ||
         *from=='\n' || *from=='\r' || *from==' ';
         ++from)
        ;

    /* slide the remaining chars down to the start of the buffer */
    for (to=str; *from; ++to, ++from)
        *to = *from;

    /* terminate right after the last char copied */
    *to='\0';

    return str;
}
于 2016-02-25T02:05:15.373 回答