Best algorithm to strip leading and trailing spaces in C [duplicate]

北战南征 提交于 2019-11-27 14:20:42

问题


What is the best approach in stripping leading and trailing spaces in C?


回答1:


You can do this entirely in place.

 /*
  * Strip leading and trailing whitespace from `string`, in place.
  *
  * `string` must be a non-NULL, writable, NUL-terminated buffer. The
  * remaining text is shifted to the start of the buffer, so the caller's
  * pointer stays valid. Whitespace is whatever isspace() reports.
  */
 void stripLeadingAndTrailingSpaces(char* string){

     assert(string);

     /* First remove leading spaces */

     const char* firstNonSpace = string;

     /* '\0' is not a space, so the scan stops at the terminator by itself.
        The cast keeps isspace() defined for negative char values. */
     while(isspace((unsigned char)*firstNonSpace))
     {
          ++firstNonSpace;
     }

     size_t len = strlen(firstNonSpace);

     /* len + 1 carries the terminator along; regions may overlap */
     memmove(string, firstNonSpace, len + 1);

     /* Now remove trailing spaces */

     /* Points AT the terminator; always inspect the byte before it */
     char* endOfString = string + len;

     while(string < endOfString && isspace((unsigned char)endOfString[-1]))
     {
          --endOfString;
     }

     *endOfString = '\0';

}



回答2:


Here is how linux kernel does the trimming, called strstrip():

/*
 * strstrip - remove leading and trailing whitespace from s.
 *
 * Trailing whitespace is cut by writing a '\0' into s. Leading whitespace
 * is skipped by returning a pointer further into the same buffer, so the
 * text is not moved. Callers that must free() the buffer need to keep the
 * original pointer.
 *
 * s must be a non-NULL, writable, NUL-terminated string.
 */
char *strstrip(char *s)
{
    size_t size;
    char *end;

    size = strlen(s);

    if (!size)
        return s;

    /*
     * end is one past the last kept character. Testing end[-1] means the
     * pointer never moves below s, even when the string is all whitespace.
     */
    end = s + size;
    while (end > s && isspace((unsigned char)end[-1]))
        end--;
    *end = '\0';

    /* '\0' is not a space, so this stops at the terminator at the latest */
    while (isspace((unsigned char)*s))
        s++;

    return s;
}

It's basically a better-formatted and error-checked version of what the previous poster suggested.




回答3:


This question looks as if it might be a homework question, so I'll answer obliquely: look up the man pages for isspace(3) and strlen(3) and use pointer arithmetic. Also depending on the problem at hand you may need malloc(3) to hold space for the result.

Don't forget that the representation of a C string includes a trailing 0 byte, often written '\0', which is not counted as part of the length of the string.




回答4:


Here's a version using isspace:

/*
 * Trim whitespace from both ends of c.
 *
 * Trailing whitespace is overwritten with '\0'. Leading whitespace is
 * skipped by returning a pointer into the same buffer, which may differ
 * from c. c must be a non-NULL, writable, NUL-terminated string.
 */
char * trim(char *c) {
    char *e;

    /* '\0' is not a space, so no separate terminator test is needed */
    while (isspace((unsigned char)*c)) c++;

    /* e is one past the last kept char; it never drops below c */
    e = c + strlen(c);
    while (e > c && isspace((unsigned char)e[-1])) *--e = '\0';

    return c;
}



回答5:


/*
 * Strip whitespace from both ends of s.
 *
 * Returns a pointer to the first non-space character inside s; the text is
 * not moved. Trailing whitespace is cut by writing a '\0'. s must be a
 * non-NULL, writable, NUL-terminated string.
 */
char *strstrip(char *s)
{
    char *end;

    while (isspace((unsigned char)*s))
        s++;

    /* Nothing but whitespace: s now points at the terminator */
    if (!*s)
        return s;

    /* Walk to the terminator, then step back onto the last character */
    end = s;
    while (*end)
        end++;
    end--;

    /* *s is known to be non-space, so the scan stops at s at the latest */
    while (end != s && isspace((unsigned char)*end))
        end--;
    *(end + 1) = '\0';

    return s;
}

This is basically a more optimized version (in terms of speed and code size).

If we need to retain memory space then,

/*
 * Strip whitespace from both ends of s, in place.
 *
 * The remaining text is moved to the start of the buffer, so the caller's
 * pointer stays valid. s must be a non-NULL, writable, NUL-terminated
 * string.
 */
void strstrip(char *s)
{
    char *start;
    char *end;

    start = s;
    while (isspace((unsigned char)*start))
        start++;

    /* Only whitespace (or empty): result is the empty string */
    if (!*start)
    {
        *s = '\0';
        return;
    }

    /* Walk to the terminator, then step back onto the last character */
    end = start;
    while (*end)
        end++;
    end--;

    /* *start is non-space, so the scan stops at start at the latest */
    while (end != start && isspace((unsigned char)*end))
        end--;
    *(end + 1) = '\0';

    /* (end - start + 1) characters plus the terminator just written */
    memmove(s, start, (size_t)(end - start) + 2);

    return;
}



回答6:


Here is a more concise and safer version of lakshmanaraj's first function:

#include <ctype.h>
/*
 * Trim whitespace from both ends of s.
 *
 * Returns NULL when s is NULL. Otherwise returns a pointer to the first
 * non-space character inside s; the text is not moved. Trailing whitespace
 * is cut by writing a '\0' into the buffer.
 */
char *mytrim(char *s)
{
    if(s) { /* Don't forget to check for NULL! */
        /* The cast keeps isspace() defined for negative char values */
        while(isspace((unsigned char)*s))
            ++s;
        if(*s) {
            char *p = s;
            while(*p)
                ++p;
            /* *s is non-space, so the backward scan stops at s at the latest */
            do {
                --p;
            } while((p != s) && isspace((unsigned char)*p));
            *(p + 1) = '\0';
        }
    }
    return(s);
}



回答7:


A refinement of another post above.

// Strip whitespace from both ends of s, in place.
//
// The remaining text is moved to the start of the buffer, so the caller's
// pointer stays valid. A NULL s is ignored. Otherwise s must be a writable,
// NUL-terminated string.
void  strstrip( char *s )
{
  char *start;
  char *end;

  // Exit if param is NULL pointer
  if (s == NULL)
    return;

  // Skip over leading whitespace (NUL is not a space, so this stops there)
  start = s;
  while (isspace((unsigned char)*start))
    start++;

  // Is string just whitespace?
  if (!(*start))
  {
    *s = 0x00; // Truncate entire string
    return;
  }

  // Find end of string
  end = start;
  while (*end)
    end++;

  // Step back from NUL
  end--;

  // Step backward until first non-whitespace
  while ((end != start) && isspace((unsigned char)*end))
    end--;

  // Chop off trailing whitespace
  *(end + 1) = 0x00;

  // If had leading whitespace, then move entire string back to beginning.
  // The count is the kept characters plus the NUL just written.
  if (s != start)
    memmove(s, start, (size_t)(end - start) + 2);

  return;
}



回答8:


For those who would like to see a recursive solution, I offer this:

// Recursively skip leading spaces and tabs.
// Returns a pointer to the first character of str that is neither ' ' nor
// '\t'; this is the terminator when the string holds nothing else.
static char* trim_left_ptr(char* str)
{
    switch (*str)
    {
    case ' ':
    case '\t':
        // blank: the answer lies further right
        return trim_left_ptr(str + 1);

    default:
        // terminator or first real character: this is the left edge
        return str;
    }
}


// Recursively find the right edge of str.
// Returns a pointer one past the last character that is neither ' ' nor
// '\t'. If there is no such character, returns str itself.
static char* trim_right_ptr(char* str)
{
    char* edge;

    // Base case: the terminator is the starting candidate for the edge
    if (*str == '\0')
        return str;

    // Resolve everything to the right first
    edge = trim_right_ptr(str + 1);

    // While unwinding, pull the edge left over each blank just before it.
    // Once a non-blank sits before the edge, the edge stops moving.
    return (edge[-1] == ' ' || edge[-1] == '\t') ? edge - 1 : edge;
}



// Return a newly allocated copy of str without leading or trailing spaces
// and tabs.
//
// The caller owns the returned buffer and must free() it. Returns NULL if
// the allocation fails. str itself is only read.
char* trim(char* str)
{
    char* L_Ptr = trim_left_ptr(str);

    // Start the right-hand scan at L_Ptr, not at str. For an all-blank
    // string the right edge found from str lies before L_Ptr, which would
    // make the length negative.
    char* R_Ptr = trim_right_ptr(L_Ptr);

    // calculate characters to store in new buffer (R_Ptr >= L_Ptr here)
    size_t sz = (size_t)(R_Ptr - L_Ptr);

    // allocate buffer, with room for the terminator
    char* newstr = malloc(sz + 1);
    if (newstr == NULL)
    {
        return NULL;
    }

    // copy trimmed string
    memcpy(newstr, L_Ptr, sz);

    // terminate string
    newstr[sz] = '\0';

    return newstr;
}

Of course, it is not the only possible recursive solution.




回答9:


Adding yet another answer to an already crowded field, but … I believe with good reason. Specifically, the (currently accepted) answer by AlfaZulu neither strips trailing blanks nor correctly respects the bounds of the arrays. Valgrind reports out-of-bounds reads and writes when the source string is an empty string.

Here's sample code with the stripLeadingAndTrailingSpaces() function from AlfaZulu's answer verbatim (including the trailing blanks) — with just the static added before it to conform to my prejudices. (I use compiler options that prevent the code compiling unless there's either a prototype for the function or the function is static). There's also a function str_strip() which is a fixed version of the function by AlfaZulu. The test harness puts the two functions to work. The code assumes a sufficiently POSIX-like environment so that strdup() is available to allocate a duplicate copy of a string.

Note that the name str_strip() avoids conflict with the reserved names for the standard C library:

7.31.13 String handling <string.h>

1 Function names that begin with str, mem, or wcs and a lowercase letter may be added to the declarations in the <string.h> header.

#include <assert.h>
#include <stdlib.h>
#include <ctype.h>
#include <stdio.h>
#include <string.h>

/* Code verbatim from answer by AlfaZulu (except for static) — which has problems */
/*
 * Intended to strip leading and trailing whitespace from `string` in place.
 * The leading-space pass works. The trailing-space pass is off by one; see
 * the NOTE comments below.
 */
static
void stripLeadingAndTrailingSpaces(char* string){

     assert(string);

     /* First remove leading spaces */

     const char* firstNonSpace = string;

     while(*firstNonSpace != '\0' && isspace(*firstNonSpace))
     {
          ++firstNonSpace;
     }

     /* len counts the terminating '\0' too, so memmove() carries it along */
     size_t len = strlen(firstNonSpace)+1;

     memmove(string, firstNonSpace, len);

     /* Now remove trailing spaces */

     /* NOTE: string + len is one byte PAST the terminator, not the last character */
     char* endOfString = string + len;

     /* NOTE: the first test reads the byte after the terminator. If that byte
        is a space the pointer steps back onto the '\0', which is not a space,
        so the loop stops before reaching any trailing whitespace. */
     while(string < endOfString  && isspace(*endOfString))
     {
          --endOfString ;
     }

     /* NOTE: this writes at the terminator or one byte past it, so the text is unchanged */
     *endOfString = '\0';

}

/*
 * Strip leading and trailing whitespace from `string`, in place.
 * `string` must be non-NULL, writable and NUL-terminated.
 */
static void str_strip(char *string)
{
    assert(string);
    //printf("-->> %s(): [%s]\n", __func__, string);

    /* Count leading blanks; '\0' is not a space, so the scan ends there */
    size_t lead = 0;
    while (isspace((unsigned char)string[lead]))
        lead++;

    /* Slide the remainder (terminator included) to the front */
    size_t keep = strlen(string + lead);
    memmove(string, string + lead, keep + 1);
    //printf("---- %s(): [%s]\n", __func__, string);

    /* Shrink the kept length over each trailing blank */
    while (keep > 0 && isspace((unsigned char)string[keep - 1]))
        keep--;
    string[keep] = '\0';
    //printf("<<-- %s(): [%s]\n", __func__, string);
}

/*
 * Run both strippers on private copies of `str` and print the text before
 * and after each one. Each copy is freed before returning. If a copy cannot
 * be allocated, a message goes to stderr and the function returns early.
 */
static void chk_stripper(const char *str)
{
    char *copy1 = strdup(str);
    if (copy1 == NULL)
    {
        fprintf(stderr, "chk_stripper: out of memory\n");
        return;
    }
    printf("V1 Before: [%s]\n", copy1);
    stripLeadingAndTrailingSpaces(copy1);
    printf("V1 After:  [%s]\n", copy1);
    free(copy1);
    fflush(stdout);

    char *copy2 = strdup(str);
    if (copy2 == NULL)
    {
        fprintf(stderr, "chk_stripper: out of memory\n");
        return;
    }
    printf("V2 Before: [%s]\n", copy2);
    str_strip(copy2);
    printf("V2 After:  [%s]\n", copy2);
    free(copy2);
    fflush(stdout);
}

/* Feed each sample string through chk_stripper() in order. */
int main(void)
{
    const char *samples[] =
    {
        "    \t    ABC   DEF    \t  ",
        "    \t                 \t  ",
        " ",
        "",
    };
    const size_t count = sizeof(samples) / sizeof(samples[0]);

    for (size_t k = 0; k < count; k++)
        chk_stripper(samples[k]);

    return 0;
}

When run under Valgrind, I get the output:

$ valgrind --suppressions=etc/suppressions-macos-10.12.5 -- ./slts59
==26999== Memcheck, a memory error detector
==26999== Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al.
==26999== Using Valgrind-3.13.0.SVN and LibVEX; rerun with -h for copyright info
==26999== Command: ./slts59
==26999== 
V1 Before: [            ABC   DEF         ]
V1 After:  [ABC   DEF         ]
V2 Before: [            ABC   DEF         ]
V2 After:  [ABC   DEF]
V1 Before: [                              ]
V1 After:  []
V2 Before: [                              ]
V2 After:  []
V1 Before: [ ]
V1 After:  []
V2 Before: [ ]
V2 After:  []
==26999== Invalid read of size 1
==26999==    at 0x100000B81: stripLeadingAndTrailingSpaces (slts59.c:28)
==26999==    by 0x100000CB0: chk_stripper (slts59.c:67)
==26999==    by 0x100000DA2: main (slts59.c:91)
==26999==  Address 0x100b7df01 is 0 bytes after a block of size 1 alloc'd
==26999==    at 0x100096861: malloc (vg_replace_malloc.c:302)
==26999==    by 0x1002DC938: strdup (in /usr/lib/system/libsystem_c.dylib)
==26999==    by 0x100000C88: chk_stripper (slts59.c:65)
==26999==    by 0x100000DA2: main (slts59.c:91)
==26999== 
==26999== Invalid write of size 1
==26999==    at 0x100000B96: stripLeadingAndTrailingSpaces (slts59.c:33)
==26999==    by 0x100000CB0: chk_stripper (slts59.c:67)
==26999==    by 0x100000DA2: main (slts59.c:91)
==26999==  Address 0x100b7df01 is 0 bytes after a block of size 1 alloc'd
==26999==    at 0x100096861: malloc (vg_replace_malloc.c:302)
==26999==    by 0x1002DC938: strdup (in /usr/lib/system/libsystem_c.dylib)
==26999==    by 0x100000C88: chk_stripper (slts59.c:65)
==26999==    by 0x100000DA2: main (slts59.c:91)
==26999== 
V1 Before: []
V1 After:  []
V2 Before: []
V2 After:  []
==26999== 
==26999== HEAP SUMMARY:
==26999==     in use at exit: 34,572 bytes in 162 blocks
==26999==   total heap usage: 186 allocs, 24 frees, 40,826 bytes allocated
==26999== 
==26999== LEAK SUMMARY:
==26999==    definitely lost: 0 bytes in 0 blocks
==26999==    indirectly lost: 0 bytes in 0 blocks
==26999==      possibly lost: 0 bytes in 0 blocks
==26999==    still reachable: 0 bytes in 0 blocks
==26999==         suppressed: 34,572 bytes in 162 blocks
==26999== 
==26999== For counts of detected and suppressed errors, rerun with: -v
==26999== ERROR SUMMARY: 2 errors from 2 contexts (suppressed: 2 from 2)
$

The str_strip() function shown is working correctly. It features a more or less minimal set of changes necessary to get stripLeadingAndTrailingSpaces() to work cleanly (plus commented out debug code used during the checking process — that could go now).

I observe that although it has slightly different semantics, the Linux-based strstrip() function from Tuminoid's answer is also clean — no memory access errors, and it removes trailing blanks (without moving the portion of the string between the first and last non-blank characters).




回答10:


/* i is one past the last kept character. Checking s[i - 1] with i > 0 keeps
   the index inside the buffer for empty and all-whitespace strings. */
size_t i = strlen(s);
while (i > 0 && isspace((unsigned char)s[i - 1]))
    s[--i] = '\0';
/* '\0' is not a space, so this stops at the terminator at the latest */
while (isspace((unsigned char)*s))
    s++;

That should take care of the problem as long as you don't care about mangling up the string like crazy and if you don't care about memory leaks!




回答11:


You should be able to do it in-place; stripping whitespace can never cause the string to grow. You might be able to do it without first checking the length of the string, but doing so might be needlessly "clever". You should look into the memmove() function, in addition to the ones @Norman Ramsey mentioned.




回答12:


If you're on Linux/Windows and have the GLib library linked into your program, you can use the routine g_strstrip().




回答13:


/*
 * Strip leading and trailing blanks (as reported by isblank(), i.e. space
 * and tab in the "C" locale) from `string`, in place.
 *
 * The remaining text is moved to the start of the buffer and `string` is
 * returned. `string` must be a non-NULL, writable, NUL-terminated buffer.
 */
char *strip(char *string)
{
    char *start = string;
    size_t end;

    /* '\0' is not a blank, so the scan stops at the terminator */
    while(isblank((unsigned char)*start)) start++;

    end = strlen(start);
    if(start != string) {
        /* end + 1 carries the terminator along */
        memmove(string, start, end + 1);
    }

    /* end > 0 keeps string[end - 1] inside the buffer when nothing is left */
    while(end > 0 && isblank((unsigned char)string[end - 1])) end--;
    string[end] = '\0';
    return string;
}



回答14:


Corrected algorithm from fpsgamer (also valid ISO C90):

/*
 * Strip leading and trailing whitespace from `string`, in place.
 *
 * `string` must be a non-NULL, writable, NUL-terminated buffer. Only bytes
 * up to and including the original terminator are read or written.
 * Declarations stay at the top of the block to remain valid ISO C90.
 */
void trimWhitespace(char *string) {
    const char* firstNonSpace = string;
    char* endOfString;
    size_t len;

    if (string[0] == '\0') {
        return;
    }

    /* First remove leading spaces ('\0' is not a space, so this stops there) */
    while(isspace((unsigned char)*firstNonSpace)) {
        ++firstNonSpace;
    }
    len = strlen(firstNonSpace);
    memmove(string, firstNonSpace, len + 1);

    /* Now remove trailing spaces */
    /* endOfString points AT the terminator; always test the byte before it */
    endOfString = string + len;

    while(string < endOfString && isspace((unsigned char)endOfString[-1])) {
        --endOfString;
    }

    *endOfString = '\0';
}



回答15:


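For trailing white space you can use strtok(). Set the delimiter to " "; strtok() overwrites the first delimiter byte after the token with '\0' and returns a char * to the token. Note that strtok() writes into its input, so the string must be a modifiable array rather than a string literal, and that the token also ends at the first space inside the text, not only at trailing spaces.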

char *x;
/* A writable array, not a pointer to a string literal: strtok() stores a
   '\0' into its input, and modifying a string literal is undefined. */
char y[] = "somestring ";

x = strtok(y," ");

result x = pointer to "somestring" not "somestring "




回答16:


Edit : Updated the code based on the latest version of zString library.

This code relies on no libraries, only pointer arithmetic and integers. There are three functions: trim, left-trim and right-trim. (I should add all these functions to the zString library :) )

  • char *zstring_trim(char *s) removes leading and trailing white-spaces

  • char *zstring_ltrim(char *s) removes leading white-spaces

  • char *zstring_rtrim(char *s) removes trailing white-spaces

All these functions modify the original character string

/* trim
 *
 * Removes leading and trailing white-space ('\t', '\v', '\f', '\n', '\r',
 * ' ') from str, in place. Interior white-space is kept as is.
 * Returns str; a NULL str is returned unchanged.
 * Uses no library calls.
 */
char *zstring_trim(char *str){
    char *src=str;  /* read cursor */
    char *dst=str;  /* write cursor */
    char *end=str;  /* one past the last non-space char written */
    char c;
    int is_space=0;

    /* validate input */
    if (!str)
        return str;

    /* skip leading white-spaces */
    while ((c=*src)){
        if (c=='\t' || c=='\v' || c=='\f' || c=='\n' || c=='\r' || c==' ')
            ++src;
        else
            break;
    }

    /* copy the rest down, remembering where the last non-space char landed,
     * so the string can be cut there even when it does not end in
     * white-space
     */
    while ((c=*src++)){
        is_space=0;

        if (c=='\t' || c=='\v' || c=='\f' || c=='\n' || c=='\r' || c==' ')
            is_space=1;

        *dst++ = c;

        if (is_space == 0)
            end = dst;
    }

    /* terminate the string right after the last non-space char */
    *end='\0';

    return str;
}

/* right trim
 *
 * Removes trailing white-space ('\t', '\v', '\f', '\n', '\r', ' ') from
 * str, in place. Leading and interior white-space is kept.
 * Returns str; a NULL str is returned unchanged.
 * Uses no library calls.
 */
char *zstring_rtrim(char *str){
    char *src=str;  /* read cursor */
    char *end=str;  /* one past the last non-space char seen */
    char c;

    /* validate input */
    if (!str)
        return str;

    /* examine every char, including the first one */
    while ((c=*src++)){
        if (c=='\t' || c=='\v' || c=='\f' || c=='\n' || c=='\r' || c==' ')
            continue;

        /* src has already advanced, so it is one past c */
        end = src;
    }

    /* terminate the string right after the last non-space char */
    *end='\0';

    return str;
}

/* left trim
 *
 * Removes leading white-space ('\t', '\v', '\f', '\n', '\r', ' ') from
 * str, in place, by sliding the rest of the text to the front.
 * Returns str; a NULL str is returned unchanged.
 */
char *zstring_ltrim(char *str){
    char *rd;   /* read cursor */
    char *wr;   /* write cursor */
    int skipping=1;

    /* validate input */
    if (!str)
        return str;

    /* advance the read cursor past the leading white-spaces */
    rd = str;
    while (skipping){
        switch (*rd){
        case '\t':
        case '\v':
        case '\f':
        case '\n':
        case '\r':
        case ' ':
            ++rd;
            break;
        default:
            /* terminator or first non-space char */
            skipping = 0;
            break;
        }
    }

    /* slide the remainder down to the start of the buffer */
    wr = str;
    while (*rd)
        *wr++ = *rd++;

    /* the write cursor now sits where the terminator belongs */
    *wr='\0';

    return str;
}


来源:https://stackoverflow.com/questions/352055/best-algorithm-to-strip-leading-and-trailing-spaces-in-c

标签
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!