Implementation of strtok() function

懵懂的女人 提交于 2020-01-24 00:40:13

问题


I need to write my function strtok. Below is my code. The problem is - I can't display result string. In code I use strcpy() and then display new array. Is it possible to display string using just pointer *str?

#include <stdio.h>
#include <string.h>   

char* my_strtok(char* s, char* delm){
    char W[100];
    int i = 0, k = 0, j = 0;
    char *ptr;
    static char *Iterator;
    ptr = s;

    if (s == NULL){
        s = Iterator;
    }
    while (s[i] != '\0'){
        j = 0;
        while (delm[j] != '\0'){
            if (s[i] != delm[j])
                W[k] = s[i];
            else goto It;
            j++;
        }
        ptr++;
        i++;
        k++;
    }
It:
    W[i] = 0;
    Iterator = ++ptr;
    return W;
}

int main(void){
    char s[100];
    char delm[] = " ";
    gets(s);
    char newstr[100];
    char *str = my_strtok(s, delm);
    strcpy(newstr, str);
    printf("%s", newstr);
    return 0;
}

回答1:


Internal implementation of strtok has already been discussed here:

How does strtok() split the string into tokens in C?

In your type of implementation ( calling it your type because it is quite different from the actual one), you have not allocated any memory dynamically to the local variable 'W'. So when you return it, it is not guaranteed that you will receive the string correctly because that memory which was locally allocated to 'W' is not reserved for it anymore in the calling function. Apart from this, there are many unnecessary variables used in the code. And also, it needs proper re-structuring. For your better understanding I have modified your code(keeping it in your style as far as possible) to do the required job:

#include <stdio.h>
#include <string.h>  
#include <malloc.h>

char* my_strtok(char* s, char* delm)
{
    static int currIndex = 0;
    if(!s || !delm || s[currIndex] == '\0')
    return NULL;
    char *W = (char *)malloc(sizeof(char)*100);
    int i = currIndex, k = 0, j = 0;

    while (s[i] != '\0'){
        j = 0;
        while (delm[j] != '\0'){
            if (s[i] != delm[j])
                W[k] = s[i];
            else goto It;
            j++;
        }

        i++;
        k++;
    }
It:
    W[i] = 0;
    currIndex = i+1;
    //Iterator = ++ptr;
    return W;
}

int main(void)
{
    char s[100] = "my name is khan";
    char delm[] = " ";
    //char newstr[100];
    char *str = my_strtok(s, delm);
    while(str){
        printf("%s", str);
        free(str);
        str = my_strtok(s, delm);
    }

    return 0;
}



回答2:


Bounty Disclaimer

Bounty: "Looking for an answer drawing from credible and/or official sources."

I think GNU GLibC constitutes a credible and official source, right? You can download GLibC 2.22 here as .tar.gz (25mb).


After you extract the sources:

string/strtok.c

#include <string.h>


static char *olds;

#undef strtok

#ifndef STRTOK
# define STRTOK strtok
#endif

/* Parse S into tokens separated by characters in DELIM.
   If S is NULL, the last string strtok() was called with is
   used.  For example:
    char s[] = "-abc-=-def";
    x = strtok(s, "-");     // x = "abc"
    x = strtok(NULL, "-=");     // x = "def"
    x = strtok(NULL, "=");      // x = NULL
        // s = "abc\0=-def\0"
*/
char *
STRTOK (char *s, const char *delim)
{
  char *token;

  if (s == NULL)
    s = olds;

  /* Scan leading delimiters.  */
  s += strspn (s, delim);
  if (*s == '\0')
    {
      olds = s;
      return NULL;
    }

  /* Find the end of the token.  */
  token = s;
  s = strpbrk (token, delim);
  if (s == NULL)
    /* This token finishes the string.  */
    olds = __rawmemchr (token, '\0');
  else
    {
      /* Terminate the token and make OLDS point past it.  */
      *s = '\0';
      olds = s + 1;
    }
  return token;
}


string/strspn.c
#include <string.h>

#undef strspn
#ifndef STRSPN
#define STRSPN strspn
#endif

/* Return the length of the maximum initial segment
   of S which contains only characters in ACCEPT.  */
size_t
STRSPN (const char *s, const char *accept)
{
  const char *p;
  const char *a;
  size_t count = 0;

  for (p = s; *p != '\0'; ++p)
    {
      for (a = accept; *a != '\0'; ++a)
    if (*p == *a)
      break;
      if (*a == '\0')
    return count;
      else
    ++count;
    }

  return count;
}
libc_hidden_builtin_def (strspn)


string/strpbrk.c
#include <string.h>

#undef strpbrk

#ifndef STRPBRK
#define STRPBRK strpbrk
#endif

/* Find the first occurrence in S of any character in ACCEPT.  */
char *
STRPBRK (const char *s, const char *accept)
{
  while (*s != '\0')
    {
      const char *a = accept;
      while (*a != '\0')
    if (*a++ == *s)
      return (char *) s;
      ++s;
    }

  return NULL;
}
libc_hidden_builtin_def (strpbrk)


sysdeps/x86_64/rawmemchr.S
#include <sysdep.h>

    .text
ENTRY (__rawmemchr)
    movd    %rsi, %xmm1
    mov %rdi, %rcx

    punpcklbw %xmm1, %xmm1
    punpcklbw %xmm1, %xmm1

    and $63, %rcx
    pshufd  $0, %xmm1, %xmm1

    cmp $48, %rcx
    ja  L(crosscache)

    movdqu  (%rdi), %xmm0
    pcmpeqb %xmm1, %xmm0
/* Check if there is a match.  */
    pmovmskb %xmm0, %eax
    test    %eax, %eax

    jnz L(matches)
    add $16, %rdi
    and $-16, %rdi
    jmp L(loop_prolog)

    .p2align 4
L(crosscache):
    and $15, %rcx
    and $-16, %rdi
    movdqa  (%rdi), %xmm0

    pcmpeqb %xmm1, %xmm0
/* Check if there is a match.  */
    pmovmskb %xmm0, %eax
/* Remove the leading bytes.  */
    sar %cl, %eax
    test    %eax, %eax
    je  L(unaligned_no_match)
/* Check which byte is a match.  */
    bsf %eax, %eax

    add %rdi, %rax
    add %rcx, %rax
    ret

    .p2align 4
L(unaligned_no_match):
    add $16, %rdi

    .p2align 4
L(loop_prolog):
    movdqa  (%rdi), %xmm0
    pcmpeqb %xmm1, %xmm0
    pmovmskb %xmm0, %eax
    test    %eax, %eax
    jnz L(matches)

    movdqa  16(%rdi), %xmm2
    pcmpeqb %xmm1, %xmm2
    pmovmskb %xmm2, %eax
    test    %eax, %eax
    jnz L(matches16)

    movdqa  32(%rdi), %xmm3
    pcmpeqb %xmm1, %xmm3
    pmovmskb %xmm3, %eax
    test    %eax, %eax
    jnz L(matches32)

    movdqa  48(%rdi), %xmm4
    pcmpeqb %xmm1, %xmm4
    add $64, %rdi
    pmovmskb %xmm4, %eax
    test    %eax, %eax
    jnz L(matches0)

    test    $0x3f, %rdi
    jz  L(align64_loop)

    movdqa  (%rdi), %xmm0
    pcmpeqb %xmm1, %xmm0
    pmovmskb %xmm0, %eax
    test    %eax, %eax
    jnz L(matches)

    movdqa  16(%rdi), %xmm2
    pcmpeqb %xmm1, %xmm2
    pmovmskb %xmm2, %eax
    test    %eax, %eax
    jnz L(matches16)

    movdqa  32(%rdi), %xmm3
    pcmpeqb %xmm1, %xmm3
    pmovmskb %xmm3, %eax
    test    %eax, %eax
    jnz L(matches32)

    movdqa  48(%rdi), %xmm3
    pcmpeqb %xmm1, %xmm3
    pmovmskb %xmm3, %eax

    add $64, %rdi
    test    %eax, %eax
    jnz L(matches0)

    and $-64, %rdi

    .p2align 4
L(align64_loop):
    movdqa  (%rdi), %xmm0
    movdqa  16(%rdi), %xmm2
    movdqa  32(%rdi), %xmm3
    movdqa  48(%rdi), %xmm4

    pcmpeqb %xmm1, %xmm0
    pcmpeqb %xmm1, %xmm2
    pcmpeqb %xmm1, %xmm3
    pcmpeqb %xmm1, %xmm4

    pmaxub  %xmm0, %xmm3
    pmaxub  %xmm2, %xmm4
    pmaxub  %xmm3, %xmm4
    pmovmskb %xmm4, %eax

    add $64, %rdi

    test    %eax, %eax
    jz  L(align64_loop)

    sub $64, %rdi

    pmovmskb %xmm0, %eax
    test    %eax, %eax
    jnz L(matches)

    pmovmskb %xmm2, %eax
    test    %eax, %eax
    jnz L(matches16)

    movdqa  32(%rdi), %xmm3
    pcmpeqb %xmm1, %xmm3

    pcmpeqb 48(%rdi), %xmm1
    pmovmskb %xmm3, %eax
    test    %eax, %eax
    jnz L(matches32)

    pmovmskb %xmm1, %eax
    bsf %eax, %eax
    lea 48(%rdi, %rax), %rax
    ret

    .p2align 4
L(matches0):
    bsf %eax, %eax
    lea -16(%rax, %rdi), %rax
    ret

    .p2align 4
L(matches):
    bsf %eax, %eax
    add %rdi, %rax
    ret

    .p2align 4
L(matches16):
    bsf %eax, %eax
    lea 16(%rax, %rdi), %rax
    ret

    .p2align 4
L(matches32):
    bsf %eax, %eax
    lea 32(%rax, %rdi), %rax
    ret

    .p2align 4
L(return_null):
    xor %rax, %rax
    ret

END (__rawmemchr)

weak_alias (__rawmemchr, rawmemchr)
libc_hidden_builtin_def (__rawmemchr)



回答3:


Actually this one is my solution to the general case... when str is char* you can't write to it.

Here it is:

And here is the link to github resource:

#include<stdio.h>
#include<stdlib.h>
#include<string.h>

#define DICT_LEN 256

int *create_delim_dict(char *delim)
{
    int *d = (int*)malloc(sizeof(int)*DICT_LEN);
    memset((void*)d, 0, sizeof(int)*DICT_LEN);

    int i;
    for(i=0; i< strlen(delim); i++) {
        d[delim[i]] = 1;
    }
    return d;
}



char *my_strtok(char *str, char *delim)
{

    static char *last, *to_free;
    int *deli_dict = create_delim_dict(delim);

    if(!deli_dict) {
        return NULL;
    }

    if(str) {
        last = (char*)malloc(strlen(str)+1);
        if(!last) {
            free(deli_dict);
        }
        to_free = last;
        strcpy(last, str);
    }

    while(deli_dict[*last] && *last != '\0') {
        last++;
    }
    str = last;
    if(*last == '\0') {
        free(deli_dict);
        free(to_free);
        return NULL;
    }
    while (*last != '\0' && !deli_dict[*last]) {
        last++;
    }

    *last = '\0';
    last++;

    free(deli_dict);
    return str;
}

int main()
{
    char * str = "- This, a sample string.";
    char *del = " ,.-";
    char *s = my_strtok(str, del);
    while(s) {
        printf("%s\n", s);
        s = my_strtok(NULL, del);
    }
    return 0;
}



回答4:


char * strtok2(char *str, const char *delim)
{
static char *nxt; /* static variable used to advance the string to replace delimiters */
static int size;  /* static variable used to count until the end of the string        */

/* IMPORTANT: any advance to 'nxt' must be followed by a diminution of 'size', and vice verce */

int i; /* counter of delimiter(s) in string */

/* initialize the string when strtok2 is first calles and supplied with a valid string */
if(str != NULL)
{
    nxt = str;
    size = strlen(str);
}

/* if we havn't reached the end of the string, any other call to strtok2 with NULL will come here */
else if(size > 0)
{
    nxt++;      /* last run left nxt on a null terminator, so we have to advance it */
    size--;     /* any advancement must follow diminution of size                   */
    str = nxt;  /* string now points to the first char after the last delimiter     */ 
}

/* if we reached the end of string, return NULL pointer */
else
{
    str = NULL;
}

/* nxt is used to advance the string until a delimiter or a series of delimiters are encountered; 
 * it then stops on the last delimiter which has turned to NULL terminator
 */
while(*nxt)
{
    i = strspn(nxt, delim);
    while(i > 1)        /* turns delimiters to NULL terminator (except the last one) */
    {
        *nxt = '\0';
        nxt++;
        size--;
        i--;
    }                   /* in the last delimiter we have to do something a      */
    if(1 == i)          /* bit different we have to actually take nxt backwards */
    {                   /* one step, to break the while(*nxt) loop              */
        *nxt = '\0';
        if(size > 1)    /* if the delimiter is last char, don't do next lines   */
        {
            nxt--;
            size++;     /* nxt is diminished so size increases                    */
        }
    }
    nxt++;          /* if no delimiter is found, advance nxt                  */
    size--;         /* in case of the last delimiter in a series, we took nxt */
}                   /* a step back, so now moving it a step forward means     */
                    /* it rests on a NULL terminator                          */
return str;
}



回答5:


Here is an implementation increase the input buffers as per your needs and increase the no of calls to strtok with str1 as NULL to get back more tokens

#include<stdio.h>
#include<string.h>
#include<stdlib.h>
char *ret_nondelim(char *str1,char *str2);
char *my_strtok(char *str1,char *str2);
char *ret_noofbytes(char *str1,char *str2);
int main()
{
    char str1[20];
    char str2[20];
    char *res;
    printf("enter string one\n");
    scanf("%s",str1);
    printf("enter string two\n");
    scanf("%s",str2);
    res=my_strtok(str1,str2);
    if(res)
        printf("%s\n",res);
    else
        printf("returned NULL\n");
    free(res);
    res=my_strtok(NULL,str2);
    if(res)
        printf("%s\n",res);
    else
        printf("returned NULL\n");
    free(res);
    res=my_strtok(NULL,str2);
    if(res)
        printf("%s\n",res);
    else
        printf("returned NULL\n");
    free(res);
}
char *my_strtok(char *str1,char *str2)
{
    static char *str1_cpy;
    static int i;
    char *ptr;
    int flag=0;
    if(str1!=NULL)
    {
        str1_cpy=ret_nondelim(str1,str2);      //get location of non delimiting byte
        str1=ret_noofbytes(str1_cpy,str2);  //scan and get location of delimitng byte
        if((str1-str1_cpy))         //no of bytes = non delimiting location - delimting location
        {
            ptr=malloc(str1-str1_cpy+1);        //malloc location and and store the string`enter code here`
            int k;
            for(k=0;k<(str1-str1_cpy);k++)
                ptr[k]=str1_cpy[k];
            ptr[k]='\0';
            str1_cpy=str1;              //save pointer for next iteration
            return ptr;             //return pointer
        }
        else
            return NULL;

    }
    else
    {
        str1_cpy=ret_nondelim(str1_cpy,str2);   //same as above but pass saved pointer 
        str1=ret_noofbytes(str1_cpy,str2);
        if((str1-str1_cpy))
        {
            ptr=malloc(str1-str1_cpy+1);
            int k;
            for(k=0;k<(str1-str1_cpy);k++)
                ptr[k]=str1_cpy[k];
            ptr[k]='\0';
            str1_cpy=str1;          //save pointer for next iteration
            return ptr;
        }
        else
            return NULL;
    }
}
char *ret_nondelim(char *str1,char *str2)
{
    int flag=0;
    for(;*(str1);)
    {
        for(int j=0;j<strlen(str2);j++)
        {
            if(*(str1)==*(str2+j))          //check if slected byte is non delimiting byte
            {
                str1++;             //break if true go check next byte
                break;
            }
            else
                flag++;             //shows that selected byte is none of the delimitng byte
        }
        if(flag==strlen(str2))              //(above comment)-so non delimiting byte found return that pointer to caller
            break;
        else
            flag=0;
    }
    return str1;
}

char *ret_noofbytes(char *str1,char *str2)
{
    int flag=0;
    for(;*(str1);)
    {
        for(int k=0;k<strlen(str2);k++)
        {
            if(*(str2+k)==*(str1))          //check if selected bytes is delimiting byte
            {
                flag=1;
                break;              //break twice if true ie .flag==1
            }
        }
        if(flag==1)
            break;
        else
            str1++;                 //if not found go check next byte
    }
    return str1;                        //return the point where delimiting byte was found
}



回答6:


Here is the code which explains how strtok works in C.

#include <stdio.h>
#include <string.h>

#define SIZE_OF_STRING 50 
#define SIZE_OF_DELIMITER 5

/* Make the temp_ptr as static, so it will hold the previous pointing address */
static char *temp_ptr = NULL;

char *string_token(char *str, char *delimiter)
{
    char *final_ptr = NULL;  `enter code here`
    /* Flag has been defined as static to avoid the parent function loop
     * runs infinitely after reaching end of string.
     */ 
    static int flag = 0;
    int i, j;

    if (delimiter == NULL) {
        return NULL;
    }

    /* If flag is 1, it will denote the end of the string */
    if (flag == 1) {
        return NULL;
    }

    /* The below condition is to avoid temp_ptr is getting assigned 
     * with NULL string from the parent function main. Without
     * the below condition, NULL will be assigned to temp_ptr 
     * and final_ptr, so accessing these pointers will lead to
     * segmentation fault.
     */
    if (str != NULL) { 
        temp_ptr = str; 
    }

    /* Before function call ends, temp_ptr should move to the next char,
     * so we can't return the temp_ptr. Hence, we introduced a new pointer
     * final_ptr which points to temp_ptr.
     */
    final_ptr = temp_ptr;

    printf("%s %d str: %s delimiter: %s length: %ld temp_ptr: %s strlen: %ld"
           " final_ptr: %s \n",__func__, __LINE__, str, delimiter, 
           strlen(delimiter), temp_ptr, strlen(temp_ptr), final_ptr);

    for (i = 0; i <= strlen(temp_ptr); i++)
    {
        for (j = 0; j < strlen(delimiter); j++) {

            if (temp_ptr[i] == '\0') {
                /* If the flag is not set, both the temp_ptr and flag_ptr 
                 * will be holding string "Jasmine" which will make parent 
                 * to call this function string_token infinitely. 
                 */
                flag = 1;
                return final_ptr;
            }

            if ((temp_ptr[i] == delimiter[j])) {
                /* NULL character is assigned, so that final_ptr will return 
                 * til NULL character. Here, final_ptr points to temp_ptr.
                 */
                temp_ptr[i] = '\0';
                temp_ptr += i+1;
                return final_ptr;
            }
        }
    }
    /* You will somehow end up here if for loop condition fails.
     * If this function doesn't return any char pointer, it will 
     * lead to segmentation fault in the parent function.
     */
    return NULL;
}


int main()
{
    char str[SIZE_OF_STRING] = "shirley|Rose|Jasmine";
    char *token = NULL;
    char del[SIZE_OF_DELIMITER] = "|";

    token = string_token(str, del);
    while (token != NULL) {       
        printf("token %s\n", token);
        token = string_token(NULL, del);
    }
    return 0;
}

https://shirleyanengineer.blogspot.com/2019/11/write-your-own-strtok-in-c.html


来源:https://stackoverflow.com/questions/28931379/implementation-of-strtok-function

标签
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!