Read .CSV file in C

后端 未结 5 1038
温柔的废话
温柔的废话 2020-11-22 10:15

I have a .csv file:

lp;imie;nazwisko;ulica;numer;kod;miejscowosc;telefon;email;data_ur
1;Jan;Kowalski;ul. Nowa;1a;11-234;Budry;123-123-456;jan@go.xxx;1980.         


        
5条回答
  •  轮回少年
    2020-11-22 10:40

    The following code is written in plain C and handles blank spaces. It allocates memory only once per processed line, so a single free() is needed for each line.

    http://ideone.com/mSCgPM

    /* Tiny CSV Reader */
    /* Copyright (C) 2015, Deligiannidis Konstantinos
    
    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.
    
    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.
    
    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
    
    
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    
    
    /*
     * Split `line` into tokens separated by the (possibly multi-character)
     * string `delim`.
     *
     * Parameters:
     *   line        - NUL-terminated input text; it is never modified.
     *   delim       - NUL-terminated, non-empty delimiter string.
     *   out_storage - address of a char** that MUST currently be NULL. On
     *                 success it receives a NULL-terminated array of pointers
     *                 to the tokens.
     *
     * Memory: the pointer array and a private copy of the text live in ONE
     * malloc() block, so the caller releases everything with a single
     * free( *out_storage ).
     *
     * There is no upper limit on the number of tokens: the input is scanned
     * once to count them and the copy is scanned once more to split it.
     *
     * Returns the number of tokens found (always >= 1; empty tokens count),
     * or a negative error code, in which case *out_storage is left untouched:
     *   -1  line, delim or out_storage is NULL
     *   -2  delim is the empty string
     *   -3  memory allocation failed
     *   -4  *out_storage is not NULL (it may already own memory)
     */
    int getcols( const char * const line, const char * const delim, char ***out_storage )
    {
        if ( !out_storage )                 return -1;  // Nowhere to store the result
        if ( *out_storage != NULL )         return -4;  // Must be NULL: not already allocated
        if ( !line || !delim )              return -1;  // NULL inputs rejected

        const size_t delim_size = strlen( delim );
        if ( delim_size == 0 )              return -2;  // Delimiter not provided

        // Pass 1: count the tokens so the pointer table can be sized exactly.
        size_t tokens_found = 1;
        const char *cursor = line;
        const char *hit;
        while ( ( hit = strstr( cursor, delim ) ) != NULL ) {
            tokens_found++;
            cursor = hit + delim_size;
        }
        // `cursor` now points at the last token; its length completes the line length.
        const size_t line_size = (size_t)( cursor - line ) + strlen( cursor );

        // One contiguous block: [ ptr1 .. ptrN, NULL | copy of line + '\0' ]
        const size_t size_ptr_region = ( tokens_found + 1 ) * sizeof( char * );
        char **out = malloc( size_ptr_region + line_size + 1 );
        if ( !out )                         return -3;  // Out of memory

        char *str_region = (char *) out + size_ptr_region;
        memcpy( str_region, line, line_size + 1 );      // +1 copies the terminating '\0' as well

        // Pass 2: split the private copy in place. Writing '\0' over the first
        // byte of each delimiter terminates the token before it; the search
        // resumes after the delimiter, so it finds the same boundaries as pass 1.
        size_t idx = 0;
        char *token = str_region;
        char *sep;
        while ( ( sep = strstr( token, delim ) ) != NULL ) {
            out[ idx++ ] = token;
            *sep = '\0';
            token = sep + delim_size;
        }
        out[ idx++ ] = token;   // Last token runs to the end of the copy
        out[ idx ] = NULL;      // Sentinel so callers may iterate until NULL

        *out_storage = out;     // Hand ownership of the block to the caller
        return (int) tokens_found;
    }
    
    
    /*
     * Demonstration driver for getcols().
     *
     * Example 1 splits a single line on the two-character delimiter ";;".
     * Example 2 splits a small CSV text into rows on "\n" and then splits
     * each row into columns on ";".
     *
     * Returns 0 on success, or 1 if any getcols() call reports an error.
     */
    int main( void )
    {
        char in_line[] = "Arg1;;Th;s is not Del;m;ter;;Arg3;;;;Final";
        char delim[] = ";;";
        char **columns = NULL;  // Must be NULL before the call; getcols() returns -4 otherwise.

        printf("Example1:\n");

        int cols_found = getcols( in_line, delim, &columns );
        if ( cols_found < 0 ) {
            fprintf( stderr, "getcols() failed with code %d\n", cols_found );
            return 1;
        }

        // Iterate by count here; the array is also NULL-terminated, so looping
        // until columns[ i ] == NULL works equally well (used in Example 2).
        for ( int i = 0; i < cols_found; i++ ) {
            printf("Column[ %d ] = %s\n", i, columns[ i ] );
        }

        free( columns );    // One free() releases both the pointers and the strings.
        columns = NULL;     // Ready for reuse with getcols().

        printf("\n\nExample2, Nested:\n\n");

        char example_file[] = "ID;Day;Month;Year;Telephone;email;Date of registration\n"
                "1;Sunday;january;2009;123-124-456;jitter@go.xyz;2015-05-13\n"
                "2;Monday;March;2011;(+30)333-22-55;buffer@wl.it;2009-05-23";

        char **rows = NULL;     // getcols() requires it to be NULL.

        int rows_found = getcols( example_file, "\n", &rows );
        if ( rows_found < 0 ) {
            fprintf( stderr, "getcols() failed with code %d\n", rows_found );
            return 1;
        }

        for ( int i = 0; rows[ i ]; i++ ) {
            printf("Line[ %d ] = %s\n", i, rows[ i ] );

            char **columnX = NULL;
            int fields_found = getcols( rows[ i ], ";", &columnX );
            if ( fields_found < 0 ) {
                fprintf( stderr, "getcols() failed with code %d\n", fields_found );
                free( rows );   // Do not leak the outer block on the error path.
                return 1;
            }

            for ( int j = 0; columnX[ j ]; j++ ) {
                printf("  Col[ %d ] = %s\n", j, columnX[ j ] );
            }
            free( columnX );
        }

        free( rows );
        rows = NULL;

        return 0;
    }
    

提交回复
热议问题