How do I read an arbitrarily long line in C?

后端 未结 2 586
盖世英雄少女心
盖世英雄少女心 2020-12-17 00:03

I would like to have the equivalent (in C) of getline in C++:

std::string s;
getline(std::cin,s); // reads an arbitrarily long line and
                 


        
相关标签:
2条回答
  • 2020-12-17 00:33

    In case you don't have access to a POSIX getline() implementation, here's a public domain implementation that I have lying around.

    I added a little getline_simple() function that just returns the next line in a dynamically allocated buffer. If you're not interested in detailed error handling, you can use that function to read a file line-by-line:

    #define _CRT_SECURE_NO_WARNINGS 1
    
    #include <assert.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <stdio.h>
    #include <errno.h>
    #include <limits.h>
    #include <sys/types.h>
    
    
    /*
        Portability shims for toolchains that lack the POSIX names used below.

        ssize_t: when __GNUC__ is not set, a signed type is declared here
        under that name: long long when _WIN64 is set, long otherwise.
    */
    #if !__GNUC__
    #if _WIN64
    typedef long long ssize_t;
    #else
    typedef long ssize_t;
    #endif
    #endif
    
    
    /*
        Fallback SSIZE_MAX: half of SIZE_MAX, converted to ssize_t.

        NOTE(review): SIZE_MAX is specified by <stdint.h>, which is not in
        this file's include list - confirm that every toolchain taking this
        branch provides SIZE_MAX through the headers that are included.
    */
    #if !defined(SSIZE_MAX)
    #define SSIZE_MAX ((ssize_t)(SIZE_MAX/2))
    #endif
    
    /* Fallback for C libraries whose headers do not define EOVERFLOW. */
    #if !defined(EOVERFLOW)
    #define EOVERFLOW (ERANGE)      /* is there something better to use? */
    #endif
    
    
    /*
        nx_getdelim()
    
        a version of the POSIX getdelim() function that returns error codes
        directly instead of messing with the global `errno` value.
    */
    ssize_t nx_getdelim(char **lineptr, size_t *n, int delim, FILE *stream);
    
    
    
    /*
        nx_getdelim_get_realloc_size()
    
        Helper for nx_getdelim_append(): given the current buffer capacity,
        pick the capacity to reallocate to.
    
        Growth policy:
    
            capacity of 32 or less      -> grow by 32 bytes
            capacity from 33 to 1023    -> double it
            capacity of 1024 or more    -> grow by 1024 bytes
    
        A capacity that is already larger than SSIZE_MAX is returned
        unchanged; the caller has to treat "no growth" as an error.
    */
    static
    size_t nx_getdelim_get_realloc_size( size_t current_size)
    {
        const size_t small_step = 32;
        const size_t large_step = 1024;
    
        // already past the largest size we are willing to manage
        if (current_size > (size_t) SSIZE_MAX) {
            return current_size;
        }
    
        // tiny buffers: a fixed first step avoids a run of very small reallocs
        if (current_size <= small_step) {
            return current_size + small_step;
        }
    
        // mid-sized buffers double; large ones grow by a fixed step
        if (current_size < large_step) {
            return current_size * 2;
        }
    
        return current_size + large_step;
    }
    
    
    
    /*
        nx_getdelim_append()
    
        Helper for nx_getdelim(): stores `ch` at index `count` of the buffer
        in *lineptr and writes a null terminator right after it, growing the
        buffer with realloc() first when those two bytes do not fit.
    
        params:
    
            lineptr -   address of the buffer pointer (must not be NULL).
                        A NULL *lineptr is treated as an empty buffer.
    
            bufsize -   address of the buffer capacity (must not be NULL).
    
            count -     number of characters already stored; also the index
                        the new character is written to.
    
            ch -        the character to store.
    
        Returns 1 on success.  Returns -1 when count exceeds SSIZE_MAX, when
        realloc() fails (*lineptr and *bufsize are then left untouched), or
        when the buffer could not be grown enough to hold the two bytes.
    */
    static
    int nx_getdelim_append( char** lineptr, size_t* bufsize, size_t count, char ch)
    {
        char* buf;
        size_t capacity;
        size_t needed;
    
        // contract checks on the pointer arguments
        assert( lineptr != NULL);
        assert( bufsize != NULL);
    
        // more than SSIZE_MAX characters in the buffer is not supported
        if (count > (size_t) SSIZE_MAX) {
            return -1;
        }
    
        // the new character plus the terminator that follows it
        needed = count + 2;
    
        buf = *lineptr;
        capacity = (buf != NULL) ? *bufsize : 0;
    
        if (capacity < needed) {
            char* grown;
    
            capacity = nx_getdelim_get_realloc_size( capacity);
            grown = (char*) realloc( buf, capacity);
    
            if (grown == NULL) {
                return -1;
            }
    
            buf = grown;
        }
    
        *lineptr = buf;
        *bufsize = capacity;
    
        // the size calculation may have refused to grow the block,
        // so the fit has to be verified once more
        if (buf == NULL || capacity < needed) {
            return -1;
        }
    
        buf[count] = ch;
        buf[count + 1] = 0;
        return 1;
    }
    
    
    /*
        nx_getdelim()
    
        A getdelim() function modeled on the Linux/POSIX/GNU 
        function of the same name.
    
        Read data into a dynamically resizable buffer until 
        EOF or until a delimiter character is found.  The returned
        data will be null terminated (unless there's an error allocating
        memory that prevents it).
    
    
    
        params:
    
            lineptr -   a pointer to a char* allocated by malloc() 
                        (actually any pointer that can legitimately be
                        passed to free()).  *lineptr will be updated 
                        by getdelim() if the memory block needs to be 
                        reallocated to accommodate the input data.
    
                        *lineptr can be NULL (though lineptr itself cannot),
                        in which case the function will allocate any necessary 
                        buffer.
    
            n -         a pointer to a size_t object that contains the size of 
                        the buffer pointed to by *lineptr (if non-NULL).
    
                        The size of whatever buffer the resulting data is
                        returned in will be passed back in *n
    
            delim -     the delimiter character.  The function will stop
                        reading once this character is read from the stream.
    
                        It will be included in the returned data, and a
                        null terminator character will follow it.
    
            stream -    A FILE* stream object to read data from.
    
        Returns:
    
            The number of characters placed in the returned buffer, including
            the delimiter character, but not including the terminating null.
    
            If no characters are read and EOF is set (or attempting to read 
            from the stream on the first attempt caused the eof indication 
            to be set), a null terminator will be written to the buffer and
            0 will be returned.
    
            If an error occurs while reading the stream, a 0 will be returned.
            A null terminator will not necessarily be at the end of the data 
            written.
    
            On the following error conditions, the negative value of the error 
            code will be returned:
    
                ENOMEM:     out of memory
                EOVERFLOW:  SSIZE_MAX characters written to the buffer before
                            reaching the delimiter
                            (on Windows, EOVERFLOW is mapped to ERANGE)
    
             The buffer will not necessarily be null terminated in these cases.
    
    
        Notes:
    
            The returned data might include embedded nulls (if they exist
            in the data stream) - in that case, the return value of the
            function is the only way to reliably determine how much data
            was placed in the buffer.
    
            If the function returns 0 use feof() and/or ferror() to determine
            which case caused the return.
    
            If EOF is returned after having written one or more characters
            to the buffer, a normal count will be returned (but there will 
            be no delimiter character in the buffer).  
    
            If 0 is returned and ferror() returns a non-zero value,
            the data buffer may not be null terminated.
    
            In other cases where a negative value is returned, the data
            buffer is not necessarily null terminated and there 
            is no reliable means to determining what data in the buffer is
            valid.
    
            The pointer returned in *lineptr and the buffer size
            returned in *n will be valid on error returns unless
            NULL pointers are passed in for one or more of these
            parameters (in which case the return value will be -EINVAL).
    
    */
    ssize_t nx_getdelim(char **lineptr, size_t *n, int delim, FILE *stream)
    {
        /*
            Reads characters from `stream` into the growable buffer
            *lineptr (capacity *n) until `delim` has been stored or fgetc()
            returns EOF.
    
            Returns the number of characters stored, 0 on a stream error or
            when EOF is hit before any character, or a negative error code
            (-EINVAL, -ENOMEM, -EOVERFLOW).
    
            *lineptr and *n are written back exactly once, after the last
            call that can reallocate the buffer, so the caller always ends
            up owning the live allocation.
        */
        char* line = NULL;
        size_t size = 0;
        size_t count = 0;
        int err = 0;
        int ch = 0;
    
        if (!lineptr || !n) {
            return -EINVAL;
        }
    
        // work on local copies until everything that can move the buffer is done
        line = *lineptr;
        size = *n;
    
        for (;;) {
            ch = fgetc( stream);
    
            if (ch == EOF) {
                break;
            }
    
            // any failure to store the character is reported as out of memory
            if (nx_getdelim_append( &line, &size, count, (char) ch) < 0) {
                err = -ENOMEM;
                break;
            }
    
            ++count;
    
            // check if we're done because we've found the delimiter
            if ((unsigned char)ch == (unsigned char)delim) {
                break;
            }
    
            // check if we're passing the maximum supported buffer size
            if (count > (size_t) SSIZE_MAX) {
                err = -EOVERFLOW;
                break;
            }
        }
    
        // EOF before any character was read: hand back an empty string.
        // This may allocate or move the buffer, so it has to run BEFORE the
        // caller's pointer and size are updated - otherwise the new block
        // leaks and *lineptr can be left pointing at freed memory.
        if ((err == 0) && (count == 0) && !ferror(stream) && feof(stream)) {
            if (nx_getdelim_append( &line, &size, count, 0) < 0) {
                err = -ENOMEM;
            }
        }
    
        // update the caller's data (valid on every return path from here on)
        *lineptr = line;
        *n = size;
    
        if (err != 0) {
            return err;
        }
    
        // a stream error is reported as 0; the caller tells it apart
        // from end-of-file with ferror()/feof()
        if (ferror(stream)) {
            return 0;
        }
    
        return (ssize_t) count;
    }
    
    
    
    
    /*
        nx_getline()
    
        Line-oriented front end for nx_getdelim(): the delimiter is fixed
        to '\n'; the arguments and the return value are passed through
        unchanged.
    */
    ssize_t nx_getline(char **lineptr, size_t *n, FILE *stream)
    {
        const int newline = '\n';
    
        return nx_getdelim( lineptr, n, newline, stream);
    }
    
    
    
    /*
        versions of getline() and getdelim() that attempt to follow
        POSIX semantics (ie. they set errno on error returns and
        return -1 when the stream error indicator or end-of-file
        indicator is set (ie., ferror() or feof() would return
        non-zero)).
    */
    ssize_t getdelim(char **lineptr, size_t *n, char delim, FILE *stream)
    {
        /*
            Wraps nx_getdelim(): a positive count is returned as is; a
            negative result has its error code stored in errno; both a
            negative result and 0 are turned into a -1 return.
        */
        const ssize_t nread = nx_getdelim( lineptr, n, delim, stream);
    
        if (nread > 0) {
            return nread;
        }
    
        // nx_getdelim() reports failures as -errcode; errno is left
        // untouched when the result was 0
        if (nread < 0) {
            errno = -nread;
        }
    
        return -1;
    }
    
    ssize_t getline(char **lineptr, size_t *n, FILE *stream)
    {
        // a line is simply "everything up to and including the next '\n'"
        const char newline = '\n';
    
        return getdelim( lineptr, n, newline, stream);
    }
    
    
    /*
        getline_simple()
    
        Convenience wrapper around getline(): reads the next line from
        `stream` and returns it in a buffer obtained through getline().
    
        Returns NULL when getline() reports a negative result; any buffer
        getline() handed back in that case is freed here.
    
        Ownership: a non-NULL return value belongs to the caller, who
        passes it to `free()` when done with it.
     */
    char* getline_simple( FILE* stream)
    {
        char* line = NULL;
        size_t capacity = 0;
    
        if (getline( &line, &capacity, stream) >= 0) {
            return line;
        }
    
        // failure: release whatever was allocated on the way
        free(line);
        return NULL;
    }
    

    Disclaimer - this code has worked well enough for my purposes, but I don't warrant that that will be the case for you. Please use due diligence if intending to use this code.

    0 讨论(0)
  • 2020-12-17 00:45

    You need to do it incrementally in a loop, using fgets() to read blocks of data, and realloc() to enlarge the memory buffer if you didn't catch the end of the line.

    There might be some library function to do that for you, though.

    #define BLOCKSIZE 1024
    
    /*
     * Reads the next line from fp into a buffer obtained from realloc().
     *
     * The line is read in chunks of BLOCKSIZE characters with fgets(); the
     * buffer grows by BLOCKSIZE for every chunk that does not contain the
     * end of the line.  The terminating '\n' - and a '\r' directly in front
     * of it - is removed from the result.
     *
     * Returns the NUL-terminated line, which the caller must free().
     * Returns NULL when memory runs out or when fgets() delivers nothing
     * for the first chunk (end of file or read error).  If fgets() fails on
     * a later chunk, the text read so far is returned.
     */
    char *readAllocLine(FILE *fp) {
        char    *line = NULL;
        size_t  maxlength = 0;
        assert(fp != NULL);
        for(;;) { // Read the line in blocks of BLOCKSIZE characters.
            char *crlf, *block, *grown;
    
            maxlength += BLOCKSIZE;
            // realloc(NULL, size) behaves like malloc(size), which covers
            // the first pass.  The result goes into a temporary so the old
            // buffer can still be released when the call fails.
            grown = realloc(line, maxlength+1);
            if (NULL == grown) {
                free(line);
                line = NULL;
                break;
            }
            line = grown;
            // The new block starts on the terminator written by the
            // previous fgets(), so the chunks join into one string.
            block = line + maxlength - BLOCKSIZE;
            // BLOCKSIZE+1 to accommodate final zero
            if (NULL == fgets(block, BLOCKSIZE+1, fp)) {
                // TODO: rewind fp in case of error.
                if (block == line) {
                        // Nothing was read at all: error or end of file.
                        free(line); line = NULL;
                }
                break;
            }
            // This was the last block iff it contains the newline.
            if (NULL != (crlf = strchr(block, '\n'))) {
                *crlf = 0x0;
                // Look at the character before the newline relative to the
                // start of the LINE, not of the block: the '\r' of a CRLF
                // pair may be the last character of the previous block.
                if (crlf != line && '\r' == crlf[-1]) {
                    crlf[-1] = 0x0;
                }
                break;
            } /* if */
        } /* for */
        return line;
    }
    

    Tested with this main

    /*
     * Test driver: prints every line of the file named on the command line,
     * wrapped in double quotes, one per output line.
     *
     * Returns 0 on success and -1 when the argument count is wrong or the
     * file cannot be opened.
     */
    int main(int argc, char **argv) {
        FILE *fp;
        if (argc !=2 ) {
                fprintf(stderr, "Syntax: %s testfile.txt\n", argv[0]);
                return -1;
        }
        fp = fopen(argv[1], "r");
        if (NULL == fp) {
                // Without this check feof() and fclose() would be called
                // with a NULL stream when the file is missing or unreadable.
                perror(argv[1]);
                return -1;
        }
        while(!feof(fp)) {
            char *s = readAllocLine(fp);
            if (NULL != s) {
                printf("\"%s\"\n", s);
                free(s);
            } else {
                // readAllocLine() returned nothing although end-of-file had
                // not been flagged yet: the previous line ended in a newline.
                printf("--- end of file ---\n");
                break;
            }
        }
        fclose(fp);
        return 0;
    }
    

    and this script:

    # Exercise ./readline (presumably the program built from the main() shown
    # earlier - confirm) with line lengths on both sides of the 1024-character
    # BLOCKSIZE: 1020 through 1028.
    for i in $( seq 1020 1028 ); do
        # Didn't want to think over 
        # Build a file "test" that holds $i 'x' characters and no newline:
        # `yes x` emits "x\n" forever, tr drops the newlines, dd keeps $i bytes.
        yes x | tr -d "\n" | dd of=test bs=1 count=$i 2>/dev/null
        # Byte count of the program's output; tr turns wc's trailing newline
        # into a comma so the three counts of one pass share an output line.
        ./readline test | wc -c | tr "\n" ","
        # Append one newline to the file and count again.
        echo "" >> test
        ./readline test | wc -c | tr "\n" ","
        # Append a second newline (the file now ends in an empty line) and
        # count a last time; this count ends the output line.
        echo "" >> test
        ./readline test | wc -c
    done
    
    0 讨论(0)
提交回复
热议问题