How to read unicode (utf-8) / binary file line by line

前端 未结 6 1701
醉酒成梦
醉酒成梦 2020-12-16 15:56

Hi programmers,

I want to read, line by line, a Unicode (UTF-8) text file created by Notepad. I don't want to display the Unicode string on the screen, I w…

6条回答
  •  無奈伤痛
    2020-12-16 16:48

    I found a solution to my problem, and I would like to share it with anyone interested in reading a UTF-8 file in C99.

    /*
     * Reads the stream `fp` chunk by chunk with fgets and prints one report
     * per chunk to stdout: the byte length in parentheses, the text in double
     * quotes, then "Yes" if the chunk is a lone newline (an empty line) or
     * "No" otherwise.
     *
     * The buffer holds at most 254 bytes plus the terminator, so a longer
     * line is reported in several pieces. Lengths are counted in bytes, not
     * in Unicode code points.
     *
     * fp: an open, readable stream positioned at the first byte to report.
     */
    void ReadUTF8(FILE* fp)
    {
        unsigned char iobuf[255] = {0};
        while( fgets((char*)iobuf, sizeof(iobuf), fp) )
        {
                size_t len = strlen((char *)iobuf);
                // Drop the trailing newline, but keep a lone "\n" intact so
                // the empty-line test below can still recognise it.
                if(len > 1 &&  iobuf[len-1] == '\n')
                {
                    iobuf[len-1] = 0;
                    len--;
                }
                // %zu is the conversion for size_t; passing size_t to %d is
                // undefined behaviour.
                printf("(%zu) \"%s\"  ", len, iobuf);
                if( iobuf[0] == '\n' )
                    printf("Yes\n");
                else
                    printf("No\n");
        }
    }
    
    /*
     * Placeholder for a UTF-16 big-endian reader.
     * Not implemented: the stream `fp` is neither read nor modified.
     */
    void ReadUTF16BE(FILE* fp)
    {
        (void)fp; /* intentionally unused until the reader is written */
        return;
    }
    
    /*
     * Placeholder for a UTF-16 little-endian reader.
     * Not implemented: the stream `fp` is neither read nor modified.
     */
    void ReadUTF16LE(FILE* fp)
    {
        (void)fp; /* intentionally unused until the reader is written */
        return;
    }
    
    /*
     * Opens "test_utf8.txt", inspects its first bytes for a byte-order mark
     * (BOM) and hands the stream to the matching reader:
     *   EF BB BF -> ReadUTF8     (stream positioned just after the BOM)
     *   FE FF    -> ReadUTF16BE  (stream positioned just after the BOM)
     *   FF FE    -> ReadUTF16LE  (stream positioned just after the BOM)
     *   other    -> ReadUTF8 from the start of the file (no BOM assumed)
     *
     * Returns 0 after the file has been processed, 1 if it cannot be opened.
     */
    int main()
    {
        FILE* fp = fopen("test_utf8.txt", "r");
        if( fp == NULL )
        {
            // Stop here: passing a null stream to fclose is undefined behaviour.
            perror("test_utf8.txt");
            return 1;
        }

        // see http://en.wikipedia.org/wiki/Byte-order_mark for an explanation of the BOM
        // encoding
        unsigned char b[3] = {0};
        // Track how many bytes were really read so that an empty or 1-byte
        // file is not mistaken for a BOM made of the zero-initialised buffer.
        size_t got = fread(b, 1, 2, fp);

        if( got == 2 && b[0] == 0xEF && b[1] == 0xBB
            && fread(&b[2], 1, 1, fp) == 1 && b[2] == 0xBF )
        {
            // Complete UTF-8 BOM consumed; the stream is at the first text byte.
            ReadUTF8(fp);
        }
        else if( got == 2 && b[0] == 0xFE && b[1] == 0xFF )
        {
            ReadUTF16BE(fp);
        }
        else if( got == 2 && b[0] == 0xFF && b[1] == 0xFE )
        {
            // The UTF-16 little-endian BOM is FF FE; the sequence 00 00 FE FF
            // is the UTF-32 big-endian BOM and must not select this reader.
            ReadUTF16LE(fp);
        }
        else
        {
            // we don't know what kind of file it is, so assume it is plain
            // ASCII/UTF-8 without a BOM and start again from the first byte
            rewind(fp);
            ReadUTF8(fp);
        }

        fclose(fp);
        return 0;
    }
    

提交回复
热议问题