'Reliable' SMS Unicode & GSM Encoding in PHP

前端 未结 6 985
梦毁少年i
梦毁少年i 2020-12-08 08:22

(Updated a little)

I'm not very experienced with internationalization using PHP, it must be said, and a deal of searching didn't really provide th…

6条回答
  •  广开言路
    2020-12-08 09:01

    I know this isn't PHP code, but I think it might help anyway. This is how I do it in an app I wrote to detect whether it's possible to send a message as GSM 03.38 (you could do something similar for plain text). It has two translation tables, one for normal GSM and one for the extended set, and a function that loops through all characters, checking whether each one can be converted.

    // Number of entries in Ucs2ToGsm: one per code point in U+0000..U+00FF.
    #define UCS2_TO_GSM_LOOKUP_TABLE_SIZE    0x100
    // Sentinel stored in the lookup tables for code points that have no GSM
    // mapping. Every real mapping in the tables is below 0x80, so it cannot
    // be confused with a valid entry.
    #define NON_GSM                          0x80
    // Offset of the last entry of the Greek capital letter table
    // (UCS2_GREEK_CAPITAL_LETTER_ALPHA + 24 == U+03A9, Omega).
    #define UCS2_GCL_RANGE                   24
    // First code point covered by the Greek capital letter table.
    #define UCS2_GREEK_CAPITAL_LETTER_ALPHA  0x0391
    // Value stored for [ \ ] ^ { | } ~ : per GSM 03.38 these are only
    // reachable through the extension table, which is introduced by the
    // escape code 0x1B.
    #define EXTEND                           0x001B

    // Lookup table from code points U+0000..U+00FF to GSM 03.38 default
    // alphabet codes, indexed directly by the code point.
    //
    // note that the ` character is mapped to ' so that all characters that can be typed on
    // a standard north american keyboard can be converted to the GSM default character set
    //
    // U+00CF (capital I with diaeresis) has no GSM equivalent and is NON_GSM;
    // the GSM code 0x60 belongs to U+00BF (inverted question mark) only.
    static unsigned char  Ucs2ToGsm[UCS2_TO_GSM_LOOKUP_TABLE_SIZE] =
    {           /*+0x0      +0x1        +0x2        +0x3        +0x4        +0x5        +0x6        +0x7*/
    /*0x00*/    NON_GSM,    NON_GSM,    NON_GSM,    NON_GSM,    NON_GSM,    NON_GSM,    NON_GSM,    NON_GSM,
    /*0x08*/    NON_GSM,    NON_GSM,    0x0a,       NON_GSM,    NON_GSM,    0x0d,       NON_GSM,    NON_GSM,
    /*0x10*/    NON_GSM,    NON_GSM,    NON_GSM,    NON_GSM,    NON_GSM,    NON_GSM,    NON_GSM,    NON_GSM,
    /*0x18*/    NON_GSM,    NON_GSM,    NON_GSM,    NON_GSM,    NON_GSM,    NON_GSM,    NON_GSM,    NON_GSM,
    /*0x20*/    0x20,       0x21,       0x22,       0x23,       0x02,       0x25,       0x26,       0x27,
    /*0x28*/    0x28,       0x29,       0x2a,       0x2b,       0x2c,       0x2d,       0x2e,       0x2f,
    /*0x30*/    0x30,       0x31,       0x32,       0x33,       0x34,       0x35,       0x36,       0x37,
    /*0x38*/    0x38,       0x39,       0x3a,       0x3b,       0x3c,       0x3d,       0x3e,       0x3f,
    /*0x40*/    0x00,       0x41,       0x42,       0x43,       0x44,       0x45,       0x46,       0x47,
    /*0x48*/    0x48,       0x49,       0x4a,       0x4b,       0x4c,       0x4d,       0x4e,       0x4f,
    /*0x50*/    0x50,       0x51,       0x52,       0x53,       0x54,       0x55,       0x56,       0x57,
    /*0x58*/    0x58,       0x59,       0x5a,       EXTEND,     EXTEND,     EXTEND,     EXTEND,     0x11,
    /*0x60*/    0x27,       0x61,       0x62,       0x63,       0x64,       0x65,       0x66,       0x67,
    /*0x68*/    0x68,       0x69,       0x6a,       0x6b,       0x6c,       0x6d,       0x6e,       0x6f,
    /*0x70*/    0x70,       0x71,       0x72,       0x73,       0x74,       0x75,       0x76,       0x77,
    /*0x78*/    0x78,       0x79,       0x7a,       EXTEND,     EXTEND,     EXTEND,     EXTEND,     NON_GSM,
    /*0x80*/    NON_GSM,    NON_GSM,    NON_GSM,    NON_GSM,    NON_GSM,    NON_GSM,    NON_GSM,    NON_GSM,
    /*0x88*/    NON_GSM,    NON_GSM,    NON_GSM,    NON_GSM,    NON_GSM,    NON_GSM,    NON_GSM,    NON_GSM,
    /*0x90*/    NON_GSM,    NON_GSM,    NON_GSM,    NON_GSM,    NON_GSM,    NON_GSM,    NON_GSM,    NON_GSM,
    /*0x98*/    NON_GSM,    NON_GSM,    NON_GSM,    NON_GSM,    NON_GSM,    NON_GSM,    NON_GSM,    NON_GSM,
    /*0xa0*/    NON_GSM,    0x40,       NON_GSM,    0x01,       0x24,       0x03,       NON_GSM,    0x5f,
    /*0xa8*/    NON_GSM,    NON_GSM,    NON_GSM,    NON_GSM,    NON_GSM,    NON_GSM,    NON_GSM,    NON_GSM,
    /*0xb0*/    NON_GSM,    NON_GSM,    NON_GSM,    NON_GSM,    NON_GSM,    NON_GSM,    NON_GSM,    NON_GSM,
    /*0xb8*/    NON_GSM,    NON_GSM,    NON_GSM,    NON_GSM,    NON_GSM,    NON_GSM,    NON_GSM,    0x60,
    /*0xc0*/    NON_GSM,    NON_GSM,    NON_GSM,    NON_GSM,    0x5b,       0x0e,       0x1c,       0x09,
    /*0xc8*/    NON_GSM,    0x1f,       NON_GSM,    NON_GSM,    NON_GSM,    NON_GSM,    NON_GSM,    NON_GSM,
    /*0xd0*/    NON_GSM,    0x5d,       NON_GSM,    NON_GSM,    NON_GSM,    NON_GSM,    0x5c,       NON_GSM,
    /*0xd8*/    0x0b,       NON_GSM,    NON_GSM,    NON_GSM,    0x5e,       NON_GSM,    NON_GSM,    0x1e,
    /*0xe0*/    0x7f,       NON_GSM,    NON_GSM,    NON_GSM,    0x7b,       0x0f,       0x1d,       NON_GSM,
    /*0xe8*/    0x04,       0x05,       NON_GSM,    NON_GSM,    0x07,       NON_GSM,    NON_GSM,    NON_GSM,
    /*0xf0*/    NON_GSM,    0x7d,       0x08,       NON_GSM,    NON_GSM,    NON_GSM,    0x7c,       NON_GSM,
    /*0xf8*/    0x0c,       0x06,       NON_GSM,    NON_GSM,    0x7e,       NON_GSM,    NON_GSM,    NON_GSM
    };
    
    // GSM codes for the Greek capital letters U+0391..U+03A9, indexed by
    // (code point - UCS2_GREEK_CAPITAL_LETTER_ALPHA). Letters annotated with
    // a Latin capital reuse that Latin letter's code; the remaining letters
    // have codes of their own. NON_GSM marks a code point with no mapping.
    static unsigned char Ucs2GclToGsm[UCS2_GCL_RANGE + 1] =
    {
        0x41,       // U+0391  Alpha    -> 'A'
        0x42,       // U+0392  Beta     -> 'B'
        0x13,       // U+0393  Gamma
        0x10,       // U+0394  Delta
        0x45,       // U+0395  Epsilon  -> 'E'
        0x5a,       // U+0396  Zeta     -> 'Z'
        0x48,       // U+0397  Eta      -> 'H'
        0x19,       // U+0398  Theta
        0x49,       // U+0399  Iota     -> 'I'
        0x4b,       // U+039A  Kappa    -> 'K'
        0x14,       // U+039B  Lambda
        0x4d,       // U+039C  Mu       -> 'M'
        0x4e,       // U+039D  Nu       -> 'N'
        0x1a,       // U+039E  Xi
        0x4f,       // U+039F  Omicron  -> 'O'
        0x16,       // U+03A0  Pi
        0x50,       // U+03A1  Rho      -> 'P'
        NON_GSM,    // U+03A2  (no mapping)
        0x18,       // U+03A3  Sigma
        0x54,       // U+03A4  Tau      -> 'T'
        0x59,       // U+03A5  Upsilon  -> 'Y'
        0x12,       // U+03A6  Phi
        0x58,       // U+03A7  Chi      -> 'X'
        0x17,       // U+03A8  Psi
        0x15        // U+03A9  Omega
    };
    
    // Reports whether a single character has NO representation in the GSM
    // lookup tables.
    //
    // szUnicodeChar: the character to test.
    // Returns false when the character maps to something other than NON_GSM
    // in Ucs2ToGsm (U+0000..U+00FF) or Ucs2GclToGsm (Greek capitals), or when
    // it is the euro sign (U+20AC); returns true for everything else.
    bool Gsm0338Encoding::IsNotGSM( wchar_t szUnicodeChar )
    {
        // wchar_t is a signed type on some platforms. A negative value would
        // satisfy "< UCS2_TO_GSM_LOOKUP_TABLE_SIZE" and index before the start
        // of Ucs2ToGsm, so all range checks are done on an unsigned copy;
        // negative inputs then become very large values and are rejected.
        const unsigned long codePoint = static_cast<unsigned long>( szUnicodeChar );

        if( codePoint < UCS2_TO_GSM_LOOKUP_TABLE_SIZE )
        {
            return Ucs2ToGsm[codePoint] == NON_GSM;
        }

        if( (codePoint >= UCS2_GREEK_CAPITAL_LETTER_ALPHA) &&
            (codePoint <= (UCS2_GREEK_CAPITAL_LETTER_ALPHA + UCS2_GCL_RANGE)) )
        {
            return Ucs2GclToGsm[codePoint - UCS2_GREEK_CAPITAL_LETTER_ALPHA] == NON_GSM;
        }

        // Outside both tables only the euro sign (U+20AC) is accepted.
        return codePoint != 0x20AC;
    }
    
    // Reports whether every character of str passes the IsNotGSM check,
    // i.e. none of them is flagged as non-GSM. An empty string yields true.
    bool Gsm0338Encoding::IsGSM( const std::wstring& str )
    {
        for( std::wstring::const_iterator it = str.begin(); it != str.end(); ++it )
        {
            // Stop at the first character that cannot be represented.
            if( IsNotGSM( *it ) )
            {
                return false;
            }
        }
        return true;
    }
    

提交回复
热议问题