Efficiently replace all accented characters in a string?

后端 未结 21 2910
别跟我提以往
别跟我提以往 2020-11-22 04:35

For a poor man's implementation of near-collation-correct sorting on the client side I need a JavaScript function that does efficient single character rep

21条回答
  •  借酒劲吻你
    2020-11-22 05:21

    Based on the existing answers and some suggestions, I've created this one:

    /**
     * Returns a copy of the string in which accented, circled and ligature
     * Latin letters are replaced by plain ASCII letters (e.g. "ą" -> "a",
     * "Æ" -> "AE").
     *
     * ASCII characters are never altered, and non-ASCII characters that are
     * not listed in the table below are returned unchanged.
     *
     * @this {string} The string to convert.
     * @returns {string} The converted string (always a primitive string).
     */
    String.prototype.removeAccents = (function() {

        // ASCII replacement -> every character that collapses to it.
        // The table is built once, not on every call.
        var removalMap = {
            'A'  : 'ⒶÀÁÂẦẤẪẨÃĀĂẰẮẴẲȦǠÄǞẢÅǺǍȀȂẠẬẶḀĄ',
            'AA' : 'Ꜳ',
            'AE' : 'ÆǼǢ',
            'AO' : 'Ꜵ',
            'AU' : 'Ꜷ',
            'AV' : 'ꜸꜺ',
            'AY' : 'Ꜽ',
            'B'  : 'ⒷḂḄḆɃƂƁ',
            'C'  : 'ⒸĆĈĊČÇḈƇȻꜾ',
            'D'  : 'ⒹḊĎḌḐḒḎĐƋƊƉꝹ',
            // The digraph letters (and U+0149 below) are written as \u escapes
            // on purpose: they look like two ordinary letters and are easily
            // expanded into them by text processing, after which plain "D",
            // "Z", "L", "J", "N"... would be matched and rewritten.
            'DZ' : '\u01F1\u01C4',
            'Dz' : '\u01F2\u01C5',
            'E'  : 'ⒺÈÉÊỀẾỄỂẼĒḔḖĔĖËẺĚȄȆẸỆȨḜĘḘḚƐƎ',
            'F'  : 'ⒻḞƑꝻ',
            'G'  : 'ⒼǴĜḠĞĠǦĢǤƓꞠꝽꝾ',
            'H'  : 'ⒽĤḢḦȞḤḨḪĦⱧⱵꞍ',
            'I'  : 'ⒾÌÍÎĨĪĬİÏḮỈǏȈȊỊĮḬƗ',
            'J'  : 'ⒿĴɈ',
            'K'  : 'ⓀḰǨḲĶḴƘⱩꝀꝂꝄꞢ',
            'L'  : 'ⓁĿĹĽḶḸĻḼḺŁȽⱢⱠꝈꝆꞀ',
            'LJ' : '\u01C7',
            'Lj' : '\u01C8',
            'M'  : 'ⓂḾṀṂⱮƜ',
            'N'  : 'ⓃǸŃÑṄŇṆŅṊṈȠƝꞐꞤ',
            'NJ' : '\u01CA',
            'Nj' : '\u01CB',
            'O'  : 'ⓄÒÓÔỒỐỖỔÕṌȬṎŌṐṒŎȮȰÖȪỎŐǑȌȎƠỜỚỠỞỢỌỘǪǬØǾƆƟꝊꝌ',
            'OI' : 'Ƣ',
            'OO' : 'Ꝏ',
            'OU' : 'Ȣ',
            'P'  : 'ⓅṔṖƤⱣꝐꝒꝔ',
            'Q'  : 'ⓆꝖꝘɊ',
            'R'  : 'ⓇŔṘŘȐȒṚṜŖṞɌⱤꝚꞦꞂ',
            'S'  : 'ⓈẞŚṤŜṠŠṦṢṨȘŞⱾꞨꞄ',
            'T'  : 'ⓉṪŤṬȚŢṰṮŦƬƮȾꞆ',
            'TZ' : 'Ꜩ',
            'U'  : 'ⓊÙÚÛŨṸŪṺŬÜǛǗǕǙỦŮŰǓȔȖƯỪỨỮỬỰỤṲŲṶṴɄ',
            'V'  : 'ⓋṼṾƲꝞɅ',
            'VY' : 'Ꝡ',
            'W'  : 'ⓌẀẂŴẆẄẈⱲ',
            'X'  : 'ⓍẊẌ',
            'Y'  : 'ⓎỲÝŶỸȲẎŸỶỴƳɎỾ',
            'Z'  : 'ⓏŹẐŻŽẒẔƵȤⱿⱫꝢ',
            'a'  : 'ⓐẚàáâầấẫẩãāăằắẵẳȧǡäǟảåǻǎȁȃạậặḁąⱥɐ',
            'aa' : 'ꜳ',
            'ae' : 'æǽǣ',
            'ao' : 'ꜵ',
            'au' : 'ꜷ',
            'av' : 'ꜹꜻ',
            'ay' : 'ꜽ',
            'b'  : 'ⓑḃḅḇƀƃɓ',
            'c'  : 'ⓒćĉċčçḉƈȼꜿↄ',
            'd'  : 'ⓓḋďḍḑḓḏđƌɖɗꝺ',
            'dz' : '\u01F3\u01C6',
            'e'  : 'ⓔèéêềếễểẽēḕḗĕėëẻěȅȇẹệȩḝęḙḛɇɛǝ',
            'f'  : 'ⓕḟƒꝼ',
            'g'  : 'ⓖǵĝḡğġǧģǥɠꞡᵹꝿ',
            'h'  : 'ⓗĥḣḧȟḥḩḫẖħⱨⱶɥ',
            'hv' : 'ƕ',
            'i'  : 'ⓘìíîĩīĭïḯỉǐȉȋịįḭɨı',
            'j'  : 'ⓙĵǰɉ',
            'k'  : 'ⓚḱǩḳķḵƙⱪꝁꝃꝅꞣ',
            'l'  : 'ⓛŀĺľḷḹļḽḻſłƚɫⱡꝉꞁꝇ',
            'lj' : '\u01C9',
            'm'  : 'ⓜḿṁṃɱɯ',
            'n'  : 'ⓝǹńñṅňṇņṋṉƞɲ\u0149ꞑꞥ',
            'nj' : '\u01CC',
            'o'  : 'ⓞòóôồốỗổõṍȭṏōṑṓŏȯȱöȫỏőǒȍȏơờớỡởợọộǫǭøǿɔꝋꝍɵ',
            'oi' : 'ƣ',
            'ou' : 'ȣ',
            'oo' : 'ꝏ',
            'p'  : 'ⓟṕṗƥᵽꝑꝓꝕ',
            'q'  : 'ⓠɋꝗꝙ',
            'r'  : 'ⓡŕṙřȑȓṛṝŗṟɍɽꝛꞧꞃ',
            's'  : 'ⓢßśṥŝṡšṧṣṩșşȿꞩꞅẛ',
            't'  : 'ⓣṫẗťṭțţṱṯŧƭʈⱦꞇ',
            'tz' : 'ꜩ',
            'u'  : 'ⓤùúûũṹūṻŭüǜǘǖǚủůűǔȕȗưừứữửựụṳųṷṵʉ',
            'v'  : 'ⓥṽṿʋꝟʌ',
            'vy' : 'ꝡ',
            'w'  : 'ⓦẁẃŵẇẅẘẉⱳ',
            'x'  : 'ⓧẋẍ',
            'y'  : 'ⓨỳýŷỹȳẏÿỷẙỵƴɏỿ',
            'z'  : 'ⓩźẑżžẓẕƶȥɀⱬꝣ'
        };

        // Invert the table into "single character -> replacement" so that each
        // input character is looked up exactly once. A prototype-less object
        // keeps inherited keys out of the lookup.
        var replacementFor = Object.create(null);

        for (var latin in removalMap) {
            if (!Object.prototype.hasOwnProperty.call(removalMap, latin)) {
                continue;
            }
            var nonLatin = removalMap[latin];
            for (var i = 0; i < nonLatin.length; i++) {
                var ch = nonLatin.charAt(i);
                // First listing wins, mirroring the order of the table.
                if (!(ch in replacementFor)) {
                    replacementFor[ch] = latin;
                }
            }
        }

        // Only non-ASCII characters can be in the table; plain ASCII is
        // skipped entirely, so replacement output is never processed again.
        var nonAscii = /[^\u0000-\u007F]/g;

        return function() {
            return String(this).replace(nonAscii, function(ch) {
                return ch in replacementFor ? replacementFor[ch] : ch;
            });
        };
    })();
    

    It uses real characters instead of a list of Unicode escapes, and works well.

    You can use it like

    "ąąą".removeAccents(); // returns "aaa"
    

    You can easily convert this function so that it does not live on the String prototype. However, as I'm a fan of using the String prototype in such cases, you'll have to do that yourself.

提交回复
热议问题