可以将文章内容翻译成中文,广告屏蔽插件可能会导致该功能失效(如失效,请关闭广告屏蔽插件后再试):
问题:
I have an ArrayBuffer
which contains a string encoded using UTF-8 and I can't find a standard way of converting such ArrayBuffer
into a JS String
(which I understand is encoded using UTF-16).
I've seen this code in numerous places, but I fail to see how it would work with any UTF-8 code points that are longer than 1 byte.
return String.fromCharCode.apply(null, new Uint8Array(data));
Similarly, I can't find a standard way of converting from a String
to a UTF-8 encoded ArrayBuffer
.
回答1:
/**
 * Serializes a string into a Uint8Array holding the character codes of the
 * Base64 text of the string's UTF-8 bytes.
 *
 * Note: the result is NOT the raw UTF-8 byte sequence; it is the ASCII Base64
 * rendering of it, and is meant to be read back with uintToString.
 *
 * @param {string} string - Text to serialize.
 * @returns {Uint8Array} One byte per Base64 character.
 * @throws {URIError} If the string contains a lone surrogate
 *   (raised by encodeURIComponent).
 */
function stringToUint(string) {
  // encodeURIComponent percent-encodes the UTF-8 bytes; unescape turns every
  // %XX back into a single char, yielding a "binary string" that btoa accepts.
  var base64 = btoa(unescape(encodeURIComponent(string)));
  var uintArray = new Uint8Array(base64.length);
  for (var i = 0; i < base64.length; i++) {
    uintArray[i] = base64.charCodeAt(i);
  }
  return uintArray;
}

/**
 * Reverses stringToUint: reads the bytes as Base64 text, decodes it, and
 * interprets the decoded bytes as UTF-8.
 *
 * @param {Uint8Array|number[]} uintArray - Bytes produced by stringToUint.
 * @returns {string} The original text.
 * @throws {URIError} If the decoded bytes are not valid UTF-8.
 */
function uintToString(uintArray) {
  // String.fromCharCode.apply passes every byte as a separate argument, and
  // engines cap the argument count, so large inputs are converted in chunks.
  var CHUNK_SIZE = 0x8000;
  var encodedString = "";
  for (var i = 0; i < uintArray.length; i += CHUNK_SIZE) {
    encodedString += String.fromCharCode.apply(
      null,
      Array.prototype.slice.call(uintArray, i, i + CHUNK_SIZE)
    );
  }
  return decodeURIComponent(escape(atob(encodedString)));
}
With some help from the internet, I wrote these little functions; they should solve your problem. Here is the working JSFiddle.
EDIT:
Since the Uint8Array comes from an external source and is not Base64-encoded, you can't use atob;
you just need to remove it (working fiddle):
/**
 * Decodes an array of UTF-8 bytes into a JavaScript string.
 *
 * @param {Uint8Array|number[]} uintArray - UTF-8 encoded bytes.
 * @returns {string} The decoded text.
 * @throws {URIError} If the bytes are not valid UTF-8.
 */
function uintToString(uintArray) {
  // String.fromCharCode.apply passes every byte as a separate argument, and
  // engines cap the argument count, so large inputs are converted in chunks.
  var CHUNK_SIZE = 0x8000;
  var encodedString = "";
  for (var i = 0; i < uintArray.length; i += CHUNK_SIZE) {
    encodedString += String.fromCharCode.apply(
      null,
      Array.prototype.slice.call(uintArray, i, i + CHUNK_SIZE)
    );
  }
  // escape percent-encodes each byte-valued char; decodeURIComponent then
  // interprets the resulting %XX sequence as UTF-8.
  return decodeURIComponent(escape(encodedString));
}
回答2:
This should work:
// http://www.onicos.com/staff/iz/amuse/javascript/expert/utf.txt

/* utf.js - UTF-8 <=> UTF-16 convertion
 *
 * Copyright (C) 1999 Masanao Izumo <iz@onicos.co.jp>
 * Version: 1.0
 * LastModified: Dec 25 1999
 * This library is free.  You can redistribute it and/or modify it.
 */

/**
 * Decodes an array of UTF-8 bytes into a JavaScript (UTF-16) string.
 *
 * Handles 1- to 4-byte sequences; code points above U+FFFF are emitted as a
 * surrogate pair. Bytes that cannot start a sequence (stray continuation
 * bytes 0x80-0xBF and 0xF8-0xFF) are skipped. Continuation bytes are not
 * validated: only their low six bits are used.
 *
 * @param {Uint8Array|number[]} array - UTF-8 encoded bytes.
 * @returns {string} The decoded text.
 */
function Utf8ArrayToStr(array) {
  var out, i, len, c;
  var char2, char3, char4, codePoint;

  out = "";
  len = array.length;
  i = 0;
  while (i < len) {
    c = array[i++];
    // The high nibble of the lead byte determines the sequence length.
    switch (c >> 4) {
      case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
        // 0xxxxxxx
        out += String.fromCharCode(c);
        break;
      case 12: case 13:
        // 110x xxxx   10xx xxxx
        char2 = array[i++];
        out += String.fromCharCode(((c & 0x1F) << 6) | (char2 & 0x3F));
        break;
      case 14:
        // 1110 xxxx  10xx xxxx  10xx xxxx
        char2 = array[i++];
        char3 = array[i++];
        out += String.fromCharCode(((c & 0x0F) << 12) |
                                   ((char2 & 0x3F) << 6) |
                                   ((char3 & 0x3F) << 0));
        break;
      case 15:
        // 1111 0xxx  10xx xxxx  10xx xxxx  10xx xxxx
        if (c & 0x08) {
          // 0xF8-0xFF are never valid lead bytes; skip them.
          break;
        }
        char2 = array[i++];
        char3 = array[i++];
        char4 = array[i++];
        codePoint = ((c & 0x07) << 18) |
                    ((char2 & 0x3F) << 12) |
                    ((char3 & 0x3F) << 6) |
                    (char4 & 0x3F);
        if (codePoint > 0x10FFFF) {
          // Beyond the Unicode range: emit the replacement character.
          out += "\uFFFD";
        } else if (codePoint > 0xFFFF) {
          // Split into a UTF-16 high/low surrogate pair.
          codePoint -= 0x10000;
          out += String.fromCharCode(0xD800 | (codePoint >> 10),
                                     0xDC00 | (codePoint & 0x3FF));
        } else {
          out += String.fromCharCode(codePoint);
        }
        break;
    }
  }

  return out;
}
It's somewhat cleaner than the other solutions because it doesn't use any hacks, nor does it depend on browser JS functions, i.e. it also works in other JS environments.
Check out the JSFiddle demo.
Also see the related questions: here, here
回答3:
There's a polyfill for Encoding over on Github: text-encoding. It's easy for Node or the browser, and the Readme advises the following:
// TextEncoder and TextDecoder are constructors: native implementations throw a
// TypeError when they are called without `new`.
// String -> bytes:
var uint8array = new TextEncoder(encoding).encode(string);
// Bytes -> string:
var string = new TextDecoder(encoding).decode(uint8array);
If I recall, 'utf-8'
is the encoding
you need, and of course you'll need to wrap your buffer:
// Wrap the buffer in a Uint8Array so it can be handed to the decoder.
// NOTE(review): presumably utf8buffer is the ArrayBuffer from the question — confirm.
var uint8array = new Uint8Array(utf8buffer);
Hope it works as well for you as it has for me.
回答4:
Using TextEncoder and TextDecoder
// Encode the text to bytes, then decode those bytes back into a string.
const encoder = new TextEncoder("utf-8");
const decoder = new TextDecoder();

var uint8array = encoder.encode("Plain Text");
var string = decoder.decode(uint8array);

// Show both the byte view and the round-tripped text.
console.log(uint8array ,string );
回答5:
If you are doing this in browser there are no character encoding libraries built-in, but you can get by with:
/**
 * Left-pads a hex string with "0" when it is shorter than two characters.
 *
 * @param {string} n - Hex digits of one byte.
 * @returns {string} The input, prefixed with "0" if its length is below 2.
 */
function pad(n) {
  if (n.length < 2) {
    return "0" + n;
  }
  return n;
}

// Percent-encode every byte ("%XX") and let decodeURIComponent interpret the
// resulting sequence as UTF-8.
var array = new Uint8Array(data);
var str = Array.from(array, function (byte) {
  return "%" + pad(byte.toString(16));
}).join("");

str = decodeURIComponent(str);
Here's a demo that decodes a 3-byte UTF-8 unit: http://jsfiddle.net/Z9pQE/
回答6:
I faced the same issue but needed to be able to parse/write UTF-8 encoded data progressively. Here is a library I just made to address this issue: https://github.com/nfroidure/UTF8.js .
Edit: It seems that Mozilla is cooking something for us: StringView ( https://developer.mozilla.org/en-US/docs/Code_snippets/StringView?redirectlocale=en-US&redirectslug=Web%2FJavaScript%2FTyped_arrays%2FStringView#encoding_values )
回答7:
The methods readAsArrayBuffer and readAsText of a FileReader object asynchronously convert a Blob object to an ArrayBuffer or to a DOMString, respectively.
A Blob object type can be created from a raw text or byte array, for example.
// Wrap the text in a Blob so a FileReader can read it back as an ArrayBuffer.
const blob = new Blob([text], { type: "text/plain" });
const reader = new FileReader();

// The load handler receives the read result on event.target.result.
reader.onload = function (event) {
  const buffer = event.target.result;
};

reader.readAsArrayBuffer(blob);
I think it's better to pack up this in a promise:
回答8:
The main problem of programmers looking for conversion from byte array into a string is UTF-8 encoding (compression) of unicode characters. This code will help you:
/**
 * Decodes an array of UTF-8 bytes into a JavaScript (UTF-16) string.
 *
 * Lead bytes select a 1- to 4-byte sequence; continuation bytes contribute
 * their low six bits. A sequence cut off by the end of the input is replaced
 * with U+FFFD. Stray continuation bytes (0x80-0xBF) pass through as the code
 * point of the same value.
 *
 * @param {Uint8Array|number[]} strBytes - UTF-8 encoded bytes.
 * @returns {string} The decoded text.
 * @throws {RangeError} If a decoded value is not a valid code point
 *   (non-numeric input, or a value above 0x10FFFF).
 */
var getString = function (strBytes) {
  // Flush threshold: keeps the argument list of String.fromCharCode.apply small.
  var MAX_SIZE = 0x4000;
  var REPLACEMENT_CHARACTER = 0xFFFD;
  var codeUnits = [];
  var result = '';
  var length = strBytes.length;
  var index = 0;

  while (index < length) {
    var codePoint = Number(strBytes[index++]);
    var continuationCount = 0;

    // Classify the lead byte and strip its length-marker bits.
    if (codePoint === (codePoint & 0x7F)) {
      continuationCount = 0;
    } else if (0xF0 === (codePoint & 0xF0)) {
      codePoint ^= 0xF0;
      continuationCount = 3;
    } else if (0xE0 === (codePoint & 0xE0)) {
      codePoint ^= 0xE0;
      continuationCount = 2;
    } else if (0xC0 === (codePoint & 0xC0)) {
      codePoint ^= 0xC0;
      continuationCount = 1;
    }

    if (index + continuationCount > length) {
      // Truncated trailing sequence: emit U+FFFD instead of reading past the end.
      codePoint = REPLACEMENT_CHARACTER;
      index = length;
    } else {
      for (; continuationCount > 0; continuationCount--) {
        // Mask to the six payload bits so a malformed byte cannot clobber
        // the bits already accumulated.
        codePoint = (codePoint << 6) | (strBytes[index++] & 0x3F);
      }
    }

    if (!isFinite(codePoint) || codePoint < 0 || codePoint > 0x10FFFF || Math.floor(codePoint) !== codePoint) {
      throw new RangeError('Invalid code point: ' + codePoint);
    }

    if (codePoint <= 0xFFFF) {
      codeUnits.push(codePoint);
    } else {
      // Split into a UTF-16 high/low surrogate pair.
      codePoint -= 0x10000;
      codeUnits.push((codePoint >> 10) | 0xD800, (codePoint % 0x400) | 0xDC00);
    }

    if (codeUnits.length > MAX_SIZE) {
      result += String.fromCharCode.apply(null, codeUnits);
      codeUnits.length = 0;
    }
  }

  // Always flush what is left, regardless of where the index ended up.
  result += String.fromCharCode.apply(null, codeUnits);
  return result;
};
All the best !