问题
I've been trying to find a solution that works but couldn't find one.
I have an object in javascript and it has some non-english characters in it.
I'm trying the following code to convert the object to a blob for download.
When I click the link and open the downloaded JSON file, the non-English characters are gibberish.
It's a simple object like this one: {name: "שלומית", last: "רעננה"}
/**
 * Prepares the element with id "download" so that it links to a JSON file
 * built from `obj`.
 *
 * When `obj` is falsy or has a zero/absent `length`, only the element's text
 * is updated to report that there is nothing to download.
 *
 * @param {Array<Object>} obj - Array of objects to serialize; may contain non-English characters.
 */
function setJSONForDownload(obj) {
  const items = obj || [];
  const count = items.length;

  // Nothing to export: update the label and stop.
  if (!count) {
    document.getElementById('download').innerText = `No data to download...`;
    return;
  }

  // Serialize, then convert to bytes with the file's own encode() helper.
  const payload = encode( JSON.stringify(items) );
  const jsonBlob = new Blob( [ payload ], { type: "application/json;charset=utf-8" } );
  const href = URL.createObjectURL( jsonBlob );

  const link = document.getElementById('download');
  link.innerText = `Download ${count} pages scraped`;
  link.setAttribute( 'href', href );
  link.setAttribute( 'download', 'data.json' );
}
/**
 * Copies the UTF-16 code units of `s` into a Uint8Array, one element per code unit.
 *
 * A Uint8Array element holds only 8 bits, so each code unit is reduced to its
 * low byte: values above 0xFF (i.e. anything outside Latin-1) are not preserved.
 *
 * @param {string} s - Input string.
 * @returns {Uint8Array} Array of length `s.length` holding `charCodeAt(i) & 0xFF`.
 */
function encode (s) {
  return Uint8Array.from({ length: s.length }, (_, index) => s.charCodeAt(index));
}
回答1:
Your encode function is broken: it casts UTF-16 char codes directly to bytes, so any code above 255 is truncated. Don't try to implement this yourself, just use the Encoding API:
// Serialize the data to a JSON string.
const str = JSON.stringify(obj);
// TextEncoder.encode() returns the string's UTF-8 bytes as a Uint8Array.
const bytes = new TextEncoder().encode(str);
// Wrap the encoded bytes in a Blob labelled as UTF-8 JSON.
const blob = new Blob([bytes], {
type: "application/json;charset=utf-8"
});
回答2:
I found a nice block of code that solved my issue.
Thanks to 'pascaldekloe' (https://gist.github.com/pascaldekloe/62546103a1576803dade9269ccf76330).
Just changed the encode method to the following:
/**
 * Encodes a JavaScript (UTF-16) string as UTF-8.
 *
 * Surrogate pairs are combined into a single code point and emitted as a
 * 4-byte sequence. Unpaired surrogates cannot be represented in well-formed
 * UTF-8, so they are rejected with an Error instead of being written out as
 * invalid bytes.
 *
 * @param {string} s - String to encode.
 * @returns {Uint8Array} The UTF-8 bytes, backed by a buffer of exactly that size.
 * @throws {Error} If `s` contains an unpaired high or low surrogate.
 */
function encode(s) {
  // One UTF-16 code unit needs at most 3 UTF-8 bytes; a surrogate pair
  // (2 code units) needs 4, so 3 bytes per code unit is always enough.
  const bytes = new Uint8Array(s.length * 3);
  let i = 0;

  for (let ci = 0; ci < s.length; ci++) {
    let c = s.charCodeAt(ci);

    // 1 byte: U+0000..U+007F
    if (c < 0x80) {
      bytes[i++] = c;
      continue;
    }

    if (c < 0x800) {
      // 2 bytes: U+0080..U+07FF (lead byte; trailing byte is written below)
      bytes[i++] = (c >> 6) | 0xc0;
    } else if (c >= 0xd800 && c <= 0xdbff) {
      // High surrogate: must be followed by a low surrogate.
      if (++ci >= s.length) {
        throw new Error('UTF-8 encode: incomplete surrogate pair');
      }
      const c2 = s.charCodeAt(ci);
      if (c2 < 0xdc00 || c2 > 0xdfff) {
        throw new Error('UTF-8 encode: second surrogate character 0x' + c2.toString(16) + ' at index ' + ci + ' out of range');
      }
      // 4 bytes: U+10000..U+10FFFF
      c = 0x10000 + ((c & 0x03ff) << 10) + (c2 & 0x03ff);
      bytes[i++] = (c >> 18) | 0xf0;
      bytes[i++] = ((c >> 12) & 0x3f) | 0x80;
      bytes[i++] = ((c >> 6) & 0x3f) | 0x80;
    } else if (c >= 0xdc00 && c <= 0xdfff) {
      // A low surrogate with no preceding high surrogate has no valid UTF-8 form.
      throw new Error('UTF-8 encode: unpaired low surrogate 0x' + c.toString(16) + ' at index ' + ci);
    } else {
      // 3 bytes: U+0800..U+FFFF (excluding surrogates)
      bytes[i++] = (c >> 12) | 0xe0;
      bytes[i++] = ((c >> 6) & 0x3f) | 0x80;
    }

    // Final continuation byte shared by the 2-, 3- and 4-byte forms.
    bytes[i++] = (c & 0x3f) | 0x80;
  }

  // Copy out only the bytes written so `.buffer` carries no unused tail.
  return bytes.slice(0, i);
}
回答3:
Calling new Blob([DOMString]) will automatically convert your DOMString (UTF-16) to UTF-8.
So all you need is new Blob( [JSON.stringify(obj)] ).
Note that the type won't get used here (it would be only if the Blob were fetched or if you actually tried to read it), and in any case it only affects how the file might get read (e.g. by FileReader.readAsText()), not the actual content of the file, so there is no need to set it.
// Demo call with one object holding Hebrew (non-ASCII) strings; it can precede
// the definition because function declarations are hoisted.
setJSONForDownload([{ name: "שלומית", last: "רעננה"}]);
/**
 * Prepares the element with id "download" so that it links to a JSON file
 * built from `obj`.
 *
 * When `obj` is falsy or has a zero/absent `length`, only the element's text
 * is updated to report that there is nothing to download.
 *
 * @param {Array<Object>} obj - Array of objects to serialize.
 */
function setJSONForDownload(obj) {
  const items = obj || [];
  const count = items.length;

  // Nothing to export: update the label and stop.
  if (!count) {
    document.getElementById('download').innerText = `No data to download...`;
    return;
  }

  // The JSON string is handed to Blob directly; Blob stores string parts as
  // UTF-8, so no manual byte conversion is needed. No MIME type is set.
  const json = JSON.stringify(items);
  const href = URL.createObjectURL(new Blob([json]));

  const link = document.getElementById('download');
  link.innerText = `Download ${count} pages scraped`;
  link.setAttribute('href', href);
  link.setAttribute('download', 'data.json');
}
<a id="download">dl</a>
来源:https://stackoverflow.com/questions/53929108/how-to-convert-a-javascript-object-to-utf-8-blob-for-download