I\'m writing a Chrome extension that involves doing a lot of the following job: sanitizing strings that might contain HTML tags, by converting
The AngularJS source code also has a version inside of angular-sanitize.js.
var SURROGATE_PAIR_REGEXP = /[\uD800-\uDBFF][\uDC00-\uDFFF]/g,
// Match everything outside of normal chars and " (quote character)
NON_ALPHANUMERIC_REGEXP = /([^\#-~| |!])/g;
/**
* Escapes all potentially dangerous characters, so that the
* resulting string can be safely inserted into attribute or
* element text.
* @param value
* @returns {string} escaped text
*/
function encodeEntities(value) {
return value.
replace(/&/g, '&').
replace(SURROGATE_PAIR_REGEXP, function(value) {
var hi = value.charCodeAt(0);
var low = value.charCodeAt(1);
return '' + (((hi - 0xD800) * 0x400) + (low - 0xDC00) + 0x10000) + ';';
}).
replace(NON_ALPHANUMERIC_REGEXP, function(value) {
return '' + value.charCodeAt(0) + ';';
}).
replace(//g, '>');
}