Whenever we fetch user-entered content that has undergone some editing from a database or a similar source, we might retrieve a fragment that contains an opening tag but not its matching closing tag.
Erik Arvidsson wrote a nice HTML SAX parser in 2004. http://erik.eae.net/archives/2004/11/20/12.18.31/
It keeps track of the open tags, so with a minimalistic SAX handler it's possible to insert closing tags at the correct position:
/**
 * Re-serializes an HTML fragment from the events reported by `HTMLParser`.
 *
 * The fragment is passed to the SAX-style `HTMLParser` (defined outside this
 * block) together with a handler that rebuilds the markup: text is copied
 * verbatim, start tags are re-emitted with quoted attributes, end tags are
 * re-emitted as `</tag>`, and HTML comments are dropped.
 *
 * NOTE(review): closing of tags that the input left open relies on
 * `HTMLParser` invoking `end` for them — confirm against the parser in use.
 *
 * @param {string} html - The (possibly unbalanced) HTML fragment to tidy.
 * @returns {string} The re-serialized markup.
 */
function tidyHTML(html) {
  var output = '';

  // Wraps an attribute value in whichever quote character it does not
  // contain. A value containing both kinds is double-quoted with its `"`
  // escaped as `&quot;`, because an unquoted value may contain neither quotes
  // nor whitespace.
  function quoteAttrValue(value) {
    if (value.indexOf('"') === -1) {
      return '"' + value + '"';
    }
    if (value.indexOf('\'') === -1) {
      return '\'' + value + '\'';
    }
    return '"' + value.replace(/"/g, '&quot;') + '"';
  }

  HTMLParser(html, {
    comment: function (text) {
      // Intentionally empty: HTML comments are filtered out of the output.
    },
    chars: function (text) {
      output += text;
    },
    // `unary` is not consulted: every start tag is emitted as `<tag ...>`.
    start: function (tagName, attrs, unary) {
      output += '<' + tagName;
      for (var i = 0; i < attrs.length; i++) {
        output += ' ' + attrs[i].name + '=' + quoteAttrValue(attrs[i].value);
      }
      output += '>';
    },
    end: function (tagName) {
      // Emit a real closing tag; without the `</` prefix the output is broken.
      output += '</' + tagName + '>';
    }
  });

  return output;
}