问题
Does JavaScript have a way of truncating HTML text without all the headaches of matching tags etc etc?
Thank you.
回答1:
There's nothing built-in javascript. There's a jQuery plugin that you might take a look at.
回答2:
I had the same problem, and wound up writing the following to deal with it. It truncates HTML to a give length, cleans up any start / end tags that might have gotten snipped off at the end, and then closes any tags left unclosed:
function truncateHTML(text, length) {
var truncated = text.substring(0, length);
// Remove line breaks and surrounding whitespace
truncated = truncated.replace(/(\r\n|\n|\r)/gm,"").trim();
// If the text ends with an incomplete start tag, trim it off
truncated = truncated.replace(/<(\w*)(?:(?:\s\w+(?:={0,1}(["']{0,1})\w*\2{0,1})))*$/g, '');
// If the text ends with a truncated end tag, fix it.
var truncatedEndTagExpr = /<\/((?:\w*))$/g;
var truncatedEndTagMatch = truncatedEndTagExpr.exec(truncated);
if (truncatedEndTagMatch != null) {
var truncatedEndTag = truncatedEndTagMatch[1];
// Check to see if there's an identifiable tag in the end tag
if (truncatedEndTag.length > 0) {
// If so, find the start tag, and close it
var startTagExpr = new RegExp(
"<(" + truncatedEndTag + "\\w?)(?:(?:\\s\\w+(?:=([\"\'])\\w*\\2)))*>");
var testString = truncated;
var startTagMatch = startTagExpr.exec(testString);
var startTag = null;
while (startTagMatch != null) {
startTag = startTagMatch[1];
testString = testString.replace(startTagExpr, '');
startTagMatch = startTagExpr.exec(testString);
}
if (startTag != null) {
truncated = truncated.replace(truncatedEndTagExpr, '</' + startTag + '>');
}
} else {
// Otherwise, cull off the broken end tag
truncated = truncated.replace(truncatedEndTagExpr, '');
}
}
// Now the tricky part. Reverse the text, and look for opening tags. For each opening tag,
// check to see that he closing tag before it is for that tag. If not, append a closing tag.
var testString = reverseHtml(truncated);
var reverseTagOpenExpr = /<(?:(["'])\w*\1=\w+ )*(\w*)>/;
var tagMatch = reverseTagOpenExpr.exec(testString);
while (tagMatch != null) {
var tag = tagMatch[0];
var tagName = tagMatch[2];
var startPos = tagMatch.index;
var endPos = startPos + tag.length;
var fragment = testString.substring(0, endPos);
// Test to see if an end tag is found in the fragment. If not, append one to the end
// of the truncated HTML, thus closing the last unclosed tag
if (!new RegExp("<" + tagName + "\/>").test(fragment)) {
truncated += '</' + reverseHtml(tagName) + '>';
}
// Get rid of the already tested fragment
testString = testString.replace(fragment, '');
// Get another tag to test
tagMatch = reverseTagOpenExpr.exec(testString);
}
return truncated;
}
function reverseHtml(str) {
var ph = String.fromCharCode(206);
var result = str.split('').reverse().join('');
while (result.indexOf('<') > -1) {
result = result.replace('<',ph);
}
while (result.indexOf('>') > -1) {
result = result.replace('>', '<');
}
while (result.indexOf(ph) > -1) {
result = result.replace(ph, '>');
}
return result;
}
回答3:
I know this question is old, but I recently had the same problem. I wrote the following library, which truncates valid HTML safely: https://github.com/arendjr/text-clipper
回答4:
There's a mootools plugin which does exactly what you need: mooReadAll at mootools forge
回答5:
I just recently finished a jQuery function to do this using the width & height of the container. Test it out and see if it works for you. I'm not yet sure of all the compatibility issues, bugs, or limitations but I've tested it in FF, Chrome, and IE7.
- http://blog.malcolmp.com/?p=974
- http://blog.malcolmp.com/truncator-example/test.html
回答6:
If you want a light-weight solution in vanilla JS, this should do the trick, although it'll leave empty elements around, so it depends on if you care about those. Also note that it mutates the nodes in-place.
function truncateNode(node, limit) {
if (node.nodeType === Node.TEXT_NODE) {
node.textContent = node.textContent.substring(0, limit);
return limit - node.textContent.length;
}
node.childNodes.forEach((child) => {
limit = truncateNode(child, limit);
});
return limit;
}
const span = document.createElement('span');
span.innerHTML = '<b>foo</b><i>bar</i><u>baz</u>';
truncateNode(span, 5);
expect(span.outerHTML).toEqual('<span><b>foo</b><i>ba</i><u></u></span>');
回答7:
That's quite challenging.
If you don't have any HTML markup, the following might be useful.
- http://www.barelyfitz.com/projects/truncate/
来源:https://stackoverflow.com/questions/3822233/javascript-truncate-html-text