Rendering Plaintext as HTML maintaining whitespace – without <pre>

这一生的挚爱 提交于 2019-12-03 12:59:52

The way to do that while still allowing the browser to wrap long lines is to replace each sequence of two spaces with a normal space and a non-breaking space.

The browser will correctly render all spaces (normal and non-breaking ones), while still wrapping long lines (thanks to the normal spaces).

Javascript:

// Escape the HTML-special characters first, so that the entities and tags
// inserted below are the only markup in the result.
text = html_escape(text); // dummy function
text = text.replace(/\t/g, '    ')            // a tab becomes four spaces
           .replace(/  /g, '&nbsp; ')         // first pass: pair up spaces
           .replace(/  /g, ' &nbsp;')         // second pass
                                              // handles odd number of spaces, where we
                                              // end up with "&nbsp;" + " " + " "
           .replace(/^ /gm, '&nbsp;')         // a single leading space is still a normal
                                              // space here; browsers drop normal spaces at
                                              // the start of a line, so make it
                                              // non-breaking (must run before the line
                                              // breaks are turned into <br />)
           .replace(/\r\n|\n|\r/g, '<br />'); // finally, line breaks become <br />

Use a zero-width space (&#8203;) to preserve whitespace and allow the text to wrap. The basic idea is to pair each space or sequence of spaces with a zero-width space. Then replace each space with a non-breaking space. You'll also want to HTML-encode the text and convert line breaks to <br /> tags.

If you don't care about unicode characters, it's trivial. You can just use string.replace():

/**
 * Convert plain text to HTML that keeps its whitespace but can still wrap.
 *
 * "&", "<" and ">" are entity-encoded, every space becomes a non-breaking
 * space surrounded by zero-width spaces, a tab counts as four spaces, and
 * each line break ("\r\n", "\r" or "\n") becomes "<br />".
 *
 * @param {*} text  Value to convert; falsy values are treated as "".
 * @returns {string} The HTML markup.
 */
function textToHTML(text)
{
    // One space: a non-breaking space with a break opportunity on each side.
    var spaceMarkup = "&#8203;&nbsp;&#8203;";
    var replacements = {
        "&": "&amp;",
        "<": "&lt;",
        ">": "&gt;",
        "\t": spaceMarkup + spaceMarkup + spaceMarkup + spaceMarkup,
        " ": spaceMarkup,
        "\r\n": "<br />",
        "\r": "<br />",
        "\n": "<br />"
    };
    var source = (text || "") + "";  // coerce to a string
    // "\r\n" is listed first so a Windows line ending yields a single <br />.
    return source.replace(/\r\n|[&<>\t \r\n]/g, function (matched)
    {
        return replacements[matched];
    });
}

If it's ok for the white space to wrap, pair each space with a zero-width space as above. Otherwise, to keep white space together, pair each sequence of spaces with a zero-width space:

    // Step 1: turn every space into a non-breaking space.
    .replace(/ /g, "&nbsp;")
    // Step 2: put a zero-width space before and after each whole run of
    // &nbsp; entities, so the run itself is kept together.
    .replace(/((&nbsp;)+)/g, "&#8203;$1&#8203;")

To encode unicode characters, it's a little more complex. You need to iterate the string:

// Markup substituted for characters that must not be copied verbatim.
var charEncodings = {
    "\t": "&nbsp;&nbsp;&nbsp;&nbsp;",
    " ": "&nbsp;",
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    "\n": "<br />",
    "\r": "<br />"
};
// Matches a single tab or space.
var space = /[\t ]/;
// Numeric reference for U+200B ZERO WIDTH SPACE.
var noWidthSpace = "&#8203;";

/**
 * Convert plain text to HTML, preserving whitespace and encoding every
 * non-ASCII character as a numeric character reference.
 *
 * - Characters listed in charEncodings are replaced by their markup.
 * - "\r\n" is folded to "\n" first so it yields a single "<br />".
 * - A zero-width space is emitted before the first character of a run of
 *   two or more spaces/tabs. A lone space or tab gets none.
 * - Characters above code 127 become "&#N;". A UTF-16 surrogate pair is
 *   combined into one reference to the full code point.
 *
 * @param {*} text  Value to convert; falsy values are treated as "".
 * @returns {string} The HTML markup.
 */
function textToHTML(text)
{
    text = (text || "") + "";  // make sure it is a string;
    text = text.replace(/\r\n/g, "\n");  // avoid adding two <br /> tags
    var html = "";
    var lastChar = "";
    // Numeric index loop: for...in yields string keys (so "i + 1" would
    // concatenate) and also visits enumerable String.prototype additions.
    for (var i = 0; i < text.length; i++)
    {
        var ch = text.charAt(i);
        var charCode = text.charCodeAt(i);
        // charAt() returns "" past the end, which never matches `space`.
        if (space.test(ch) && !space.test(lastChar) && space.test(text.charAt(i + 1)))
        {
            html += noWidthSpace;
        }
        if (Object.prototype.hasOwnProperty.call(charEncodings, ch))
        {
            html += charEncodings[ch];
        }
        else if (charCode > 127)
        {
            var nextCode = text.charCodeAt(i + 1);  // NaN past the end
            if (charCode >= 0xD800 && charCode <= 0xDBFF &&
                nextCode >= 0xDC00 && nextCode <= 0xDFFF)
            {
                // Surrogate pair: emit one reference for the real code
                // point and skip the low surrogate.
                charCode = (charCode - 0xD800) * 0x400 + (nextCode - 0xDC00) + 0x10000;
                i++;
            }
            html += "&#" + charCode + ";";
        }
        else
        {
            html += ch;
        }
        lastChar = ch;
    }
    return html;
}

Now, just a comment. Without using monospace fonts, you'll lose some formatting. Consider how these lines of text with a monospace font form columns:

ten       seven spaces
eleven    four spaces

Without the monospaced font, you will lose the columns:

 ten       seven spaces
 eleven    four spaces

It seems that the algorithm to fix that would be very complex.

While this doesn't quite meet all your requirements — for one thing it doesn't handle tabs, I've used the following gem, which adds a wordWrap() method to Javascript Strings, on a couple of occasions to do something similar to what you're describing — so it might be a good starting point to come up with something that also does the additional things you want.

//+ Jonas Raoni Soares Silva
//@ http://jsfromhell.com/string/wordwrap [rev. #2]

// String.wordWrap(maxLength: Integer,
//                 [breakWith: String = "\n"],
//                 [cutType: Integer = 0]): String
//
//   Returns a string with the extra characters/words "broken".
//
//     maxLength  maximum amount of characters per line
//     breakWith  string that will be added whenever one is needed to
//                break the line
//     cutType    0 = words longer than "maxLength" will not be broken
//                1 = words will be broken when needed
//                2 = any word that trespasses the limit will be broken

/**
 * Wrap each "\n"-separated line of the string to at most `m` characters.
 *
 * @param {number} m    Maximum number of characters per line. Values below 1
 *                      return the string unchanged.
 * @param {string} [b]  Text inserted wherever a line is broken; defaults to
 *                      "\n" when omitted.
 * @param {number} [c]  Cut type: 0 (default) never breaks a word longer than
 *                      `m`; 1 breaks a word only when it cannot fit on a line;
 *                      2 always cuts at exactly `m` characters.
 * @returns {string} The wrapped text (always a primitive string).
 */
String.prototype.wordWrap = function(m, b, c){
    var text = String(this);
    if(m < 1)
        return text;
    // The original never applied the documented default and so inserted the
    // literal text "undefined" when `b` was omitted.
    var breakWith = b == null ? "\n" : b;
    var lines = text.split("\n");
    for(var i = 0; i < lines.length; i++){
        var rest = lines[i];   // part of the line not yet emitted
        var wrapped = "";
        while(rest.length > m){
            var cut = m;       // cut type 2: always cut at exactly m
            if(c != 2){        // loose comparison kept so "2" behaves like 2
                // Inspect one character past the limit to see whether the
                // limit falls on whitespace.
                var head = rest.slice(0, m + 1);
                var tail = head.match(/\S*(\s)?$/);
                if(!tail[1]){
                    // Cut just after the last whitespace inside `head`.
                    cut = head.length - tail[0].length;
                    if(!cut){
                        // `head` is a single unbroken word.
                        // Cut type 1 splits it at m. Otherwise keep the whole
                        // word plus the character that follows it; the
                        // original took the entire remaining line here, so
                        // everything after a long word was never wrapped.
                        cut = c == 1 ? m : head.length + rest.slice(m).match(/^\S*/)[0].length;
                    }
                }
            }
            wrapped += rest.slice(0, cut);
            rest = rest.slice(cut);
            if(rest.length)
                wrapped += breakWith;
        }
        lines[i] = wrapped + rest;
    }
    return lines.join("\n");
};

I'd also like to comment that it seems to me as though, in general, you'd want to use a monospaced font if tabs are involved, because the width of words varies with the proportional font used (making the results of using tab stops very font-dependent).

Update: Here's a slightly more readable version courtesy of an online javascript beautifier:

/**
 * Wrap each "\n"-separated line of the string to at most `m` characters.
 *
 * @param {number} m    Maximum number of characters per line. Values below 1
 *                      return the string unchanged.
 * @param {string} [b]  Text inserted wherever a line is broken; defaults to
 *                      "\n" when omitted.
 * @param {number} [c]  Cut type: 0 (default) never breaks a word longer than
 *                      `m`; 1 breaks a word only when it cannot fit on a line;
 *                      2 always cuts at exactly `m` characters.
 * @returns {string} The wrapped text (always a primitive string).
 */
String.prototype.wordWrap = function(m, b, c) {
    var text = String(this);
    if (m < 1)
        return text;
    // Apply the documented default; without it an omitted `b` was inserted
    // as the literal text "undefined".
    var breakWith = b == null ? "\n" : b;
    var lines = text.split("\n");
    for (var i = 0; i < lines.length; i++) {
        var rest = lines[i];   // part of the line not yet emitted
        var wrapped = "";
        while (rest.length > m) {
            var cut = m;       // cut type 2: always cut at exactly m
            if (c != 2) {      // loose comparison kept so "2" behaves like 2
                // Inspect one character past the limit to see whether the
                // limit falls on whitespace.
                var head = rest.slice(0, m + 1);
                var tail = head.match(/\S*(\s)?$/);
                if (!tail[1]) {
                    // Cut just after the last whitespace inside `head`.
                    cut = head.length - tail[0].length;
                    if (!cut) {
                        // `head` is a single unbroken word.
                        // Cut type 1 splits it at m. Otherwise keep the whole
                        // word plus the character that follows it, instead of
                        // swallowing the entire remaining line unwrapped.
                        cut = c == 1 ? m : head.length + rest.slice(m).match(/^\S*/)[0].length;
                    }
                }
            }
            wrapped += rest.slice(0, cut);
            rest = rest.slice(cut);
            if (rest.length)
                wrapped += breakWith;
        }
        lines[i] = wrapped + rest;
    }
    return lines.join("\n");
};

It is very simple if you use the jQuery library in your project.

Just one line: add an asHtml extension to the String class, and then:

// Sample input: markup characters written as HTML entities.
var plain = '&lt;a&gt; i am text plain &lt;/a&gt;';
// NOTE(review): asHtml() is the String extension mentioned in the text; its
// definition is not shown here.
plain.asHtml();
// => '<a> i am text plain </a>'

DEMO :http://jsfiddle.net/abdennour/B6vGG/3/

Note: you do not need to access the DOM. Just use jQuery's builder pattern: $('<tagName />')

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!