I want to remove all the HTML tags except <br> or <br/> tags from a string using JavaScript.
I have seen many questions like this but t
Use a negative lookahead, for example with a regex such as /<(?!br\s*\/?)[^>]+>/g:
// Remove every HTML tag except line breaks. The negative lookahead refuses to
// match a "<" that opens a br tag, so only the other tags are deleted.
// The character class after "br" forces the tag name to end right there:
// <br>, <br/>, <br /> and <br ...attributes> are kept, while look-alikes such
// as <break> or <brand> are removed. The "i" flag keeps upper-case <BR> too.
var nonBreakTag = /<(?!br[\s\/>])[^>]+>/gi;
var html = 'this is my <b>string</b> and it\'s pretty cool<br />isn\'t it?<br>Yep, it is. <strong>More HTML tags</strong>';
html = html.replace(nonBreakTag, '');
console.log(html);
//this is my string and it's pretty cool<br />isn't it?<br>Yep, it is. More HTML tags
Demo
I've adapted Sandro Rosa's function to address the issue mentioned by Nikita:
/**
 * Removes HTML tags from a string, optionally keeping an allow-list of tags.
 *
 * Every argument after the first names one tag to keep; it may be written
 * bare ("b") or with markup characters ("<b>", "</b>"), which are discarded.
 * Arguments that are not strings, or that are empty once cleaned, are ignored.
 *
 * Allowed tags are matched case-sensitively and only in their bare form
 * (<b>, </b>, <b/>, <b />); an allowed tag that carries attributes is still
 * removed.
 *
 * @param {string} _html - Markup to clean.
 * @returns {string} The cleaned markup, or "" when _html is not a string.
 */
function strip_tags( _html /*you can put each single tag per argument*/ )
{
    var keep = [];
    for ( var i = 1 ; i < arguments.length ; i++ ) {
        var candidate = arguments[i];
        // Skip non-string arguments instead of throwing on .replace().
        if ( typeof candidate !== "string" && !( candidate instanceof String ) ) continue;
        var name = String( candidate ).replace( /[<>\/]/g, '' ).trim();
        // Test the cleaned name, not the raw argument: "<>" or " " would
        // otherwise add an empty alternative to the pattern below.
        if ( name.length === 0 ) continue;
        // Escape regex metacharacters so a name is always matched literally
        // ("." must not act as a wildcard, "(" must not break the pattern).
        keep.push( name.replace( /[.*+?^${}()|[\]\\]/g, "\\$&" ) );
    }

    if ( typeof _html !== "string" && !( _html instanceof String ) ) return "";
    if ( keep.length === 0 ) return _html.replace( /<\s*\/?[^>]+>/g, "" );

    // The lookahead is closed by ">" so that keeping "b" does not also keep
    // <br> or <bol>.
    var stripper = new RegExp( "<(?!\\s*\\/?(" + keep.join("|") + ")\\s*\\/?>)[^>]*>", "g" );
    return _html.replace( stripper, '' );
}
// Demo driver: logs the sample markup, then, for each allow-list below, a
// caption followed by what strip_tags returns for that allow-list.
var _html = "<b>Just</b> some <i>tags</i> and text to test <u>this code</u>" ;
console.log( "This is the original html code including some tags" );
console.log( _html ); // original html code
[
    [ "Now we remove all tags (plain text)", [] ],            // remove all tags
    [ "Only the bold tag is kept", [ "b" ] ],                 // keep <b> only
    [ "Only the underline tag is kept", [ "u" ] ],            // keep <u> only
    [ "Only the italic tag is kept", [ "<i>" ] ],             // keep <i> only
    [ "Keeping both italic and underline", [ "i", "u" ] ]     // keep both <i> and <u>
].forEach( function ( demo ) {
    var caption = demo[0];
    var keepList = demo[1];
    console.log( caption );
    // Pass the markup first, then each allowed tag as its own argument.
    console.log( strip_tags.apply( null, [ _html ].concat( keepList ) ) );
} );
// Second sample: tags whose names merely start with "b" must not survive.
_html = "this is my <b>string</b> and it's pretty cool<br />isn't it?<br>Yep, it is.<strong>More HTML tags</strong><span></span><bol>" ;
console.log( "Keeping just the bold tag" );
console.log( strip_tags( _html, "b" ) ); // keep just the <b>, not the <br> or <bol>
I've worked on the last suggestion to develop a function that removes all tags or keeps only some of them:
/**
 * Strips HTML tags from a string; tags named in the extra arguments are kept.
 *
 * Each extra argument names one tag, with or without markup characters
 * ("b", "<b>" and "</b>" are equivalent). Non-string arguments and names that
 * are empty after cleaning are ignored. Both the opening and the closing form
 * of an allowed tag are kept.
 *
 * Allowed tags are matched case-sensitively and only without attributes
 * (<b>, </b>, <b/>, <b />).
 *
 * @param {string} _html - Markup to clean.
 * @returns {string} The cleaned markup, or "" when _html is not a string.
 */
function strip_tags( _html /*you can put each single tag per argument*/ )
{
    var allowed = [];
    for ( var idx = 1 ; idx < arguments.length ; idx++ )
    {
        var raw = arguments[idx];
        // Ignore anything that is not a string rather than throwing.
        if ( typeof raw !== "string" && !( raw instanceof String ) ) continue;
        var tagName = String( raw ).replace( /[<>\/]/g, '' ).trim();
        // Check the cleaned name so that "<>" or " " cannot add an empty
        // alternative to the pattern.
        if ( tagName.length === 0 ) continue;
        // Escape regex metacharacters so the name is matched literally.
        allowed.push( tagName.replace( /[.*+?^${}()|[\]\\]/g, "\\$&" ) );
    }

    if ( typeof _html !== "string" && !( _html instanceof String ) ) return "";
    if ( allowed.length === 0 ) return _html.replace( /<(\s*\/?)[^>]+>/g, "" );

    // Backslashes are doubled because the pattern is built from a string
    // literal; a single "\s" would reach RegExp as a plain "s".
    // The optional "/" handles closing tags, and the lookahead ends at ">" so
    // that allowing "b" does not also keep <br> or <bol>.
    var _re = new RegExp( "<(?!\\s*\\/?(?:" + allowed.join("|") + ")\\s*\\/?>)[^>]+>", "g" );
    return _html.replace( _re, '' );
}
// Browser demo: writes the sample markup into the page, then, for each entry
// below, a caption followed by the strip_tags result for that allow-list.
var _html = "<b>Just</b> some <i>tags</i> and text to test <u>this code</u>" ;
document.write( "This is the original html code including some tags<br>" );
document.write( _html + "<br><br>" ); // original html code
[
    { caption: "Now we remove all tags (plain text)<br>", keep: [],           tail: "<br><br>" }, // remove all tags
    { caption: "Only the bold tag is kept<br>",           keep: [ "b" ],      tail: "<br><br>" }, // keep <b> only
    { caption: "Only the underline tag is kept<br>",      keep: [ "u" ],      tail: "<br><br>" }, // keep <u> only
    { caption: "Only the italic tag is kept<br>",         keep: [ "<i>" ],    tail: "<br><br>" }, // keep <i> only
    { caption: "Keeping both italic and underline<br>",   keep: [ "i", "u" ], tail: "" }          // keep both <i> and <u>
].forEach( function ( step ) {
    document.write( step.caption );
    // Markup goes first, then every allowed tag as a separate argument.
    var cleaned = strip_tags.apply( null, [ _html ].concat( step.keep ) );
    document.write( cleaned + step.tail );
} );
Try this:
/**
 * Strips all HTML tags from a string except line breaks.
 *
 * Every <br>, <br/> or <br /> (any letter case) is swapped for a placeholder,
 * the remaining markup is reduced to its text through a temporary DIV, and
 * the placeholders are then turned back into "<br>".
 *
 * Caveats:
 * - Text that already contains the literal "||br||" comes back as "<br>".
 * - NOTE(review): the input is assigned to innerHTML; confirm it is trusted,
 *   since markup parsed this way may be acted on by the browser.
 *
 * @param {string} html - Markup to clean.
 * @returns {string} Text content of the markup with line breaks kept as "<br>".
 */
function remove_tags(html)
{
    var BR_TOKEN = "||br||";
    // A string pattern in replace() only touches the first match, so use a
    // global regex to protect every line break, in all of its spellings.
    var marked = html.replace( /<br\s*\/?>/gi, BR_TOKEN );
    var tmp = document.createElement("DIV");
    tmp.innerHTML = marked;
    // Fall back to "" so markup without any text does not break the next step.
    var text = tmp.textContent || tmp.innerText || "";
    // split/join restores every placeholder, not just the first one.
    return text.split( BR_TOKEN ).join( "<br>" );
}
To expand h2ooooooo's answer to allow leading spaces and be case-insensitive, you could use:
/<(?!\s*br\s*\/?)[^>]+>/gi