I'm currently creating a Node.js web scraper/proxy, but I'm having trouble parsing relative URLs found in the scripting part of the source. I figured a regex would do the trick.
Following a comment by Rob W above about the <base> tag, I wrote an injection function:
/**
 * Replace any <base> element inside a document's <head> with the supplied one.
 *
 * Every <head>...</head> section found in `html` is rewritten: existing
 * <base> elements inside it are removed, all other head content is kept
 * as-is, and `base` is inserted just before the closing </head>.
 * Input that contains no <head>...</head> section is returned unchanged.
 *
 * @param {string} html - HTML source to rewrite.
 * @param {string} base - Complete <base> tag markup to insert, e.g.
 *   '<base href="http://example.com/">'. It is inserted verbatim.
 * @returns {string} The rewritten HTML.
 */
function injectBase(html, base) {
  // Match only real <head> tags (optionally with attributes); requiring
  // whitespace or ">" right after the name keeps <header>/<thead> out.
  const headSection = /(<head(?:\s[^>]*)?>)([\s\S]*?)(<\/head\s*>)/gi;
  // Match <base>, <base ...>, <base/> and a stray </base>, but not <basefont>.
  const baseElement = /<base(?:\s[^>]*)?\/?>(?:\s*<\/base\s*>)?/gi;

  // A replacer function is used instead of a replacement string so that "$"
  // sequences inside `base` (e.g. "$1" or "$&" in a URL) are inserted
  // literally rather than being expanded as replacement patterns.
  return html.replace(headSection, (match, openTag, content, closeTag) => {
    // Remove any <base> elements inside <head>, keeping everything else.
    const withoutBase = content.replace(baseElement, "");
    // Add <base> just before </head>.
    return `${openTag}${withoutBase} ${base} ${closeTag}`;
  });
}