The function below is designed to apply rel=\"nofollow\" attributes to all external links and no internal links unless the path matches a predefined root URL de
Thanks @alex for your nice solution. But, I was having a problem with Japanese text. I have fixed it as following way. Also, this code can skip multiple domains with the $whiteList array.
public function addRelNoFollow($html, $whiteList = [])
{
$dom = new \DOMDocument();
$dom->preserveWhiteSpace = false;
$dom->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8'));
$a = $dom->getElementsByTagName('a');
/** @var \DOMElement $anchor */
foreach ($a as $anchor) {
$href = $anchor->attributes->getNamedItem('href')->nodeValue;
$domain = parse_url($href, PHP_URL_HOST);
// Skip whiteList domains
if (in_array($domain, $whiteList, true)) {
continue;
}
// Check & get existing rel attribute values
$noFollow = 'nofollow';
$rel = $anchor->attributes->getNamedItem('rel');
if ($rel) {
$values = explode(' ', $rel->nodeValue);
if (in_array($noFollow, $values, true)) {
continue;
}
$values[] = $noFollow;
$newValue = implode($values, ' ');
} else {
$newValue = $noFollow;
}
// Create new rel attribute
$rel = $dom->createAttribute('rel');
$node = $dom->createTextNode($newValue);
$rel->appendChild($node);
$anchor->appendChild($rel);
}
// There is a problem with saveHTML() and saveXML(), both of them do not work correctly in Unix.
// They do not save UTF-8 characters correctly when used in Unix, but they work in Windows.
// So we need to do as follows. @see https://stackoverflow.com/a/20675396/1710782
return $dom->saveHTML($dom->documentElement);
}