I can't parse this URL: http://foldmunka.net
$ch = curl_init(\"http://foldmunka.net\");
//curl_setopt($ch, CURLOPT_NOBODY, true);
curl_setopt($ch, CURLOPT_RE
Here is a solution with DOMDocument and DOMXPath. It is much shorter and runs much faster (~100 ms versus ~2300 ms) than the other solution, which uses Simple HTML DOM Parser.
<?php
/**
 * Extracts the human-readable text of an HTML document.
 *
 * Walks the document in order and concatenates every text node together with
 * the `alt` and `title` attribute values of <a> and <img> elements. Each
 * collected piece is followed by a single space.
 *
 * @param string $source Path or URL handed to DOMDocument::loadHTMLFile().
 *
 * @return string The collected text; an empty string when the document
 *                yields no matching nodes.
 */
function makePlainText($source)
{
    // Real-world pages are rarely valid markup. Collect libxml's parse
    // complaints internally instead of emitting a PHP warning for each one,
    // and put the caller's setting back afterwards.
    $previousSetting = libxml_use_internal_errors(true);

    $dom = new DOMDocument();
    $dom->loadHTMLFile($source);
    // use this instead of loadHTMLFile() to load from string:
    //$dom->loadHTML('Hello Hello this site click Some text.');

    libxml_clear_errors();
    libxml_use_internal_errors($previousSetting);

    $xpath = new DOMXPath($dom);
    $plain = '';

    foreach ($xpath->query('//text()|//a|//img') as $node) {
        // CDATA sections are matched by text() and are DOMText subclasses,
        // so they must be filtered out before the DOMText check below.
        // NOTE(review): libxml appears to store <script>/<style> content as
        // CDATA nodes, which is presumably why they are skipped - confirm.
        if ($node->nodeName === '#cdata-section') {
            continue;
        }

        if ($node instanceof DOMElement) {
            // For links and images the descriptive attributes carry the text.
            if ($node->hasAttribute('alt')) {
                $plain .= $node->getAttribute('alt') . ' ';
            }
            if ($node->hasAttribute('title')) {
                $plain .= $node->getAttribute('title') . ' ';
            }
        }

        if ($node instanceof DOMText) {
            $plain .= $node->textContent . ' ';
        }
    }

    return $plain;
}
// Demo: print the plain-text rendition of the page.
print makePlainText('http://foldmunka.net');