I'm trying to be a bit sneaky and, as part of a learning process, improve my page-scraping skills.
One thing I've come across that I have yet to be able to s
If you can't use CURLOPT_FOLLOWLOCATION, I suggest you use a recursive method like this one:
/**
 * Fetch a URL with cURL, following HTTP redirects manually.
 *
 * Intended for environments where CURLOPT_FOLLOWLOCATION cannot be used.
 * The response is requested with headers included; when the status code is
 * a redirect, the Location header is extracted and the function calls
 * itself with the new target.
 *
 * Note: the Location value is passed to the next request as-is, so a
 * relative redirect target is not resolved against the current URL.
 *
 * @param string $url   URL to request.
 * @param int    $count Number of redirects already followed; callers
 *                      normally leave this at its default of 0.
 *
 * @return string|false Response body of the final (non-redirect) response,
 *                      or false when the redirect limit is exceeded, the
 *                      transfer fails, the response cannot be split into
 *                      header and body, or a redirect carries no usable
 *                      Location header.
 */
function getUrl($url, $count = 0) {
    // max number of redirects
    if ($count > 5) {
        return false;
    }

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    // Include the response headers in the output so Location can be read.
    curl_setopt($ch, CURLOPT_HEADER, true);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    $data = curl_exec($ch);
    $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    curl_close($ch);

    if ($data === false || $data === '') {
        return false;
    }

    // Headers and body are separated by the first blank line.
    $dataArray = explode("\r\n\r\n", $data, 2);
    if (count($dataArray) != 2) {
        return false;
    }
    list($header, $body) = $dataArray;

    // Any status outside the redirect set is a final response.
    if (!in_array($httpCode, array(301, 302, 303, 307, 308), true)) {
        return $body;
    }

    // Header names are case-insensitive, and Location may be the last header
    // line (which has no trailing newline after the split above), so anchor
    // on line boundaries with the multiline flag instead of requiring "\n".
    $matches = array();
    if (!preg_match('/^Location:[ \t]*(.*?)[ \t]*\r?$/mi', $header, $matches)) {
        return false;
    }

    $location = trim($matches[1]);
    if ($location === '') {
        return false;
    }

    return getUrl($location, $count + 1);
}