I\'m reading out lots of texts from various RSS feeds and inserting them into my database.
Of course, there are several different character encodings used in the fee
Get encoding from headers and convert it to utf-8.
$post_url='http://website.domain';
/// Get headers ////////////////////////////////////////////////////////////
function get_headers_curl($url)
{
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_HEADER, true);
curl_setopt($ch, CURLOPT_NOBODY, true);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_TIMEOUT, 15);
$r = curl_exec($ch);
return $r;
}
$the_header = get_headers_curl($post_url);
/// check for redirect /////////////////////////////////////////////////
if (preg_match("/Location:/i", $the_header)) {
$arr = explode('Location:', $the_header);
$location = $arr[1];
$location=explode(chr(10), $location);
$location = $location[0];
$the_header = get_headers_curl(trim($location));
}
/// Get charset /////////////////////////////////////////////////////////////////////
if (preg_match("/charset=/i", $the_header)) {
$arr = explode('charset=', $the_header);
$charset = $arr[1];
$charset=explode(chr(10), $charset);
$charset = $charset[0];
}
///////////////////////////////////////////////////////////////////////////////
// echo $charset;
if($charset && $charset!='UTF-8') { $html = iconv($charset, "UTF-8", $html); }