I'm really stumped on how Twitter expects users of its API to convert the plaintext tweets it sends to properly linked HTML.
Here's the deal: Twitter's JSON API sends
Here is an updated answer that works with Twitter's new Extended Mode. It combines the answer by @vita10gy and the comment by @Hugo (to make it UTF-8 compatible), with a few minor tweaks to work with the new API values.
/**
 * Multibyte-safe counterpart of substr_replace() for UTF-8 strings.
 *
 * Replaces $length characters of $original, starting at character offset
 * $position, with $replacement. Offsets are counted in UTF-8 characters,
 * not bytes.
 *
 * @param string $original    UTF-8 text to edit.
 * @param string $replacement Text inserted in place of the removed span.
 * @param int    $position    Character offset where the replaced span starts.
 * @param int    $length      Number of characters to replace.
 * @return string The edited string.
 */
function utf8_substr_replace($original, $replacement, $position, $length) {
    $head = mb_substr($original, 0, $position, "UTF-8");

    // Measure the string as UTF-8 explicitly. Relying on mb_internal_encoding()
    // can under-count characters (e.g. when it is UTF-16) and silently drop
    // the end of the string.
    $tail = mb_substr($original, $position + $length, mb_strlen($original, "UTF-8"), "UTF-8");

    return $head . $replacement . $tail;
}
/**
 * Build the visible text of an extended-mode tweet, substituting each indexed
 * entity (URL, user mention, hashtag) with its display form.
 *
 * Only the part of full_text covered by display_text_range is returned, so a
 * trailing media URL or a hidden leading reply prefix is dropped. Entity
 * indices refer to positions in full_text, so the replacements are applied
 * before the leading part is removed.
 *
 * NOTE(review): despite the function name, the replacement strings carry no
 * markup; they only prepend a space to the display text. It looks like anchor
 * tags may have been lost at some point. Confirm the intended markup before
 * relying on this output as linked HTML. The strings are kept unchanged here.
 *
 * @param object $tweet    Decoded tweet; reads full_text, display_text_range
 *                         and entities->urls / user_mentions / hashtags.
 * @param bool   $links    Replace URL entities with their display_url.
 * @param bool   $users    Replace user mentions with " @screen_name".
 * @param bool   $hashtags Replace hashtags with " #text".
 * @return string The visible tweet text with the selected entities substituted.
 */
function json_tweet_text_to_HTML($tweet, $links=true, $users=true, $hashtags=true) {
    $fullText = isset($tweet->full_text) ? (string) $tweet->full_text : "";
    $textLength = mb_strlen($fullText, "UTF-8");

    // display_text_range holds [start, end) character offsets into full_text.
    // The second value is an end offset, not a length, so it must not be passed
    // to mb_substr() as a length when start is greater than 0.
    $rangeStart = 0;
    $rangeEnd = $textLength;
    if (isset($tweet->display_text_range[0], $tweet->display_text_range[1])) {
        $rangeStart = max(0, min($textLength, (int) $tweet->display_text_range[0]));
        $rangeEnd = max($rangeStart, min($textLength, (int) $tweet->display_text_range[1]));
    }

    // Each row: entity list name, field holding the display text, literal prefix.
    $sources = array();
    if ($links) {
        $sources[] = array("urls", "display_url", " ");
    }
    if ($users) {
        $sources[] = array("user_mentions", "screen_name", " @");
    }
    if ($hashtags) {
        $sources[] = array("hashtags", "text", " #");
    }

    $entities = array();
    foreach ($sources as $source) {
        list($listName, $field, $prefix) = $source;
        if (!isset($tweet->entities->$listName) || !is_array($tweet->entities->$listName)) {
            continue;
        }
        foreach ($tweet->entities->$listName as $e) {
            if (!isset($e->indices[0], $e->indices[1], $e->$field)) {
                continue;
            }
            $start = (int) $e->indices[0];
            $end = (int) $e->indices[1];
            // Entities outside the visible range belong to text that is cut
            // away; replacing them would re-append their text to the result.
            if ($start < $rangeStart || $end > $rangeEnd || $end < $start) {
                continue;
            }
            $entities[] = array(
                "start" => $start,
                "end" => $end,
                "replacement" => $prefix . $e->$field,
            );
        }
    }

    // Work from the last entity to the first so that earlier offsets stay
    // valid while the string changes length.
    usort($entities, function($a, $b) { return $b["start"] - $a["start"]; });

    // Drop the tail first; keep the head so entity offsets still line up.
    $return = mb_substr($fullText, 0, $rangeEnd, "UTF-8");
    foreach ($entities as $item) {
        $return = utf8_substr_replace($return, $item["replacement"], $item["start"], $item["end"] - $item["start"]);
    }

    // No replacement touched the text before $rangeStart, so the hidden
    // prefix is still exactly $rangeStart characters long.
    if ($rangeStart > 0) {
        $return = mb_substr($return, $rangeStart, mb_strlen($return, "UTF-8"), "UTF-8");
    }

    return $return;
}