Here is an excerpt function:
function excerpt($text, $phrase, $radius = 100, $ending = \"...\") {
270 if (empty($text) or empty($phrase)) {
2
I could not contact erisco, so I am posting his function with multiple fixes (most importantly multibyte support).
/**
 * Builds an excerpt of roughly $span characters from $text, positioned around
 * the densest group of matches for the words in $phrase.
 *
 * $phrase is split on whitespace and every resulting word is matched
 * case-insensitively as a whole word (regex \b...\b with the "u" modifier).
 * Consecutive matches are grouped into clusters whose length does not exceed
 * $span; the cluster containing the most matches wins (the earliest one on a
 * tie) and is padded evenly on both sides. If nothing matches, or $phrase
 * contains no words, the excerpt starts at the beginning of $text.
 *
 * Excerpt edges that fall inside a word are moved to the nearest whitespace
 * inside the window. When no whitespace is available the edge is left where it
 * is (the word is cut) instead of scanning past the ends of the text.
 *
 * All positions and lengths are counted in characters (mb_* functions), not bytes.
 *
 * @param string $text text to be searched
 * @param string $phrase search string; whitespace separates individual search words
 * @param int $span approximate length of the excerpt
 * @param string $delimiter string to use as a suffix and/or prefix if the excerpt is from the middle of a text
 *
 * @return string
 */
public static function excerpt($text, $phrase, $span = 100, $delimiter = '...')
{
    $text = (string) $text;
    $textLen = mb_strlen($text);

    // Split the phrase into words. PREG_SPLIT_NO_EMPTY drops the empty pieces
    // produced by an empty phrase or by leading/trailing whitespace; an empty
    // alternative in the regex would match at every word boundary.
    $terms = preg_split('/\s+/u', (string) $phrase, -1, PREG_SPLIT_NO_EMPTY);
    if($terms === false)
    {
        // preg_split() failed (e.g. invalid UTF-8): behave as if nothing matched.
        $terms = [];
    }

    $matches = [];
    if(count($terms) > 0)
    {
        $quoted = [];
        foreach($terms as $term)
        {
            $quoted[] = preg_quote($term, '/');
        }
        $regexp = '/\b(?:' . implode('|', $quoted) . ')\b/ui';
        $found = [];
        // preg_match_all() returns false on error and 0 on no match; both leave $matches empty.
        if(preg_match_all($regexp, $text, $found, PREG_OFFSET_CAPTURE))
        {
            $matches = $found[0];
        }
    }

    // One node per match, holding its length and position in characters.
    $nodes = [];
    foreach($matches as $match)
    {
        $node = new stdClass;
        $node->phraseLength = mb_strlen($match[0]);
        // PREG_OFFSET_CAPTURE reports byte offsets; convert to a character
        // position (@see https://bugs.php.net/bug.php?id=67487)
        $node->position = mb_strlen(substr($text, 0, $match[1]));
        $nodes[] = $node;
    }

    if(count($nodes) > 0)
    {
        // Seed the first cluster with the first match.
        // ->length is the distance from the start of a cluster's first node to
        // the end of its last node; ->i is the index of its last node.
        $clust = new stdClass;
        $clust->nodes[] = array_shift($nodes);
        $clust->length = $clust->nodes[0]->phraseLength;
        $clust->i = 0;
        $clusters = new stdClass;
        $clusters->data =
        [
            $clust
        ];
        $clusters->i = 0;
        foreach($nodes as $node)
        {
            $lastClust = $clusters->data[$clusters->i];
            $lastNode = $lastClust->nodes[$lastClust->i];
            // Growth of the cluster if $node were appended: the gap after the
            // previous node plus the length of $node itself.
            $addedLength = $node->position - $lastNode->position - $lastNode->phraseLength + $node->phraseLength;
            if($lastClust->length + $addedLength <= $span)
            {
                // Still fits: extend the current cluster in place.
                $lastClust->nodes[] = $node;
                $lastClust->length+= $addedLength;
                $lastClust->i++;
            }
            else
            {
                if($addedLength > $span)
                {
                    // Too far from the previous match to share a window with it:
                    // start a fresh cluster holding only this node.
                    $newClust = new stdClass;
                    $newClust->nodes =
                    [
                        $node
                    ];
                    $newClust->i = 0;
                    $newClust->length = $node->phraseLength;
                    $clusters->data[] = $newClust;
                    $clusters->i++;
                }
                else
                {
                    // Slide the window: copy the current cluster and drop nodes
                    // from its front until the new node fits.
                    $newClust = clone $lastClust;
                    while($newClust->length + $addedLength > $span)
                    {
                        $shiftedNode = array_shift($newClust->nodes);
                        if($shiftedNode === null)
                        {
                            break;
                        }
                        $newClust->i--;
                        $removedLength = $shiftedNode->phraseLength;
                        if(isset($newClust->nodes[0]))
                        {
                            $removedLength+= $newClust->nodes[0]->position - $shiftedNode->position;
                        }
                        $newClust->length-= $removedLength;
                    }
                    if($newClust->i < 0)
                    {
                        $newClust->i = 0;
                    }
                    $newClust->nodes[] = $node;
                    $newClust->length+= $addedLength;
                    $clusters->data[] = $newClust;
                    $clusters->i++;
                }
            }
        }

        // Pick the cluster with the most matches; the strict ">" keeps the earliest on a tie.
        $bestClust = $clusters->data[0];
        $bestClustSize = count($bestClust->nodes);
        foreach($clusters->data as $clust)
        {
            $newClustSize = count($clust->nodes);
            if($newClustSize > $bestClustSize)
            {
                $bestClust = $clust;
                $bestClustSize = $newClustSize;
            }
        }

        // Centre the window on the chosen cluster.
        $clustLeft = $bestClust->nodes[0]->position;
        $clustLen = $bestClust->length;
        $padding = intval(round(($span - $clustLen) / 2));
        $clustLeft-= $padding;
        if($clustLeft < 0)
        {
            // The window would start before the text: clamp it to the start and
            // give the unused left padding to the right-hand side.
            $clustLen+= $clustLeft * -1 + $padding;
            $clustLeft = 0;
        }
        else
        {
            $clustLen+= $padding * 2;
        }
    }
    else
    {
        // No match: excerpt the beginning of the text.
        $clustLeft = 0;
        $clustLen = $span;
    }

    // Left edge inside a word: move it right to the next whitespace. The scan is
    // bounded by the end of the text; if no whitespace exists the edge stays put.
    if($clustLeft > 0
        && !ctype_space(mb_substr($text, $clustLeft, 1))
        && !ctype_space(mb_substr($text, $clustLeft - 1, 1)))
    {
        $scan = $clustLeft + 1;
        while($scan < $textLen && !ctype_space(mb_substr($text, $scan, 1)))
        {
            $scan++;
        }
        if($scan < $textLen)
        {
            $clustLeft = $scan;
        }
    }

    // Right edge inside a word: move it left to the previous whitespace. The end
    // of the text counts as a word boundary ($lastChar + 1 < $textLen), and the
    // scan never goes back past the left edge of the excerpt.
    $lastChar = $clustLeft + $clustLen;
    if($lastChar + 1 < $textLen
        && !ctype_space(mb_substr($text, $lastChar, 1))
        && !ctype_space(mb_substr($text, $lastChar + 1, 1)))
    {
        $scan = $lastChar - 1;
        while($scan > $clustLeft && !ctype_space(mb_substr($text, $scan, 1)))
        {
            $scan--;
        }
        if($scan > $clustLeft)
        {
            $clustLen = $scan - $clustLeft;
        }
    }

    // The excerpt covers characters $clustLeft .. $clustLeft + $clustLen inclusive
    // (hence the "+ 1" below), so text has been cut off on the right only when
    // something lies beyond that last index.
    $prefix = $clustLeft > 0 ? $delimiter : '';
    $suffix = $clustLeft + $clustLen + 1 < $textLen ? $delimiter : '';

    return $prefix . trim(mb_substr($text, $clustLeft, $clustLen + 1)) . $suffix;
}