How to generate excerpt with most searched words in PHP?

前端 未结 4 943
栀梦
栀梦 2020-12-20 03:07

Here is an excerpt function:

    function excerpt($text, $phrase, $radius = 100, $ending = \"...\") {
270             if (empty($text) or empty($phrase)) {
2         


        
4条回答
  •  没有蜡笔的小新
    2020-12-20 03:16

    I could not contact erisco, so I am posting his function with multiple fixes (most importantly multibyte support).

    /**
     * Builds an excerpt of $text around the densest group of search-term matches.
     *
     * $phrase is split on whitespace into individual terms; every whole-word,
     * case-insensitive occurrence of any term in $text counts as a match. The
     * excerpt is taken around the run of consecutive matches that packs the most
     * matches into at most $span characters (the earliest such run wins ties),
     * padded on both sides up to roughly $span characters and, where possible,
     * cut on whitespace rather than in the middle of a word. When nothing
     * matches, the excerpt is taken from the beginning of the text.
     *
     * $text and $phrase are expected to be UTF-8 (the pattern uses the /u modifier).
     *
     * @param string $text text to be searched
     * @param string $phrase search string (one or more whitespace-separated terms)
     * @param int $span approximate length of the excerpt, in characters
     * @param string $delimiter string to use as a suffix and/or prefix if the excerpt is from the middle of a text
     *
     * @return string
     */
    public static function excerpt($text, $phrase, $span = 100, $delimiter = '...')
    {
        // Drop empty terms: an empty alternative would match at every word boundary.
        $terms = preg_split('/\s+/u', $phrase, -1, PREG_SPLIT_NO_EMPTY);
        if ($terms === false) {
            $terms = [];
        }

        // Collect every match as a character (not byte) position plus length.
        $hits = [];
        if (count($terms) > 0) {
            $quoted = [];
            foreach ($terms as $term) {
                $quoted[] = preg_quote($term, '/');
            }

            $regexp = '/\b(?:' . implode('|', $quoted) . ')\b/ui';
            $matches = [];
            preg_match_all($regexp, $text, $matches, PREG_OFFSET_CAPTURE);
            // preg_match_all() can fail (e.g. malformed UTF-8); treat that as "no matches".
            $found = isset($matches[0]) ? $matches[0] : [];

            // PREG_OFFSET_CAPTURE reports byte offsets even with /u
            // (@see https://bugs.php.net/bug.php?id=67487), so convert them to
            // character positions. Matches arrive in ascending order, which lets
            // us count only the characters between consecutive matches.
            $byteOffset = 0;
            $charOffset = 0;
            foreach ($found as $match) {
                $charOffset += mb_strlen(substr($text, $byteOffset, $match[1] - $byteOffset));
                $byteOffset = $match[1];
                $hits[] = [
                    'position' => $charOffset,
                    'length' => mb_strlen($match[0]),
                ];
            }
        }

        if (count($hits) > 0) {
            // Sliding window over the matches: for each match taken as the right
            // edge, advance the left edge until the window (start of the first
            // match to end of the last one) fits into $span, then keep the
            // earliest window that holds the most matches.
            $first = 0;
            $bestFirst = 0;
            $bestLast = 0;
            $bestCount = 0;
            foreach ($hits as $last => $hit) {
                $end = $hit['position'] + $hit['length'];
                while ($first < $last && $end - $hits[$first]['position'] > $span) {
                    $first++;
                }

                $count = $last - $first + 1;
                if ($count > $bestCount) {
                    $bestFirst = $first;
                    $bestLast = $last;
                    $bestCount = $count;
                }
            }

            $clustLeft = $hits[$bestFirst]['position'];
            $clustLen = $hits[$bestLast]['position'] + $hits[$bestLast]['length'] - $clustLeft;

            // Spread the unused part of the span around the window. A single match
            // longer than $span gets no padding instead of a negative one, which
            // would cut into the match itself.
            $padding = max(0, intval(round(($span - $clustLen) / 2)));
            $clustLeft -= $padding;
            if ($clustLeft < 0) {
                // The window would start before the text: clamp it to 0 and extend
                // the length by the unused left padding plus the right padding.
                $clustLen += $clustLeft * -1 + $padding;
                $clustLeft = 0;
            } else {
                $clustLen += $padding * 2;
            }
        } else {
            // Nothing matched: excerpt from the beginning of the text.
            $clustLeft = 0;
            $clustLen = $span;
        }

        $textLen = mb_strlen($text);

        // If the excerpt starts in the middle of a word, move the start forward to
        // the next whitespace. The scan is bounded by the end of the text; when no
        // whitespace follows, the original start is kept.
        if ($clustLeft > 0
            && !ctype_space(mb_substr($text, $clustLeft, 1))
            && !ctype_space(mb_substr($text, $clustLeft - 1, 1))
        ) {
            $scan = $clustLeft + 1;
            while ($scan < $textLen && !ctype_space(mb_substr($text, $scan, 1))) {
                $scan++;
            }

            if ($scan < $textLen) {
                $clustLeft = $scan;
            }
        }

        // The excerpt covers the characters $clustLeft .. $lastChar inclusive (see
        // the "+ 1" in the final mb_substr call). If it ends in the middle of a
        // word, move the end back to the previous whitespace. The scan never goes
        // left of the excerpt start; when no whitespace is found, the original end
        // is kept.
        $lastChar = $clustLeft + $clustLen;
        if ($lastChar + 1 < $textLen
            && !ctype_space(mb_substr($text, $lastChar, 1))
            && !ctype_space(mb_substr($text, $lastChar + 1, 1))
        ) {
            $scan = $lastChar - 1;
            while ($scan > $clustLeft && !ctype_space(mb_substr($text, $scan, 1))) {
                $scan--;
            }

            if ($scan >= $clustLeft && ctype_space(mb_substr($text, $scan, 1))) {
                $clustLen = $scan - $clustLeft;
            }
        }

        // Mark the sides on which text was left out.
        $prefix = $clustLeft > 0 ? $delimiter : '';
        $suffix = $clustLeft + $clustLen + 1 < $textLen ? $delimiter : '';

        return $prefix . trim(mb_substr($text, $clustLeft, $clustLen + 1)) . $suffix;
    }

提交回复
热议问题