parsing user-typed Full Text Search queries into WHERE clause of MySQL using PHP

后端 未结 2 1729
孤街浪徒
孤街浪徒 2020-12-16 07:43

I want to convert user-typed FTS (full-text search) queries into a MySQL WHERE clause, so the functionality will be something like Gmail's search. Users will be able to type:

<         


        
2条回答
  •  暖寄归人
    2020-12-16 08:35

    The following code consists of the classes Tokenizer, Token and QueryBuilder. It is probably not the most elegant solution ever, but it actually does what you were asking:

     'id',
        'from' => 'from',
        'to' => 'to',
        'title' => 'title',
        'description' => 'description',
        'time_created' => 'time_created'
    );
    $fullTextFields = array('title','description');
    
    $qb = new QueryBuilder($fieldMapping, $fullTextFields);
    try {
        echo $qb->parseSearchRule($input);
    } catch (Exception $error) {
        // The literal below intentionally spans two lines; its bytes are part of the output.
        echo 'Error occurred while parsing search query: 
    '.$error->getMessage();
    }

    /**
     * A single lexical unit produced by Tokenizer.
     *
     * $type is one of the class constants, $chars is the matched source text and
     * $position is the byte offset of the token inside the trimmed input.
     */
    class Token
    {
        const KEYWORD = 'KEYWORD',
              OPEN_PAR = 'OPEN_PAR',
              CLOSE_PAR = 'CLOSE_PAR',
              FIELD = 'FIELD',
              AND_OP = 'AND_OP',
              OR_OP = 'OR_OP';

        public $type;
        public $chars;
        public $position;

        /**
         * @param string $type     One of the Token::* constants.
         * @param string $chars    The matched text.
         * @param int    $position Byte offset of the match in the tokenizer input.
         */
        public function __construct($type, $chars, $position)
        {
            $this->type = $type;
            $this->chars = $chars;
            $this->position = $position;
        }

        /**
         * @return string Debug representation of the token.
         */
        public function __toString()
        {
            return 'Token[ type='.$this->type.', chars='.$this->chars.', position='.$this->position.' ]';
        }
    }

    /**
     * Splits a search query into Token objects with a one-token lookahead.
     *
     * getToken() peeks at the next token, consumeToken() removes it. Both return
     * null once the input is exhausted.
     */
    class Tokenizer
    {
        private $tokens = array();
        private $input;
        private $currentPosition;

        /**
         * @param string $input Raw query; surrounding whitespace is trimmed.
         */
        public function __construct($input)
        {
            $this->input = trim((string) $input);
            $this->currentPosition = 0;
        }

        /**
         * Peeks at the next token without consuming it.
         *
         * @return Token|null The next token, or null at end of input.
         * @throws Exception When the remaining input cannot be tokenized.
         */
        public function getToken()
        {
            if (count($this->tokens) === 0) {
                $token = $this->nextToken();
                if ($token === null) {
                    return null;
                }
                $this->tokens[] = $token;
            }

            return $this->tokens[0];
        }

        /**
         * Returns the next token and removes it from the lookahead buffer.
         *
         * @return Token|null The consumed token, or null at end of input.
         * @throws Exception When the remaining input cannot be tokenized.
         */
        public function consumeToken()
        {
            $token = $this->getToken();
            if ($token === null) {
                return null;
            }
            array_shift($this->tokens);

            return $token;
        }

        /**
         * Scans one token starting at the current position.
         *
         * @return Token|null The scanned token, or null when only whitespace remains.
         * @throws Exception When no token pattern matches the remaining input.
         */
        protected function nextToken()
        {
            $reservedCharacters = '\:\s\(\)';
            $fieldExpr = '/^([^'.$reservedCharacters.']+)\:([^'.$reservedCharacters.']+)/';
            $keyWord = '/^([^'.$reservedCharacters.']+)/';
            // An operator must be followed by whitespace or "(", so that
            // "a OR(b)" parses while "ORANGE" and a trailing "AND" stay keywords.
            $andOperator = '/^AND(?=[\s(])/';
            $orOperator = '/^OR(?=[\s(])/';
            $whiteSpaces = '/^\s+/';

            // substr() returns false instead of '' on PHP < 8 when the offset
            // equals the string length; the cast makes both cases identical.
            $remaining = (string) substr($this->input, $this->currentPosition);

            // Skip leading whitespace.
            if (preg_match($whiteSpaces, $remaining, $matches)) {
                $this->currentPosition += strlen($matches[0]);
                $remaining = (string) substr($this->input, $this->currentPosition);
            }

            if ($remaining === '') {
                return null;
            }

            $first = $remaining[0];
            if ($first === '(') {
                return new Token(Token::OPEN_PAR, '(', $this->currentPosition++);
            }
            if ($first === ')') {
                return new Token(Token::CLOSE_PAR, ')', $this->currentPosition++);
            }

            // Order matters: "field:value" is tried first, then the operators,
            // and a bare keyword is the fallback.
            if (preg_match($fieldExpr, $remaining, $matches)) {
                $token = new Token(Token::FIELD, $matches[0], $this->currentPosition);
                $this->currentPosition += strlen($matches[0]);
            } elseif (preg_match($andOperator, $remaining)) {
                $token = new Token(Token::AND_OP, 'AND', $this->currentPosition);
                $this->currentPosition += 3;
            } elseif (preg_match($orOperator, $remaining)) {
                $token = new Token(Token::OR_OP, 'OR', $this->currentPosition);
                $this->currentPosition += 2;
            } elseif (preg_match($keyWord, $remaining, $matches)) {
                $token = new Token(Token::KEYWORD, $matches[0], $this->currentPosition);
                $this->currentPosition += strlen($matches[0]);
            } else {
                throw new Exception('Unable to tokenize: '.$remaining);
            }

            return $token;
        }
    }

    /**
     * Translates a user-typed search query into a MySQL WHERE-clause fragment.
     *
     * Grammar handled by the parser:
     *   searchRule       := [ simpleSearchRule ] KEYWORD*
     *   simpleSearchRule := expression ( OR expression )*
     *   expression       := simpleExpression ( AND simpleExpression )*
     *   simpleExpression := "(" simpleSearchRule ")" | FIELD
     *
     * Field expressions become `column` = 'value' comparisons; trailing keywords
     * become one MATCH ... AGAINST (... IN BOOLEAN MODE) clause.
     *
     * SECURITY NOTE: values are embedded in the SQL text after escaping. Prefer
     * passing an $escaper bound to the live connection (for example
     * array($mysqli, 'real_escape_string')) so escaping honours the connection
     * character set. The built-in fallback is byte based and assumes an
     * ASCII-compatible charset such as utf8 or latin1, with the
     * NO_BACKSLASH_ESCAPES SQL mode disabled.
     */
    class QueryBuilder
    {
        private $fieldMapping;
        private $fulltextFields;
        private $escaper;

        /**
         * @param array         $fieldMapping   Map of user-visible field name => column name.
         * @param array         $fulltextFields Column names used in the MATCH (...) clause.
         * @param callable|null $escaper        Optional callable(string): string that escapes
         *                                      a value for use inside a single-quoted SQL string.
         * @throws InvalidArgumentException When $escaper is given but not callable.
         */
        public function __construct($fieldMapping, $fulltextFields, $escaper = null)
        {
            if ($escaper !== null && !is_callable($escaper)) {
                throw new InvalidArgumentException('The escaper must be callable.');
            }
            $this->fieldMapping = $fieldMapping;
            $this->fulltextFields = $fulltextFields;
            $this->escaper = $escaper;
        }

        /**
         * Parses a complete query and returns the WHERE-clause fragment.
         *
         * @param string $input The raw user query.
         * @return string SQL fragment, or '' for an empty query.
         * @throws Exception On tokenizer or syntax errors, or on an unknown field.
         */
        public function parseSearchRule($input)
        {
            $t = new Tokenizer($input);

            $token = $t->getToken();
            if ($token === null) {
                return '';
            }

            // A query that starts with a keyword has no structured part.
            $searchRule = '';
            if ($token->type !== Token::KEYWORD) {
                $searchRule = $this->parseSimpleSearchRule($t);
            }

            // Whatever is left must be plain keywords for the full-text match.
            $words = array();
            while (($token = $t->consumeToken()) !== null) {
                if ($token->type !== Token::KEYWORD) {
                    throw new Exception('Only keywords allowed at end of search rule.');
                }
                $words[] = $token->chars;
            }

            if (count($words) === 0) {
                return $searchRule;
            }

            $match = 'MATCH (`'.implode('`,`', $this->fulltextFields).'`) AGAINST ('
                .'\''.$this->escapeValue(implode(' ', $words)).'\' IN BOOLEAN MODE)';

            if ($searchRule === '') {
                return $match;
            }

            return '('.$searchRule.') AND ('.$match.')';
        }

        /**
         * Parses one or more expressions joined by OR.
         *
         * @param Tokenizer $t Token source positioned at the start of the rule.
         * @return string SQL fragment with the parts joined by ' OR '.
         * @throws Exception On syntax errors or unknown fields.
         */
        protected function parseSimpleSearchRule(Tokenizer $t)
        {
            $expressions = array($this->parseExpression($t));
            while ($this->nextTokenIs($t, Token::OR_OP)) {
                $t->consumeToken();
                $expressions[] = $this->parseExpression($t);
            }

            return implode(' OR ', $expressions);
        }

        /**
         * Parses one or more simple expressions joined by AND.
         *
         * @param Tokenizer $t Token source positioned at the start of the expression.
         * @return string SQL fragment with the parts joined by ' AND '.
         * @throws Exception On syntax errors or unknown fields.
         */
        protected function parseExpression(Tokenizer $t)
        {
            $expressions = array($this->parseSimpleExpression($t));
            while ($this->nextTokenIs($t, Token::AND_OP)) {
                $t->consumeToken();
                $expressions[] = $this->parseSimpleExpression($t);
            }

            return implode(' AND ', $expressions);
        }

        /**
         * Parses a parenthesised rule or a single field:value comparison.
         *
         * @param Tokenizer $t Token source positioned at the start of the expression.
         * @return string SQL fragment.
         * @throws Exception On unexpected tokens, premature end of input or unknown fields.
         */
        protected function parseSimpleExpression(Tokenizer $t)
        {
            $token = $t->consumeToken();

            if ($token !== null && $token->type === Token::OPEN_PAR) {
                $inner = $this->parseSimpleSearchRule($t);
                $closing = $t->consumeToken();
                if ($closing === null || $closing->type !== Token::CLOSE_PAR) {
                    throw new Exception('Expected closing parenthesis, found: '.$this->describeToken($closing));
                }

                return '('.$inner.')';
            }

            if ($token !== null && $token->type === Token::FIELD) {
                $fieldVal = explode(':', $token->chars, 2);
                if (isset($this->fieldMapping[$fieldVal[0]])) {
                    return '`'.$this->fieldMapping[$fieldVal[0]].'` = \''.$this->escapeValue($fieldVal[1]).'\'';
                }
                throw new Exception('Unknown field selected: '.$token->chars);
            }

            throw new Exception(
                'Expected opening parenthesis or field-expression, found: '.$this->describeToken($token)
            );
        }

        /**
         * Tells whether the next (unconsumed) token exists and has the given type.
         *
         * @param Tokenizer $t    Token source.
         * @param string    $type One of the Token::* constants.
         * @return bool
         */
        private function nextTokenIs(Tokenizer $t, $type)
        {
            $token = $t->getToken();

            return $token !== null && $token->type === $type;
        }

        /**
         * Text used in error messages for a token that may be missing.
         *
         * @param Token|null $token
         * @return string The token text, or 'end of input' when there is no token.
         */
        private function describeToken($token)
        {
            return $token === null ? 'end of input' : $token->chars;
        }

        /**
         * Escapes a value for use inside a single-quoted SQL string literal.
         *
         * Resolution order: the injected escaper, then the legacy
         * mysql_real_escape_string() when the mysql extension is still loaded,
         * then a byte-level fallback that escapes the same characters
         * (NUL, \n, \r, \, ', " and Ctrl-Z).
         *
         * @param string $value Raw user-supplied value.
         * @return string Escaped value without surrounding quotes.
         */
        protected function escapeValue($value)
        {
            if ($this->escaper !== null) {
                return (string) call_user_func($this->escaper, $value);
            }

            // Removed in PHP 7.0; only reachable on legacy installations.
            if (function_exists('mysql_real_escape_string')) {
                return mysql_real_escape_string($value);
            }

            // strtr() with an array replaces in one pass, so backslashes that
            // are inserted here are never escaped a second time.
            return strtr((string) $value, array(
                "\\" => "\\\\",
                "\0" => "\\0",
                "\n" => "\\n",
                "\r" => "\\r",
                "'" => "\\'",
                '"' => '\\"',
                "\x1a" => "\\Z",
            ));
        }
    }
    ?>

    A cleaner solution would first build a parse tree and then, after some further analysis, transform it into a query.

提交回复
热议问题