Emulating SQL LIKE in JavaScript

后端 未结 9 1924
有刺的猬
有刺的猬 2020-11-30 05:56

How can I emulate the SQL keyword LIKE in JavaScript?

For those of you who don't know what LIKE is, it's a very simple regex which only supports the wildcards `%` (matches zero or more characters) and `_` (matches exactly one character).

9条回答
  •  野趣味
    野趣味 (楼主)
    2020-11-30 06:40

    I needed this, with escaping and working in Safari (no negative lookbehinds). Here is what I came up with:

    /**
     * Quotes a string following the same rules as https://www.php.net/manual/function.preg-quote.php
     *
     * Each of the characters `. \ + * ? [ ^ ] $ ( ) { } = ! < > | : -` found in `str`
     * is prefixed with a backslash, as is every character of `delimiter` when given.
     *
     * Sourced from https://locutus.io/php/preg_quote/
     *
     * @param {string} str String to quote.
     * @param {?string} [delimiter] Delimiter to also quote.
     * @returns {string} The quoted string.
     */
    function regexQuote(str, delimiter) {
        // The delimiter is looked up as plain characters rather than spliced into a
        // dynamically built character class: spliced behind a backslash, a delimiter
        // such as 'd' or 'w' would turn into the class escape `\d` / `\w` and quote
        // every digit or word character of `str`.
        const quotable = new Set(`.\\+*?[^]$(){}=!<>|:-${delimiter || ''}`);

        return Array.from(String(str), (ch) => (quotable.has(ch) ? `\\${ch}` : ch)).join('');
    }
    
    /**
     * Strips combining diacritical marks from a string.
     *
     * The input is first decomposed with `unorm.nfkd`, then every run of characters
     * in the U+0300..U+036F range is removed.
     *
     * Diacritical marks: {@link https://unicode-table.com/blocks/combining-diacritical-marks/}
     *
     * @param {string} str The string from which to strip the diacritical marks.
     * @returns {string} Stripped string.
     */
    function stripDiacriticalMarks(str) {
        const combiningMarks = /[\u0300-\u036f]+/g;
        const decomposed = unorm.nfkd(str);

        return decomposed.replace(combiningMarks, '');
    }
    
    /**
     * Checks if the string `haystack` is like `needle`. `needle` can contain '%' and '_'
     * characters, which behave as in a SQL LIKE condition: '%' matches any run of
     * characters (including an empty one) and '_' matches exactly one character; both
     * also match line breaks. Every other character is matched literally.
     *
     * Character escaping is supported with '\': the character following it is matched
     * literally. A lone '\' at the end of `needle` matches a literal backslash.
     *
     * @param {string} haystack The string to check if it is like `needle`.
     * @param {string} needle The string used to check if `haystack` is like it.
     * @param {boolean} [ai] Whether to check likeness in an accent-insensitive manner.
     * @param {boolean} [ci] Whether to check likeness in a case-insensitive manner.
     * @returns {boolean} True if `haystack` is like `needle`, otherwise, false.
     */
    function strLike(haystack, needle, ai = true, ci = true) {
        const subject = ai ? stripDiacriticalMarks(haystack) : haystack;
        const pattern = ai ? stripDiacriticalMarks(needle) : needle;

        // Only regex syntax characters are escaped. With the `u` flag, identity
        // escapes of other characters (e.g. `\=` or `\-`) are a SyntaxError.
        const quoteLiteral = (ch) => ch.replace(/[\\^$.*+?()[\]{}|]/, '\\$&');

        // The raw needle is tokenized BEFORE any regex quoting, so a backslash seen
        // here is always the user's escape character and never quoting residue.
        const tokens = [];

        for (let i = 0; i < pattern.length; ++i) {
            const ch = pattern[i];

            if (ch === '\\') {
                // Escaped character: take the next one literally. A trailing lone
                // backslash stands for itself.
                const hasNext = i + 1 < pattern.length;
                tokens.push(quoteLiteral(hasNext ? pattern[++i] : '\\'));
            } else if (ch === '_') {
                tokens.push('.');
            } else if (ch === '%') {
                tokens.push('.*');
            } else {
                tokens.push(quoteLiteral(ch));
            }
        }

        // `u`: '_' consumes a whole code point; `s`: wildcards also match line breaks.
        return new RegExp(`^${tokens.join('')}$`, `us${ci ? 'i' : ''}`).test(subject);
    }
    
    /**
     * Escapes a string so that `strLike` matches it verbatim: every '\', '%' and '_'
     * is prefixed with a backslash, which makes '%' and '_' stand for themselves
     * instead of acting as SQL LIKE wildcards.
     *
     * @param {string} str The string to escape.
     * @returns {string} The escaped string.
     */
    function escapeStrLike(str) {
        const replacements = new Map([
            ['\\', '\\\\'],
            ['%', '\\%'],
            ['_', '\\_'],
        ]);

        return Array.from(str, (ch) => replacements.get(ch) ?? ch).join('');
    }
    

    The code above is dependent on unorm, and is Unicode-aware so that it can catch cases like:

    strLike('Hello 

提交回复
热议问题