Wildcard string matching

后端 未结 9 1546
醉话见心
醉话见心 2021-01-02 01:54

What is the most efficient wildcard string matching algorithm? I am asking only about an idea, it is not necessary to provide actual code.

I\'m thinking that such al

9条回答
  •  心在旅途
    2021-01-02 02:28

    If your pattern can contain only * wild cards, then the trivial implementation is as fast as possible. The main thing to realize in this case, is that you only need to look for each card once (card = segment between stars).

    Here is an implementation (supporting only * wild cards):

    #include 
    #include 
    #include 
    #include 
    #include 
    #include 
    
    using namespace std;
    
    class wildcard_pattern {
    public:
        explicit wildcard_pattern(const string& text);
    
        bool match(const char* begin, const char* end) const;
    
        bool match(const char* c_str) const;
    
    private:
        string m_text;
    
        struct card {
            size_t m_offset, m_size;
            card(size_t begin, size_t end);
        };
    
        // Must contain at least one card. The first, and the last card
        // may be empty strings. All other cards must be non-empty. If
        // there is exactly one card, the pattern matches a string if, and
        // only if the string is equal to the card. Otherwise, the first
        // card must be a prefix of the string, and the last card must be
        // a suffix.
        vector m_cards;
    };
    
    
    wildcard_pattern::wildcard_pattern(const string& text):
        m_text(text)
    {
        size_t pos = m_text.find('*');
        if (pos == string::npos) {
            m_cards.push_back(card(0, m_text.size()));
            return;
        }
        m_cards.push_back(card(0, pos));
        ++pos;
        for (;;) {
            size_t pos_2 = m_text.find('*', pos);
            if (pos_2 == string::npos)
                break;
            if (pos_2 != pos)
                m_cards.push_back(card(pos, pos_2));
            pos = pos_2 + 1;
        }
        m_cards.push_back(card(pos, m_text.size()));
    }
    
    bool wildcard_pattern::match(const char* begin, const char* end) const
    {
        const char* begin_2 = begin;
        const char* end_2   = end;
    
        size_t num_cards = m_cards.size();
        typedef string::const_iterator str_iter;
    
        // Check anchored prefix card
        {
            const card& card = m_cards.front();
            if (size_t(end_2 - begin_2) < card.m_size)
                return false;
            str_iter card_begin = m_text.begin() + card.m_offset;
            if (!equal(begin_2, begin_2 + card.m_size, card_begin))
                return false;
            begin_2 += card.m_size;
        }
    
        if (num_cards == 1)
            return begin_2 == end_2;
    
        // Check anchored suffix card
        {
            const card& card = m_cards.back();
            if (size_t(end_2 - begin_2) < card.m_size)
                return false;
            str_iter card_begin = m_text.begin() + card.m_offset;
            if (!equal(end_2 - card.m_size, end_2, card_begin))
                return false;
            end_2 -= card.m_size;
        }
    
        // Check unanchored infix cards
        for (size_t i = 1; i != num_cards-1; ++i) {
            const card& card = m_cards[i];
            str_iter card_begin = m_text.begin() + card.m_offset;
            str_iter card_end   = card_begin + card.m_size;
            begin_2 = search(begin_2, end_2, card_begin, card_end);
            if (begin_2 == end_2)
                return false;
            begin_2 += card.m_size;
        }
    
        return true;
    }
    
    inline bool wildcard_pattern::match(const char* c_str) const
    {
        const char* begin = c_str;
        const char* end   = begin + strlen(c_str);
        return match(begin, end);
    }
    
    inline wildcard_pattern::card::card(size_t begin, size_t end)
    {
        m_offset = begin;
        m_size   = end - begin;
    }
    
    
    int main(int, const char* argv[])
    {
        wildcard_pattern pat(argv[1]);
        cout << pat.match(argv[2]) << endl;
    }
    

提交回复
热议问题