Get index of each capture in a JavaScript regex

后端 未结 7 862
一个人的身影
一个人的身影 2020-11-29 04:43

I want to match a regex like /(a).(b)(c.)d/ with "aabccde", and get the following information back:

"a" at index = 0
"b" at index = 2


        
7条回答
  •  予麋鹿
    予麋鹿 (楼主)
    2020-11-29 05:04

    So, you have a text and a regular expression:

    // Sample input and the pattern whose capture-group positions we want.
    // Declared with `var`: assigning to an undeclared name creates an implicit
    // global and throws a ReferenceError in strict mode / ES modules.
    var txt = "aabccde";
    var re = /(a).(b)(c.)d/;
    

    The first step is to run the regular expression and get the overall match together with the substrings captured by each group:

    // exec() yields null when nothing matches; otherwise an array holding the
    // full match at [0], one entry per capture group after it, and an `index`
    // property with the start of the full match.
    // Declared with `var` so no implicit global is created.
    var subs = re.exec(txt);
    

    Then, you can search the text for each captured substring in turn. To avoid finding the same occurrence twice, keep track of the position just past the previous substring and start the next search from there. I've named this variable cursor.

    // Walk the capture groups left to right, looking for each captured text
    // from the point where the previous one ended. indexOf() returns the first
    // occurrence at or after `cursor`, so the reported position is a guess: it
    // can be too early when the same text also appears in an uncaptured part
    // of the match.
    var cursor = subs.index;
    for (var i = 1; i < subs.length; i++){
        // Declared locally; the bare assignments would otherwise create
        // implicit globals (a ReferenceError in strict mode / ES modules).
        var sub = subs[i];
        var index = txt.indexOf(sub, cursor);
        cursor = index + sub.length;

        console.log(sub + ' at index ' + index);
    }
    

    EDIT: Thanks to @nhahtdh, I've improved the mechanism and made a complete function:

    /**
     * Estimates where each capture group of `re` matched inside this string.
     *
     * Positions are reconstructed by searching for the captured text with
     * indexOf() rather than read from the regex engine, so they are a best
     * guess. For every group except the last, the search prefers the LAST
     * occurrence of the captured text that starts at or before the first
     * occurrence of the following group's text. Known miss:
     * "aaa".matchIndex(/(a).(.)/) reports the second group at 1 although it
     * actually matched at 2.
     *
     * @param {RegExp} re Pattern containing capture groups. Do not pass a
     *     regex with the `g` flag: String.prototype.match then returns only
     *     the full matches, without `index` or capture groups.
     * @returns {Array} One [capturedText, index] pair per capture group, in
     *     group order. An empty array when `re` does not match. A group that
     *     did not participate in the match is reported as [undefined, -1].
     */
    String.prototype.matchIndex = function(re){
        var res  = [];
        var subs = this.match(re);

        // No match: report no captures instead of throwing on `subs.index`.
        if (subs === null) {
            return res;
        }

        for (var cursor = subs.index, l = subs.length, i = 1; i < l; i++){
            var sub  = subs[i];
            var next = subs[i+1];

            // Optional group that did not participate: there is no text to
            // look for, and the cursor must stay where it is.
            if (sub === undefined) {
                res.push([sub, -1]);
                continue;
            }

            var index = cursor;

            if (i+1 !== l && next !== undefined && sub !== next) {
                // Use the following group as an upper bound and keep stepping
                // forward while this group's text still occurs at or before it.
                var nextIndex = this.indexOf(next, cursor);
                while (true) {
                    var currentIndex = this.indexOf(sub, index);
                    if (currentIndex !== -1 && currentIndex <= nextIndex)
                        index = currentIndex + 1;
                    else
                        break;
                }
                // The loop leaves `index` one past the last accepted occurrence.
                index--;
            } else {
                // Last group, or nothing usable to bound the search with:
                // take the first occurrence after the cursor.
                index = this.indexOf(sub, cursor);
            }
            cursor = index + sub.length;

            res.push([sub, index]);
        }
        return res;
    };
    
    
    // Demo runs: each printed entry is a [captured text, reported index] pair.
    var samples = [
        ["aabccde",    /(a).(b)(c.)d/],  // prints [ [ 'a', 1 ], [ 'b', 2 ], [ 'cc', 3 ] ]
        ["aaa",        /(a).(.)/],       // prints [ [ 'a', 0 ], [ 'a', 1 ] ] <-- problem here
        ["bababaaaaa", /(ba)+.(a*)/]     // prints [ [ 'ba', 4 ], [ 'aaa', 6 ] ]
    ];
    for (var s = 0; s < samples.length; s++) {
        console.log(samples[s][0].matchIndex(samples[s][1]));
    }
    

提交回复
热议问题