Porting invRegex.py to JavaScript (Node.js)

后端 未结 6 1895
余生分开走
余生分开走 2020-12-16 03:16

I have been trying to port invRegex.py to a Node.js implementation for a while, but I'm still struggling with it. I already have the regular expression parse tree thanks to …

6条回答
  •  独厮守ぢ
    2020-12-16 03:56

    I just want to share what I came up with, using generators and based on invRegex.py:

    const ret = require('ret');

    // Parse the pattern into ret's token tree. `\\1` and `\\2` are
    // back-references to the two capturing groups.
    const tokens = ret('([ab]) ([cd]) \\1 \\2 z');

    // Shared table filled by capture() and read by generate(): the entry at
    // index N - 1 belongs to capturing group N.
    const references = [];

    capture(tokens);

    // The loop variable is declared explicitly: a bare `string` would create an
    // implicit global and throws a ReferenceError in strict mode / ES modules.
    for (const string of generate(tokens)) {
        console.log(string);
    }
    
    /**
     * Walks a ret token tree and registers every capturing group in the shared
     * `references` array, in the order the groups are encountered (an outer
     * group is registered before the groups nested inside it).
     *
     * Every registered entry is a generator function, so a consumer can always
     * invoke it as `references[n]()`. Previously groups made of alternatives
     * (`options`) were stored as a plain array, which cannot be called.
     *
     * @param {Object|Object[]} token - A ret token or an array of ret tokens.
     * @returns {boolean|undefined} `true` when `token` is a ROOT or GROUP
     *     token, `undefined` otherwise.
     */
    function capture(token) {
        if (Array.isArray(token)) {
            for (const child of token) {
                capture(child);
            }

            return undefined;
        }

        if (token.type === ret.types.REPETITION) {
            // A repeated element may itself contain capturing groups.
            capture(token.value);

            return undefined;
        }

        if ((token.type !== ret.types.ROOT) && (token.type !== ret.types.GROUP)) {
            return undefined;
        }

        const hasStack = token.hasOwnProperty('stack') === true;
        const hasOptions = token.hasOwnProperty('options') === true;

        if ((token.type === ret.types.GROUP) && (token.remember === true)) {
            if (hasStack) {
                references.push(function* () {
                    yield* generate(token.stack);
                });
            }

            else if (hasOptions) {
                // Lazily enumerate each alternative in turn.
                references.push(function* () {
                    for (const option of token.options) {
                        yield* generate(option);
                    }
                });
            }
        }

        // Descend after registering so nested groups get higher indices.
        if (hasStack) {
            capture(token.stack);
        }

        else if (hasOptions) {
            for (const option of token.options) {
                capture(option);
            }
        }

        return true;
    }
    
    /**
     * Lazily yields the strings described by a ret token or by a sequence
     * (array) of ret tokens.
     *
     * A sequence yields every concatenation of its elements' strings; an empty
     * sequence yields only the empty string. The token tree is never modified.
     *
     * Known limitations:
     *  - A REFERENCE enumerates every string of the referenced group instead
     *    of repeating the text that group produced in the current result.
     *  - A REPETITION whose `max` is unbounded never finishes.
     *  - Negated sets are complemented against printable ASCII (0x20-0x7E)
     *    only; this alphabet is a deliberate, finite choice.
     *
     * Reads the shared `references` array; each entry may be a generator
     * function or an array of strings.
     *
     * @param {Object|Object[]} token - A ret token or an array of ret tokens.
     * @yields {string} One generated string at a time.
     */
    function* generate(token) {
        if (Array.isArray(token)) {
            if (token.length === 0) {
                // e.g. the empty alternative in `a|`: only '' matches.
                yield '';
                return;
            }

            if (token.length === 1) {
                yield* generate(token[0]);
                return;
            }

            const rest = token.slice(1);

            for (const prefix of generate(token[0])) {
                for (const suffix of generate(rest)) {
                    yield prefix + suffix;
                }
            }

            return;
        }

        const types = ret.types;

        if ((token.type === types.ROOT) || (token.type === types.GROUP)) {
            // Treat a plain stack as a single alternative without writing
            // back into the caller's token.
            const alternatives = (token.hasOwnProperty('stack') === true)
                ? [token.stack]
                : token.options;

            for (const alternative of alternatives) {
                yield* generate(alternative);
            }
        }

        else if (token.type === types.POSITION) {
            // Anchors consume no characters.
            yield '';
        }

        else if (token.type === types.SET) {
            if (token.not === true) {
                // Collect everything the set would match, then yield the
                // printable ASCII characters that are NOT in it.
                const excluded = new Set();

                for (const member of token.set) {
                    for (const text of generate(member)) {
                        excluded.add(text);
                    }
                }

                for (let code = 0x20; code <= 0x7e; ++code) {
                    const character = String.fromCharCode(code);

                    if (!excluded.has(character)) {
                        yield character;
                    }
                }
            }

            else {
                for (const member of token.set) {
                    yield* generate(member);
                }
            }
        }

        else if (token.type === types.RANGE) {
            for (let code = token.from; code <= token.to; ++code) {
                yield String.fromCharCode(code);
            }
        }

        else if (token.type === types.REPETITION) {
            for (let count = token.min; count <= token.max; ++count) {
                const repeated = [];

                for (let k = 0; k < count; ++k) {
                    repeated.push(token.value);
                }

                // Zero copies form an empty sequence, which yields ''.
                yield* generate(repeated);
            }
        }

        else if (token.type === types.REFERENCE) {
            // Group numbers are 1-based; the table is 0-based.
            const captured = references[token.value - 1];

            if (typeof captured === 'function') {
                yield* captured();
            }

            else if (Array.isArray(captured)) {
                yield* captured;
            }

            else {
                // Unknown group: contributes nothing to the string.
                yield '';
            }
        }

        else if (token.type === types.CHAR) {
            yield String.fromCharCode(token.value);
        }
    }
    

    I still haven't figured out how to implement capturing groups / back-references, and the values yielded for the REPETITION token type are not yet generated in lexicographic order; other than that, it works.

提交回复
热议问题