I have been trying to port invRegex.py to a Node.js implementation for a while, but I'm still struggling with it. I already have the regular expression parse tree thanks to the `ret` library.
I just want to share what I came up with, using generators and based on invRegex.py:
// Demo driver: tokenize a sample pattern with `ret`, register its capturing
// groups, then print every string produced by the generator.
const ret = require('ret');
const tokens = ret('([ab]) ([cd]) \\1 \\2 z');

// Shared registry filled by capture() and read by generate() when it meets a
// back-reference; entry N-1 corresponds to capturing group N.
const references = [];

capture(tokens);

// The loop variable is declared explicitly: a bare `string` here would create
// an implicit global and throw a ReferenceError in strict mode.
for (const string of generate(tokens)) {
  console.log(string);
}
/**
 * Walks a `ret` parse tree and registers every remembered (capturing) group
 * in the shared `references` array, in the order the groups are encountered
 * (a group is registered before the groups nested inside it).
 *
 * Each registered entry is a zero-argument generator function that lazily
 * enumerates the strings of that group through `generate`. Both group shapes
 * (`stack` and `options`) are registered the same way so that a consumer can
 * always invoke `references[n]()`; storing a plain array for `options` groups
 * made such a call throw a TypeError.
 *
 * @param {object|object[]} token - A parse-tree node or an array of nodes.
 * @returns {boolean|undefined} `true` when `token` is a ROOT or GROUP node,
 *   `undefined` otherwise.
 */
function capture(token) {
  if (Array.isArray(token)) {
    for (const child of token) {
      capture(child);
    }
    return undefined;
  }

  const isGroup = token.type === ret.types.GROUP;

  if (token.type === ret.types.ROOT || isGroup) {
    const hasStack = token.hasOwnProperty('stack') === true;
    const hasOptions = token.hasOwnProperty('options') === true;

    if (isGroup && token.remember === true) {
      if (hasStack) {
        references.push(function* () {
          yield* generate(token.stack);
        });
      } else if (hasOptions) {
        // Passing the group node itself lets generate() walk every alternative.
        references.push(function* () {
          yield* generate(token);
        });
      }
    }

    // Descend afterwards so nested groups are registered after their parent.
    if (hasStack) {
      capture(token.stack);
    } else if (hasOptions) {
      for (const option of token.options) {
        capture(option);
      }
    }
    return true;
  }

  if (token.type === ret.types.REPETITION) {
    capture(token.value);
  }
  return undefined;
}
/**
 * Lazily enumerates the strings described by a `ret` parse-tree node.
 *
 * An array is treated as a sequence: the result is every concatenation of one
 * string from each element, in order. An empty sequence yields only `''`.
 * The parse tree is never modified.
 *
 * Known limitations:
 * - SET nodes ignore `token.not`; a negated set yields the listed members.
 * - A REFERENCE yields every string of the referenced group, not the string
 *   the group produced in the current result.
 * - REPETITION results are ordered by repeat count, not lexicographically.
 *   The loop runs until `token.max`, so an unbounded maximum never terminates
 *   (presumably `ret` reports Infinity for `*` and `+`; verify).
 *
 * @param {object|object[]} token - A parse-tree node or a sequence of nodes.
 * @yields {string} One generated string at a time.
 */
function* generate(token) {
  if (Array.isArray(token)) {
    // An empty sequence (e.g. the body of `()`) produces only the empty string.
    if (token.length === 0) {
      yield '';
      return;
    }
    if (token.length === 1) {
      yield* generate(token[0]);
      return;
    }
    const rest = token.slice(1);
    for (const prefix of generate(token[0])) {
      for (const suffix of generate(rest)) {
        yield prefix + suffix;
      }
    }
    return;
  }

  switch (token.type) {
    case ret.types.ROOT:
    case ret.types.GROUP: {
      // A node with `stack` has a single alternative. Build the list locally
      // instead of writing `options` back onto the caller's parse tree.
      const alternatives = token.hasOwnProperty('stack') === true ? [token.stack] : token.options;
      for (const alternative of alternatives) {
        yield* generate(alternative);
      }
      break;
    }
    case ret.types.POSITION:
      yield '';
      break;
    case ret.types.SET:
      // NOTE: `token.not` is not honoured; members are yielded as listed.
      for (const member of token.set) {
        yield* generate(member);
      }
      break;
    case ret.types.RANGE:
      for (let code = token.from; code <= token.to; ++code) {
        yield String.fromCharCode(code);
      }
      break;
    case ret.types.REPETITION:
      // A repeat count of zero builds an empty sequence, which yields ''.
      for (let count = token.min; count <= token.max; ++count) {
        yield* generate(Array.from({ length: count }, () => token.value));
      }
      break;
    case ret.types.REFERENCE: {
      const index = token.value - 1;
      if (references.hasOwnProperty(index)) {
        const captured = references[index];
        // Entries may be generator functions or precomputed arrays of strings.
        yield* (typeof captured === 'function' ? captured() : captured);
      } else {
        yield '';
      }
      break;
    }
    case ret.types.CHAR:
      yield String.fromCharCode(token.value);
      break;
    default:
      break;
  }
}
I still haven't figured out how to implement capturing groups / references, and the values yielded for the REPETITION
token type are not generated in lexicographic order yet, but other than that it works.