I have strings, and I want to find two words in them: 'start' and 'end'.
'start' and 'end'
I'm having a hard time understanding exactly what you want, but if I understand correctly: you cannot do this with pure regex in JavaScript, because lookbehind (positive (?<=...) and negative (?<!...)) is not supported (it was only added to the language in ES2018), and thus you would not be able to match the 'start(n)' before the match result.
Instead, you can use capturing groups (subgroups); a convenient way to read them in JavaScript is the callback form of replace:
// Sample input: the wanted text sits between start(N) and the end(N) that
// carries the same number N.
var string = "something start(1) something_needed end(1) something";

// (\d+) captures N, \1 demands that same N inside end(...), and (.*) captures
// everything in between.
var regex = /start\((\d+)\)(.*)end\(\1\)/;

// Receives the whole match ($0) followed by the two captured groups ($1, $2).
function handleMatch($0, $1, $2) {
  var result = $2;
  console.log($2);
  // do stuff with $2 here
}

// replace() is called only to run the callback; its return value is discarded.
string.replace(regex, handleMatch);
$0 is the original match (start\((\d+)\)(.*)end\(\1\)).
$1 and $2 are the groups that are outputted by the regex.
$1 refers to (\d+). It's already used to 'store' the number after start (1 in this case). But here's where the magic happens: it gets loaded again and matched against with \1 inside the regex.
$2 is where the info you need is stored. It refers to (.*).
Here is a possible solution from Matching Nested Constructs in JavaScript, Part 2.
Example usage:
// No "g" flag, so only the first outermost START…END body is returned:
// [" text START text END text more "]
matchRecursiveRegExp("START text START text END text more END text", "START", "END");
// (c) 2007 Steven Levithan <stevenlevithan.com>
// MIT License
/*** matchRecursiveRegExp
	Accepts a string to search, a left and right format delimiter
	as regex patterns, and optional regex flags. Returns an array
	of matches, allowing nested instances of left/right delimiters.
	Use the "g" flag to return all matches, otherwise only the
	first is returned. Be careful to ensure that the left and
	right format delimiters produce mutually exclusive matches.
	Backreferences are not supported within the right delimiter
	due to how it is internally combined with the left delimiter.
	When matching strings whose format delimiters are unbalanced
	to the left or right, the output is intentionally as a
	conventional regex library with recursion support would
	produce, e.g. "<<x>" and "<x>>" both produce ["x"] when using
	"<" and ">" as the delimiters (both strings contain a single,
	balanced instance of "<x>").

	parameters:
		str   - the string to search
		left  - regex source (string) of the opening delimiter
		right - regex source (string) of the closing delimiter
		flags - optional regex flags (string); "g" selects all matches

	returns: array of strings, the text found between each matched
	outermost delimiter pair (an empty array when nothing matches)

	examples:
		matchRecursiveRegExp("test", "\\(", "\\)")
			returns: []
		matchRecursiveRegExp("<t<<e>><s>>t<>", "<", ">", "g")
			returns: ["t<<e>><s>", ""]
		matchRecursiveRegExp("<div id=\"x\">test</div>", "<div\\b[^>]*>", "</div>", "gi")
			returns: ["test"]
*/
function matchRecursiveRegExp (str, left, right, flags) {
	var f = flags || "",
		g = f.indexOf("g") > -1,
		// The caller's flags without "g". The scanner below always needs "g",
		// and passing a flag twice ("gg") makes the RegExp constructor throw
		// a SyntaxError, so "g" is stripped here and added back exactly once.
		base = f.replace(/g/g, ""),
		x = new RegExp(left + "|" + right, "g" + base),
		l = new RegExp(left, base),
		a = [],
		t, s, m;

	do {
		// t: current nesting depth; s: index just past the outermost opener.
		t = 0;
		while (m = x.exec(str)) {
			if (l.test(m[0])) {
				// Opening delimiter: remember where the outermost body starts.
				if (!t++) s = x.lastIndex;
			} else if (t) {
				// Closing delimiter inside an open pair (stray closers are ignored).
				if (!--t) {
					a.push(str.slice(s, m.index));
					if (!g) return a;
				}
			}
		}
		// Input ended with an unclosed opener: rescan from just after it so
		// that balanced pairs nested inside it are still found.
	} while (t && (x.lastIndex = s));

	return a;
}
// Demo: for each sample, write the first outermost start…end body to the page.
var demoInputs = [
	"something start something_needed end something",
	"start something start something end something end start something end",
	"start something start start something end something start end something end something end something start something end"
];
for (var i = 0; i < demoInputs.length; i++) {
	document.write(matchRecursiveRegExp(demoInputs[i], "start", "end") + "<br/>");
}
What you are looking for is to find a 'start', count the number of times another 'start' is found after it, and then ignore an equal number of 'end's. This cannot be done with regex alone.
It's impossible to compare the number of times two strings match with pure regex.
Instead, here is a semi-regex solution for this problem:
var string = "start(1) something start(2) start(3) something end(3) something start(4) end(4) something end(2) something end(1) something start(5) something end(5)";

// Matches a "start" ... "end" pair whose body contains no further "start",
// i.e. an innermost pair; the body is captured in group 1.
var innermostPair = /start((?:[^s]|s(?!tart))*?)end/;

var stop = false;
while (!stop) {
  var hit = innermostPair.exec(string);
  if (hit === null) {
    // Nothing left to peel off.
    stop = true;
  } else {
    var result = hit[1];
    //do stuff with result here..
    console.log(result);
    // Cut the matched pair out of the string so the next pass can reach the
    // pair that enclosed it.
    string = string.slice(0, hit.index) + string.slice(hit.index + hit[0].length);
  }
}
What's good about this method is that it is simple, and you can have any number of nested statements.