I have been working on a function to parse a formula for some time, but haven't been able to make it work properly. It seems to not always work - it filters some parts of t
It's probably easier to tokenize the expression you want to parse. When tokenized it's way easier to read that stream of tokens and build your own expressions.
I've put up a demo on jsFiddle which can parse your given formula.
In the demo I used this Tokenizer class and a set of tokens to build a TokenStream from the formula:
/**
 * Regex-based tokenizer.
 *
 * Tokens are registered by name with a regular-expression source string.
 * All registered expressions are combined into a single alternation in which
 * each token occupies exactly one capturing group; the index of the group
 * that matched is used to look up the token's name.
 *
 * Because names are resolved by group index, token expressions should not
 * contain capturing groups of their own - use non-capturing groups `(?:...)`.
 * Alternatives are tried in registration order, so register more specific
 * tokens (e.g. float) before more general ones (e.g. int).
 */
function Tokenizer() {
  // Maps token name -> regular-expression source string.
  this.tokens = {};
  // The regular expression which matches a token per group.
  this.regex = null;
  // Holds the names of the tokens. Index matches group. See buildExpression()
  this.tokenNames = [];
}

Tokenizer.prototype = {
  /**
   * Registers (or replaces) a token.
   * @param {string} name - Name reported for matches of this token.
   * @param {string} expression - Regular-expression source for the token.
   */
  addToken: function (name, expression) {
    this.tokens[name] = expression;
  },

  /**
   * Tokenizes `data` using the currently registered tokens.
   * Relies on `TokenStream`, which is defined outside this block.
   * @param {string} data - The text to tokenize.
   * @returns {TokenStream} A stream over the tokens found.
   */
  tokenize: function (data) {
    this.buildExpression(data);
    var tokens = this.findTokens(data);
    return new TokenStream(tokens);
  },

  /**
   * Rebuilds `this.regex` and `this.tokenNames` from the registered tokens.
   * @param {string} data - Unused; kept so existing callers keep working.
   */
  buildExpression: function (data) {
    // Own keys only, in insertion order.
    var names = Object.keys(this.tokens);
    var tokenRegex = [];
    for (var i = 0; i < names.length; i++) {
      tokenRegex.push('(' + this.tokens[names[i]] + ')');
    }
    // Replace (not append to) the name list so that repeated builds keep
    // tokenNames[i] aligned with capturing group i + 1.
    this.tokenNames = names;
    this.regex = new RegExp(tokenRegex.join('|'), 'g');
  },

  /**
   * Scans `data` and collects every token match. Text that matches no
   * registered token is skipped.
   * @param {string} data - The text to scan.
   * @returns {Array<{name: string, data: string}>} Tokens in input order.
   */
  findTokens: function (data) {
    if (this.regex === null) {
      this.buildExpression(data);
    }
    var tokens = [];
    var match;
    // The regex is global and therefore stateful; always scan from the start.
    this.regex.lastIndex = 0;
    while ((match = this.regex.exec(data)) !== null) {
      if (match[0] === '') {
        // A zero-length match does not advance lastIndex by itself; step
        // forward manually or the loop would never terminate.
        this.regex.lastIndex++;
        continue;
      }
      for (var group = 1; group < match.length; group++) {
        if (!match[group]) {
          continue;
        }
        tokens.push({
          name: this.tokenNames[group - 1],
          data: match[group]
        });
      }
    }
    return tokens;
  }
};
/**
 * A cursor over an array of tokens.
 *
 * Declared as a function declaration rather than by assigning to an
 * undeclared identifier, which throws a ReferenceError in strict mode and in
 * ES modules.
 *
 * @param {Array} tokens - The tokens to iterate over (stored by reference).
 */
function TokenStream(tokens) {
  // Index of the token that next() will return.
  this.cursor = 0;
  this.tokens = tokens;
}

TokenStream.prototype = {
  /**
   * Returns the token at the cursor and advances the cursor by one.
   * @returns {*} The current token, or undefined once past the end.
   */
  next: function () {
    return this.tokens[this.cursor++];
  },

  /**
   * Returns a token relative to the cursor without moving the cursor.
   * @param {number} [direction=0] - Offset from the cursor; may be negative.
   * @returns {*} The token at that position, or undefined if out of range.
   */
  peek: function (direction) {
    if (direction === undefined) {
      direction = 0;
    }
    return this.tokens[this.cursor + direction];
  }
};
The tokens are defined as follows (tokenizer is a Tokenizer instance):
// Token definitions. Registration order matters: the combined regex tries the
// alternatives in this order, so 'float' must precede 'int' and 'constant'
// must precede 'id'.
tokenizer.addToken('whitespace', '\\s+');
tokenizer.addToken('l_paren', '\\(');
tokenizer.addToken('r_paren', '\\)');
tokenizer.addToken('float', '[0-9]+\\.[0-9]+');
tokenizer.addToken('int', '[0-9]+');
tokenizer.addToken('div', '\\/');
tokenizer.addToken('mul', '\\*');
tokenizer.addToken('add', '\\+');
// The negative lookahead keeps identifiers that merely start with "pi"
// (e.g. "pixel", "PIN") from being split into constant(pi) + id(...).
// The group is non-capturing so the tokenizer's group indices stay aligned.
tokenizer.addToken('constant', '(?:pi|PI)(?![a-zA-Z0-9_])');
tokenizer.addToken('id', '[a-zA-Z_][a-zA-Z0-9_]*');
With the above tokens defined, the tokenizer can recognize everything in your formula. When the formula
2/0.6 pcs of foo * pi bar + sqrt(4) foobar
is tokenized, the result would be a token stream similar to:
int(2), div(/), float(0.6), whitespace( ), id(pcs), whitespace( ), id(of), whitespace( ), id(foo), whitespace( ), mul(*), whitespace( ), constant(pi), whitespace( ), id(bar), whitespace( ), add(+), whitespace( ), id(sqrt), l_paren((), int(4), r_paren()), whitespace( ), id(foobar)