Generate syntax tree for simple math operations

后端 未结 4 1450
故里飘歌
故里飘歌 2020-12-31 19:27

I am trying to generate a syntax tree, for a given string with simple math operators (+, -, *, /, and parenthesis). Given the string \"1 + 2 * 3\":

It shou

相关标签:
4条回答
  • 2020-12-31 19:55

    To write a top-down parser without FLEX/BISON or a similar package, first write a tokenizer that can read the input and serve tokens.

    Basically you need a tokenizer that provides getNextToken, peekNextToken and skipNextToken.

    Then you work your way down using this structure.

    // parser.js
    // Parser state shared by every function in this file.
    var input;      // the string currently being parsed
    var currToken;  // one-token lookahead buffer; undefined when empty
    var pos;        // index of the next unread character in `input`

    // Tags stored in the `type` field of every token.
    var TOK_OPERATOR = 1;  // one of + - * / ( )
    var TOK_NUMBER = 2;    // a digit
    var TOK_EOF = 3;       // end of input reached
    
    /**
     * Reads the next token from `input`, starting at `pos`, and advances `pos`
     * past it.
     *
     * Whitespace between tokens is skipped, and a run of consecutive digits is
     * returned as a single number token (so "12" is one token, not two).
     *
     * @returns {Object} A token: `{type: TOK_OPERATOR, op}` for one of
     *   + - * / ( ), `{type: TOK_NUMBER, value}` where `value` is the digit
     *   string, or `{type: TOK_EOF}` once the input is exhausted.
     * @throws {string} "Unexpected character: <c>" for any other character.
     */
    function nextToken() {
      var c, start, tok = {};

      while (pos < input.length) {
        c = input.charAt(pos++);

        // Whitespace only separates tokens; it never produces one.
        if (c === ' ' || c === '\t' || c === '\n' || c === '\r')
          continue;

        switch (c) {
          case '+':
          case '-':
          case '*':
          case '/':
          case '(':
          case ')':
            tok.op = c;
            tok.type = TOK_OPERATOR;
            return tok;

          default:
            if (c >= '0' && c <= '9') {
              // Consume the rest of the digit run so the whole number is one token.
              start = pos - 1;
              while (pos < input.length &&
                     input.charAt(pos) >= '0' && input.charAt(pos) <= '9')
                pos++;
              tok.value = input.substring(start, pos);
              tok.type = TOK_NUMBER;
              return tok;
            }
            throw "Unexpected character: " + c;
        }
      }
      tok.type = TOK_EOF;
      return tok;
    }
    
    /**
     * Consumes and returns the next token.
     *
     * If a token was buffered by an earlier peek it is returned; otherwise a
     * fresh one is read with nextToken(). The lookahead buffer is empty
     * afterwards in both cases.
     *
     * @returns {Object} The consumed token.
     */
    function getNextToken() {
      var tok = currToken ? currToken : nextToken();
      currToken = undefined;
      return tok;
    }
    
    /**
     * Returns the next token without consuming it.
     *
     * The token is read with nextToken() on the first call and kept in
     * `currToken`, so repeated peeks return the same token object.
     *
     * @returns {Object} The upcoming token.
     */
    function peekNextToken() {
      if (currToken)
        return currToken;

      currToken = nextToken();
      return currToken;
    }
    
    /**
     * Discards the next token.
     *
     * A buffered lookahead token is simply dropped; when nothing is buffered,
     * one token is read with nextToken() and thrown away.
     */
    function skipNextToken() {
      if (!currToken)
        nextToken();  // nothing buffered: read one token and discard it

      currToken = undefined;
    }
    
    /**
     * Parses an arithmetic expression and returns its syntax tree.
     *
     * @param {string} str The expression text.
     * @returns {Object} Root node as returned by expression().
     * @throws {string} Whatever the tokenizer or the expression functions
     *   throw, or "Error unexpected trailing input: <token>" when tokens
     *   remain after a complete expression (e.g. "1)" or "1 2").
     */
    function parseString(str) {
      var tree, tok;

      input = str;
      pos = 0;
      // Drop any lookahead left over from a previous parse; otherwise the
      // stale token (typically the previous EOF) would be read first.
      currToken = undefined;

      tree = expression();

      // The whole input must belong to the expression.
      tok = peekNextToken();
      if (tok.type !== TOK_EOF)
        throw "Error unexpected trailing input: " +
              (tok.type === TOK_OPERATOR ? tok.op : tok.value);

      return tree;
    }
    
    
    /**
     * Entry point of the grammar: an expression is an additive expression.
     *
     * @returns {Object} The node produced by additiveExpression().
     */
    function expression() {
      var tree = additiveExpression();
      return tree;
    }
    
    /**
     * Parses a chain of multiplicative expressions joined by + or -.
     *
     * Each operator wraps the tree built so far as its left child, so the
     * chain is left-associative: "1-2-3" becomes (1-2)-3.
     *
     * @returns {Object} Either the single multiplicative expression, or an
     *   `{op, left, right}` node for the last operator in the chain.
     */
    function additiveExpression() {
      var tree = multiplicativeExpression();

      for (;;) {
        var tok = peekNextToken();

        // Anything other than + or - ends the chain; leave it unconsumed.
        if (tok.type != TOK_OPERATOR || (tok.op != '+' && tok.op != '-'))
          return tree;

        skipNextToken();
        tree = { op: tok.op, left: tree, right: multiplicativeExpression() };
      }
    }
    
    /**
     * Parses a chain of primary expressions joined by * or /.
     *
     * Like the additive level, every operator takes the tree built so far as
     * its left child, which makes the chain left-associative.
     *
     * @returns {Object} Either the single primary expression, or an
     *   `{op, left, right}` node for the last operator in the chain.
     */
    function multiplicativeExpression() {
      var isMulOp = function (t) {
        return t.type == TOK_OPERATOR && (t.op == '*' || t.op == '/');
      };

      var tree = primaryExpression();
      var tok = peekNextToken();

      while (isMulOp(tok)) {
        skipNextToken();
        tree = { op: tok.op, left: tree, right: primaryExpression() };
        tok = peekNextToken();
      }

      return tree;
    }
    
    /**
     * Parses a primary expression: a number, or a parenthesised expression.
     *
     * @returns {Object} `{value}` for a number token, or the node returned by
     *   expression() for the contents of a parenthesised group.
     * @throws {string} "Error ) expected" when a group is not closed,
     *   "Error end of input not expected" when the input stops where an
     *   operand is required, or "Error <op> not expected" when an operator
     *   other than "(" appears there.
     */
    function primaryExpression() {
      var node;
      var tok = peekNextToken();

      if (tok.type === TOK_NUMBER) {
        skipNextToken();
        node = {};
        node.value = tok.value;
        return node;
      }

      if (tok.type === TOK_OPERATOR && tok.op === '(') {
        skipNextToken();
        node = expression(); // The beauty of recursion
        tok = getNextToken();
        if (tok.type !== TOK_OPERATOR || tok.op !== ')')
          throw "Error ) expected";
        return node;
      }

      // Name the offending token; concatenating the token object itself
      // would only print "[object Object]".
      if (tok.type === TOK_OPERATOR)
        throw "Error " + tok.op + " not expected";
      throw "Error end of input not expected";
    }
    

    As you can see, you start by requesting the lowest-precedence operation, which requires the next-higher-precedence operation as its left and right terms, and so on. Unary operators have a slightly different structure. The neat thing is the recursion at the end, when a parenthesis is encountered.

    Here is a demo page that uses the parser and renders the parse tree (I had the code for it lying around...)

    <html>
    <head>
    <title>tree</title>
    <script src="parser.js"></script>
    </head>
    
    <body onload="testParser()">
    
    <script>
    
    /**
     * Creates an absolutely positioned, bordered <div> showing one tree node.
     * The element is returned without being attached to the document.
     *
     * @param {number} x Left offset in pixels.
     * @param {number} y Top offset in pixels.
     * @param {*} val Text shown inside the box.
     * @param {string} color CSS background colour of the box.
     * @returns {HTMLElement} The new element.
     */
    function createTreeNode(x, y, val, color) {
      var node = document.createElement("div");
      node.style.position = "absolute";
      // CSS lengths need a unit; a bare number is only accepted in quirks mode.
      node.style.left = x + "px";
      node.style.top = y + "px";

      node.style.border = "solid";
      node.style.borderWidth = "1px";
      node.style.backgroundColor = color;

      node.appendChild(document.createTextNode(val));

      return node;
    }
    
    // Vertical spacing unit in pixels, used for the distance between tree levels.
    var yStep = 24;
    // Size of the drawing area in pixels.
    var width = 800;
    var height = 600;

    // Background colours: RED for value nodes, BLUE for operator nodes.
    var RED = "#ffc0c0";
    var BLUE = "#c0c0ff";

    // Drawing area that receives every node and link. Declared explicitly so
    // it is not an implicit global (which strict mode rejects).
    var container = document.createElement("div");
    // CSS lengths need a unit; a bare number is only accepted in quirks mode.
    container.style.width = width + "px";
    container.style.height = height + "px";
    container.style.border = "solid";

    document.body.appendChild(container);

    // XML namespace required to create SVG elements.
    var svgNS = "http://www.w3.org/2000/svg";
    
    /**
     * Draws a straight black line from (x1, y1) to (x2, y2).
     *
     * The line lives in its own <svg>, wrapped in an absolutely positioned
     * <div> that covers the bounding box of the two points and is appended to
     * `container`.
     *
     * @param {number} x1 X coordinate of the start point, in pixels.
     * @param {number} y1 Y coordinate of the start point, in pixels.
     * @param {number} x2 X coordinate of the end point, in pixels.
     * @param {number} y2 Y coordinate of the end point, in pixels.
     */
    function renderLink(x1, y1, x2, y2)
    {
      // Bounding box of the two end points.
      var left = Math.min(x1,x2);
      var top = Math.min(y1,y2);

      // +1 keeps the box non-empty for perfectly vertical or horizontal links.
      var width = 1+Math.abs(x2-x1);
      var height = 1+Math.abs(y2-y1);

      var svg = document.createElementNS(svgNS, "svg");
      svg.setAttribute("x", left);
      svg.setAttribute("y",  top);
      svg.setAttribute("width", width );
      svg.setAttribute("height", height );

      var line = document.createElementNS(svgNS,"line");

      // Line coordinates are relative to the bounding box origin.
      line.setAttribute("x1", (x1 - left) );
      line.setAttribute("x2", (x2 - left) );
      line.setAttribute("y1", (y1 - top) );
      line.setAttribute("y2", (y2 - top) );
      line.setAttribute("stroke-width",  "1");
      line.setAttribute("stroke",  "black");
      svg.appendChild(line);

      var div = document.createElement("div");
      div.style.position = "absolute";
      // CSS lengths need a unit; a bare number is only accepted in quirks mode.
      div.style.left = left + "px";
      div.style.top = top + "px";
      div.style.width = width + "px";
      div.style.height = height + "px";

      div.appendChild(svg);
      container.appendChild(div);
    }
    
    /**
     * Returns the element's `offsetHeight`.
     *
     * @param {HTMLElement} dom Element to measure.
     * @returns {number} The value of `dom.offsetHeight`.
     */
    function getHeight(dom) {
        return dom.offsetHeight;
    }
    
    /**
     * Returns the element's `offsetWidth`.
     *
     * @param {HTMLElement} dom Element to measure.
     * @returns {number} The value of `dom.offsetWidth`.
     */
    function getWidth(dom) {
        return dom.offsetWidth;
    }
    
    /**
     * Renders `node` at (x, y) inside `container`, then recursively renders
     * its children one level below and links them to it.
     *
     * @param {number} x Left position of this node's box, in pixels.
     * @param {number} y Top position of this node's box, in pixels.
     * @param {Object} node Tree node: `{op, left, right}` or `{value}`.
     * @param {number} width Horizontal spread: children are placed at
     *   x -/+ width/2 and receive half of it for their own children.
     * @param {number} height Vertical distance to the child level; never less
     *   than 1.5 * yStep. Children receive half of it.
     * @returns {HTMLElement} The element created for `node`.
     */
    function renderTree(x, y, node, width, height)
    {
        // All locals are declared here; `color` in particular must not leak
        // out as an implicit global.
        var val, color, dom, w, h, nx, ny, child;

        // Keep levels far enough apart that the boxes do not overlap.
        if(height < 1.5*yStep)
            height = 1.5*yStep;

        if(node.op) {
            val = node.op;
            color = BLUE;
        }
        else if(node.value !== undefined && node.value !== null) {
            // Explicit check so that a numeric 0 is still shown as a value.
            val = node.value;
            color = RED;
        }
        else {
            val = "?";
            color = "";  // unknown node kind: no background colour
        }

        dom = createTreeNode(x, y, val, color);
        container.appendChild(dom);

        // Measured after insertion, when the element has a layout size.
        w = getWidth(dom);
        h = getHeight(dom);

        if(node.left) {
            nx = x - width/2;
            ny = y+height;
            child = renderTree(nx, ny, node.left, width/2, height/2);
            // From this box's bottom centre to the child's top centre.
            renderLink(x+w/2, y+h, nx+getWidth(child)/2, ny);
        }

        if(node.right) {
            nx = x + width/2;
            ny = y+height;
            child = renderTree(nx, ny, node.right, width/2, height/2);
            renderLink(x+w/2, y+h, nx+getWidth(child)/2, ny);
        }
        return dom;
    }
    
    var root;

    /**
     * Demo entry point, run from the page's onload handler: shows a fixed
     * sample expression as text, parses it and draws the resulting tree.
     */
    function testParser()
    {
      var source = "1+2*5-5*(9+2)";

      // Show the expression text inside the drawing area.
      var label = document.createElement("div");
      label.appendChild(document.createTextNode(source));
      container.appendChild(label);

      // Root is horizontally centred, 20px from the top.
      var tree = parseString(source);
      renderTree(width/2, 20, tree, width/2, 4*yStep);
    }
    
    </script>
    
    </body>
    </html>
    
    0 讨论(0)
  • 2020-12-31 19:56

    Have you read up on the theory behind parsers? Wikipedia (as always) has some good articles to read:

    • LR parser
    • Recursive descent parser
    0 讨论(0)
  • 2020-12-31 19:57

    The thing to do is to use a lexer/parser generator like flex or ANTLR (a Google search will find one for your language).

    But if you are doing this for fun or to learn how parsers work, look up the recursive descent parser article on Wikipedia.

    A simple recursive descent parser can be easily made for simple expressions like this. You can define the grammar as:

    <expression> ::= <term> | <term> <add_op> <expression>
    <term> ::= <factor> | <factor> <mul_op> <term>
    <factor> ::= ( <expression> ) | <number> 
    <add_op> ::= + | -
    <mul_op> ::= * | /
    

    Notice that by making the rule for <term> contain the rule for <factor> this grammar makes sure all multiplication/division operations occur lower in the parse tree than any addition/subtraction. This ensures those operations are evaluated first.

    0 讨论(0)
  • 2020-12-31 19:58

    I built a fun little calculator once and had the same problem as you, which I solved by first building the syntax tree without taking operator precedence into account. Each node has a precedence value, and when evaluating non-constants I'd check the left node: if it has lower precedence, I'd rotate the tree clockwise to bring it into evaluation and evaluate that first; likewise for the right node. Then I'd just try to evaluate again. It seemed to work well enough for me.

    0 讨论(0)
提交回复
热议问题