How to parse multiple line code using RPLY library?

一世执手 提交于 2021-02-04 08:36:45

问题


I am working on the development of a new language and I am using RPLY library for lexing and parsing purposes. Now I am stuck at getting an error when I use more than one line in the code file.

here are my files:-

mylexer.py

from rply import LexerGenerator


class Lexer():
    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        # Print
        self.lexer.add('PRINT', r'print')
        # Parenthesis
        self.lexer.add('OPEN_PAREN', r'\(')
        self.lexer.add('CLOSE_PAREN', r'\)')
        # Semi Colon
        self.lexer.add('SEMI_COLON', r'\;')
        # Operators
        self.lexer.add('SUM', r'\+')
        self.lexer.add('SUB', r'\-')
        self.lexer.add('DIV', r'\/')
        self.lexer.add('MUL', r'\*')
        self.lexer.add('MOD', r'\%')
        # Number
        #self.lexer.add('INT', r'^[-+]?\d+$')
        self.lexer.add('NUMBER',r'[-+]?[0-9]*\.?[0-9]+')
        # Ignore spaces
        self.lexer.ignore('\s+')
        #self.lexer.ignore('\n+')
        #self.lexer.ignore('\\*.*?\\*/')

    def get_lexer(self):
        self._add_tokens()
        return self.lexer.build()

myparser.py

from rply import ParserGenerator
from ast import *


class Parser():
    def __init__(self):
        self.pg = ParserGenerator(
            # A list of all token names accepted by the parser.
            ['NUMBER', 'PRINT', 'OPEN_PAREN', 'CLOSE_PAREN',
             'SEMI_COLON', 'SUM', 'SUB','MUL','DIV','MOD']
        )

    def parse(self):

        @self.pg.production('program : PRINT OPEN_PAREN expression CLOSE_PAREN SEMI_COLON')
        def program(p):
            return Print(p[2])

        @self.pg.production('expression : expression SUM expression')
        @self.pg.production('expression : expression SUB expression')
        @self.pg.production('expression : expression MUL expression')
        @self.pg.production('expression : expression DIV expression')
        @self.pg.production('expression : expression MOD expression')
        def expression(p):
            left = p[0]
            right = p[2]
            operator = p[1]
            if operator.gettokentype() == 'SUM':
                return Sum(left, right)
            elif operator.gettokentype() == 'SUB':
                return Sub(left, right)
            elif operator.gettokentype() == 'MUL':
                return Mul(left, right)
            elif operator.gettokentype() == 'DIV':
                return Div(left, right)
            elif operator.gettokentype() == 'MOD':
                return Mod(left, right)

        @self.pg.production('expression : NUMBER')
        def number(p):
            return Number(p[0].value)

        @self.pg.error
        def error_handle(token):
            raise ValueError(token)

    def get_parser(self):
        return self.pg.build()

ast.py

class Number():
    def __init__(self, value):
        self.value = value

    def eval(self):
        try:
            return int(self.value)
        except ValueError:
            return float(self.value)


class BinaryOp():
    def __init__(self, left, right):
        self.left = left
        self.right = right


class Sum(BinaryOp):
    def eval(self):
        return self.left.eval() + self.right.eval()


class Sub(BinaryOp):
    def eval(self):
        return self.left.eval() - self.right.eval()

class Mul(BinaryOp):
    def eval(self):
        return self.left.eval() * self.right.eval()

class Div(BinaryOp):
    def eval(self):
        return self.left.eval() / self.right.eval()

class Mod(BinaryOp):
    def eval(self):
        return self.left.eval() % self.right.eval()



class Print():
    def __init__(self, value):
        self.value = value

    def eval(self):
        print(self.value.eval())

main.py

from mylexer import Lexer
from myparser import Parser

data=""""""
with open('test.vlj', 'r') as file:
    data = file.read()

lexer = Lexer().get_lexer()
tokens = lexer.lex(data)
# print(type(tokens))
# for token in tokens:
#     print(token)

pg = Parser()
pg.parse()
parser = pg.get_parser()
parser.parse(tokens).eval()

test.vlj

print(12.5*2);
print(150+17.5);

When I write only one line in test.vlj it gets evaluated but the problem arises when using more than a line. How should I write my production to parse multiple lines of code? Also if possible can you give me some links to tutorials and docs to learn RPLY.


回答1:


program : PRINT OPEN_PAREN expression CLOSE_PAREN SEMI_COLON

According to this, a program consists of a single print statement, so print(12.5*2); print(150+17.5); simply doesn't conform to the grammar.

I would suggest renaming the above rule to statement and then have a program rule that can match one or more statements. The productions for that would be program : program statement and program : statement.



来源:https://stackoverflow.com/questions/60016733/how-to-parse-multiple-line-code-using-rply-library

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!