How best to parse a simple grammar?

前端 未结 5 1120
礼貌的吻别
礼貌的吻别 2020-12-07 15:13

Ok, so I've asked a bunch of smaller questions about this project, but I still don't have much confidence in the designs I'm coming up with, so I'm going to ask a question…

5条回答
  •  谎友^
    谎友^ (楼主)
    2020-12-07 15:30

    def parse(astr):
        """Parse a course-requirement string into groups of alternatives.

        The input is a whitespace/comma separated sequence of department
        names (purely alphabetic tokens) and course numbers (integer tokens),
        optionally joined by the words ``and`` and ``or``.  A number with no
        department directly in front of it reuses the most recent department,
        including across an ``or``.

        Args:
            astr: The requirement text, e.g. ``"MATH 2210, 2230, or 2940"``.

        Returns:
            A list of options.  Each option is a list of ``(dept, number)``
            tuples; ``or`` closes the current option and starts a new one,
            while commas and ``and`` keep adding to the current option.  A
            trailing empty option is not included.

        Raises:
            ValueError: If a token is neither alphabetic nor a valid integer.
        """
        # Commas are pure separators.  Turn them into whitespace instead of
        # deleting them so that "2110,3300" does not fuse into one number.
        tokens = astr.replace(',', ' ').split()
        dept = None
        result = []
        option = []
        for tok in tokens:
            # Match 'and' as a whole token only; stripping the substring would
            # corrupt department names that merely contain those letters.
            if tok == 'and':
                continue
            if tok == 'or':
                result.append(option)
                option = []
                continue
            if tok.isalpha():
                # A department name only updates the context; the course is
                # emitted when its number arrives.
                dept = tok
                continue
            number = int(tok)
            # Compare against None rather than relying on truthiness so that
            # a course number of 0 is still recorded.
            if dept is not None:
                option.append((dept, number))
        if option:
            result.append(option)
        return result
    
    if __name__=='__main__':
        # Self-check: each case pairs an input string with the structure that
        # parse() is expected to return for it.
        cases = (
            ("CS 2110", [[("CS", 2110)]]),
            ("CS 2110 and INFO 3300", [[("CS", 2110), ("INFO", 3300)]]),
            ("CS 2110, INFO 3300", [[("CS", 2110), ("INFO", 3300)]]),
            ("CS 2110, 3300, 3140",
             [[("CS", 2110), ("CS", 3300), ("CS", 3140)]]),
            ("CS 2110 or INFO 3300", [[("CS", 2110)], [("INFO", 3300)]]),
            ("MATH 2210, 2230, 2310, or 2940",
             [[("MATH", 2210), ("MATH", 2230), ("MATH", 2310)],
              [("MATH", 2940)]]),
        )

        for text, expected in cases:
            actual = parse(text)
            if actual != expected:
                # Report the first mismatch and stop checking further cases.
                print('ERROR: {0} => {1} != {2}'.format(text, actual, expected))
                break
            print('GOOD: {0} => {1}'.format(text, expected))
    

    yields

    GOOD: CS 2110 => [[('CS', 2110)]]
    GOOD: CS 2110 and INFO 3300 => [[('CS', 2110), ('INFO', 3300)]]
    GOOD: CS 2110, INFO 3300 => [[('CS', 2110), ('INFO', 3300)]]
    GOOD: CS 2110, 3300, 3140 => [[('CS', 2110), ('CS', 3300), ('CS', 3140)]]
    GOOD: CS 2110 or INFO 3300 => [[('CS', 2110)], [('INFO', 3300)]]
    GOOD: MATH 2210, 2230, 2310, or 2940 => [[('MATH', 2210), ('MATH', 2230), ('MATH', 2310)], [('MATH', 2940)]]
    

提交回复
热议问题