recursive descent parser and functional programming

后端 未结 5 535
一生所求
一生所求 2021-01-30 11:12

So lately I have been working on writing a simple compiler to better understand compiler concepts. Being a diligent reader of Stack Overflow, it seems there is a consensus that

5条回答
  •  渐次进展
    2021-01-30 11:48

    One strategy for functional parsing is monadic parser combinators. You can read some about it here (and follow links) or use a library like FParsec. I do not recommend this approach if you're just learning/starting F#/compilers, though.

    Another approach with F# is to use FsLex/FsYacc (in the PowerPack). I kinda loathe Lex/Yacc technology, so I also don't recommend this.

    I think you should write a recursive descent parser by hand. I don't have strong feelings regarding a tokenizer, but simply tokenizing the entire file into a(n immutable) list of tokens and then doing recursive descent (and leveraging some pattern-matching) is a good way to deal with parsing. And of course, you'll want to use discriminated unions to represent the AST output of the parser (a la here).

    I haven't read the dragon book in a long time, but I'm apparently the only person on the planet who doesn't like it. I would consider abandoning that text in favor of a book that discusses compilers using some ML-based language, though I can't recommend one offhand.

    EDIT

    I haven't done one of these in a while, so I took a few minutes to code a small sample.

    // AST for tiny language
    /// Operators that can sit between the two operands of a BinaryOp.
    type Op = 
        | Plus 
        | Minus 
    /// Expression node: either an integer literal or a binary operation
    /// combining two sub-expressions with an Op.
    type Expr = 
        | Literal of int 
        | BinaryOp of Expr * Op * Expr // left, op, right 
    /// Statement node: a two-armed conditional or a print of an expression.
    type Stmt =
        | IfThenElse of Expr * Stmt * Stmt // cond, then, else; 0=false in cond 
        | Print of Expr
    
    // sample program: one if/then/else statement whose condition is an
    // arithmetic expression and whose two branches are print statements.
    // It is a verbatim string, so the line breaks and indentation below are
    // part of the text handed to the tokenizer.
    let input = @"
        if 1+1-1 then 
            print 42 
        else 
            print 0"
    
    // expected AST for the sample program, assembled bottom-up:
    // the condition is (1 + 1) - 1, the then-branch prints 42 and the
    // else-branch prints 0
    let goal =
        let onePlusOne = BinaryOp(Literal 1, Plus, Literal 1)
        let condition = BinaryOp(onePlusOne, Minus, Literal 1)
        let whenTrue = Print(Literal 42)
        let whenFalse = Print(Literal 0)
        IfThenElse(condition, whenTrue, whenFalse)
    
    ////////////////////////////////////////////////////////////////////////////
    // Lexer
    
    /// Lexical tokens produced by the tokenizer and consumed by the parser.
    type Token =
        | IF    // keyword "if"
        | THEN  // keyword "then"
        | ELSE  // keyword "else"
        | PRINT // keyword "print"
        | NUM of int  // non-negative
        | PLUS  // "+"
        | MINUS // "-"
        | EOF   // produced once the end of the input string is reached
    
    /// Builds a tokenizer over the string `s`.
    ///
    /// Returns a `unit -> Token` function. Each call skips leading whitespace
    /// and yields the next token; once the end of `s` has been reached every
    /// further call yields EOF.
    ///
    /// Raises an exception (via failwithf) when the character at the current
    /// position starts neither a number nor one of the known keywords/operators.
    /// System.Int32.Parse may also throw, e.g. when a digit run exceeds the
    /// range of int.
    let makeTokenizer (s:string) =
        // Read position shared by all calls of the returned closure.
        let pos = ref 0
        let keywords = [
            "if", IF
            "then", THEN
            "else", ELSE
            "print", PRINT
            "+", PLUS
            "-", MINUS ]
        // True when `kw` occurs in `s` exactly at the current position.
        // An ordinal comparison anchored at `pos` is used instead of
        // `s.IndexOf(kw, pos) = pos`: IndexOf is culture-sensitive and scans the
        // whole remainder of the input whenever the keyword is not right here.
        let startsHere (kw:string) =
            System.String.CompareOrdinal(s, pos.Value, kw, 0, kw.Length) = 0
        let rec getNextToken() =
            if pos.Value >= s.Length then
                EOF
            elif System.Char.IsWhiteSpace(s.[pos.Value]) then
                pos.Value <- pos.Value + 1
                getNextToken()
            elif System.Char.IsDigit(s.[pos.Value]) then
                // Consume the maximal run of digits starting at `start`.
                let start = pos.Value
                let mutable stop = start
                while stop < s.Length && System.Char.IsDigit(s.[stop]) do
                    stop <- stop + 1
                pos.Value <- stop
                NUM(System.Int32.Parse(s.Substring(start, stop - start))) // may throw, e.g. if > MAXINT
            else
                // First keyword in list order that matches at the current position.
                match keywords |> List.tryFind (fun (kw, _) -> startsHere kw) with
                | Some (kw, tok) ->
                    pos.Value <- pos.Value + kw.Length
                    tok
                | None ->
                    let at = pos.Value
                    let c = s.[at]
                    // failwithf, not failwith: the message carries format arguments.
                    failwithf "unexpected char '%c' at position %d" c at
        getNextToken
    
    /// Every token of `input`, in order, with the terminating EOF as the
    /// last element.
    let tokens =
        let nextToken = makeTokenizer input
        // Pull tokens until EOF shows up; accumulate in reverse, flip at the end.
        let rec collect acc =
            match nextToken() with
            | EOF -> List.rev (EOF :: acc)
            | tok -> collect (tok :: acc)
        collect []

    tokens |> printfn "%A" // sanity check our tokenizer works
    
    /////////////////////////////////////////////////////////////////////////
    // Parser
    
    /// Parses an expression of the form `NUM ((PLUS | MINUS) NUM)*` from the
    /// front of `toks`.
    ///
    /// Returns the expression together with the tokens that were not consumed.
    /// Operators associate to the left, so `1+1-1` becomes
    /// `BinaryOp(BinaryOp(1, Plus, 1), Minus, 1)`.
    ///
    /// Fails with "parse error in expression, expected number" when `toks`
    /// does not start with a number or when an operator is not followed by one.
    /// Running out of tokens after a complete expression is not an error here:
    /// the expression is returned with an empty remainder (the previous version
    /// crashed on `List.Head` of an empty list in that situation).
    let parseExpr toks =
        // `acc` is the expression built so far; keep folding in `op NUM` pairs.
        let rec loop acc remaining =
            match remaining with
            | PLUS :: NUM y :: t -> loop (BinaryOp(acc, Plus, Literal y)) t
            | MINUS :: NUM y :: t -> loop (BinaryOp(acc, Minus, Literal y)) t
            | PLUS :: _
            | MINUS :: _ ->
                // an operator with no number after it
                failwith "parse error in expression, expected number"
            | _ -> acc, remaining
        match toks with
        | NUM x :: rest -> loop (Literal x) rest
        | _ -> failwith "parse error in expression, expected number"
    /// Parses one statement from the front of `toks` and returns it together
    /// with the tokens that were not consumed.
    ///
    /// Grammar handled:
    ///   PRINT expr
    ///   IF expr THEN stmt ELSE stmt
    ///
    /// Fails (failwith) with a message naming the expected token when the input
    /// does not match; errors raised by parseExpr propagate unchanged.
    let rec parseStmt toks =
        match toks with
        | PRINT :: rest ->
            let e, rest = parseExpr rest
            Print e, rest
        | IF :: rest ->
            let cond, rest = parseExpr rest
            // Each step shadows `rest` with what is left after that step.
            let thenBranch, rest =
                match rest with
                | THEN :: rest -> parseStmt rest
                | _ ->
                    failwith "parse error after if expression, expected 'then'"
            let elseBranch, rest =
                match rest with
                | ELSE :: rest -> parseStmt rest
                | _ ->
                    failwith "parse error after if branch, expected 'else'"
            IfThenElse(cond, thenBranch, elseBranch), rest
        | _ -> failwith "parse error, expected statement"
    /// Parses a whole program: exactly one statement followed by EOF and
    /// nothing else. Returns the statement, or fails if anything other than a
    /// single EOF token remains after it.
    let parseProgram toks =
        match parseStmt toks with
        | stmt, [EOF] -> stmt
        | _ -> failwith "parse error after statement, expected EOF"
    
    // Parse the sample token list, show the resulting AST, and check it
    // against the hand-written expected tree.
    let p = tokens |> parseProgram
    p |> printfn "%A"
    assert (p = goal)
    

    (Hopefully there are no egregious bugs.)

提交回复
热议问题