String input to flex lexer

前端 未结 8 562
梦如初夏
梦如初夏 2020-11-29 04:42

I want to create a read-eval-print loop using a flex/bison parser. The trouble is that the flex-generated lexer wants its input as a FILE*, and I would like it to be a char*. Is there any way to do this?

相关标签:
8条回答
  • 2020-11-29 04:45

    flex can parse char * using any one of three functions: yy_scan_string(), yy_scan_buffer(), and yy_scan_bytes() (see the documentation). Here's an example of the first:

    /* Minimal driver: run a flex/bison parser over an in-memory C string. */

    /* Opaque handle that flex uses to refer to one input buffer. */
    typedef struct yy_buffer_state * YY_BUFFER_STATE;
    extern int yyparse();
    /* flex declares this argument as const char *: the text is copied into a
       scanner-owned buffer, so literals and const data may be passed. */
    extern YY_BUFFER_STATE yy_scan_string(const char * str);
    extern void yy_delete_buffer(YY_BUFFER_STATE buffer);
    
    /* Parses one fixed string; the exit status is yyparse()'s result
       (0 when the input was accepted). */
    int main(){
        const char string[] = "String to be parsed.";
        /* Switch the scanner from yyin to a copy of the string. */
        YY_BUFFER_STATE buffer = yy_scan_string(string);
        int status = yyparse();
        /* Always release the buffer state created by yy_scan_string(). */
        yy_delete_buffer(buffer);
        return status;
    }
    

    The equivalent statements for yy_scan_buffer() (which requires a doubly null-terminated string):

    /* The explicit \0 plus the literal's implicit terminator supply the two
       trailing NUL bytes that yy_scan_buffer() requires. */
    char string[] = "String to be parsed.\0";
    /* sizeof(string) counts both NULs. */
    YY_BUFFER_STATE buffer = yy_scan_buffer(string, sizeof(string));
    

    My answer reiterates some of the information provided by @dfa and @jlholland, but neither of their answers' code seemed to be working for me.

    0 讨论(0)
  • 2020-11-29 04:46

    The accepted answer is incorrect. It will cause memory leaks.

    Internally, yy_scan_string calls yy_scan_bytes which, in turn, calls yy_scan_buffer.

    yy_scan_bytes allocates memory for a COPY of the input buffer.

    yy_scan_buffer works directly upon the supplied buffer.

    With all three forms, you MUST call yy_delete_buffer to free the flex buffer-state information (YY_BUFFER_STATE).

    However, with yy_scan_buffer, you avoid the internal allocation/copy/free of the internal buffer.

    The prototype for yy_scan_buffer does NOT take a const char* and you MUST NOT expect the contents to remain unchanged.

    If you allocated memory to hold your string, you are responsible for freeing it AFTER you call yy_delete_buffer.

    Also, don't forget to have yywrap return 1 (non-zero) when you're parsing JUST this string.

    Below is a COMPLETE example.

    %%
    
    <<EOF>>     { /* End of input: 0 ends the caller's token loop. */ return 0; }
    
    .           { /* Every character matched by '.' is reported as token 1. */ return 1; }
    
    %%
    
    /*
     * End-of-input hook. Always returns 1 (non-zero), which is what the scanner
     * needs when only a single string/buffer is being parsed.
     */
    int yywrap()
    {
        const int noMoreInput = 1;
        return noMoreInput;
    }
    
    // Loads the file named by argv[1] into memory and scans it in place with
    // yy_scan_buffer().
    //
    // Returns EXIT_SUCCESS once yylex() reports end of input (token 0), or
    // EXIT_FAILURE when the argument is missing, the file cannot be read
    // completely, memory cannot be allocated, or flex rejects the buffer.
    int main(int argc, const char* const argv[])
    {
        if (argc < 2) {
            fprintf(stderr, "usage: %s <input-file>\n",
                    (argc > 0) ? argv[0] : "scanner");
            return (EXIT_FAILURE);
        }
    
        FILE* fileHandle = fopen(argv[1], "rb");
        if (fileHandle == NULL) {
            perror("fopen");
            return (EXIT_FAILURE);
        }
    
        // Measure the file; ftell() returns -1 on failure, which must never
        // reach malloc() or the buffer indexing below.
        long fileSize = -1;
        if (fseek(fileHandle, 0, SEEK_END) == 0) {
            fileSize = ftell(fileHandle);
        }
        if (fileSize < 0 || fseek(fileHandle, 0, SEEK_SET) != 0) {
            perror("fseek/ftell");
            fclose(fileHandle);
            return (EXIT_FAILURE);
        }
        size_t length = (size_t)fileSize;
    
        // When using yy_scan_bytes, do not add 2 here ...
        char *string = (char *)malloc(length + 2);
        if (string == NULL) {
            perror("malloc");
            fclose(fileHandle);
            return (EXIT_FAILURE);
        }
    
        // Element size 1 and count 'length', so the return value is the number
        // of bytes actually read and a short read can be detected.
        size_t bytesRead = fread(string, sizeof(char), length, fileHandle);
        int readFailed = (bytesRead != length);
        if (readFailed) {
            if (ferror(fileHandle)) {
                perror("fread");
            } else {
                fprintf(stderr, "fread: short read (%lu of %lu bytes)\n",
                        (unsigned long)bytesRead, (unsigned long)length);
            }
        }
    
        fclose(fileHandle);
    
        if (readFailed) {
            free(string);
            return (EXIT_FAILURE);
        }
    
        // Add the two NUL terminators, required by flex.
        // Omit this for yy_scan_bytes(), which allocates, copies and
        // appends these for us.
        string[length] = '\0';
        string[length + 1] = '\0';
    
        // Our input file may contain NULs ('\0') so we MUST use
        // yy_scan_buffer() or yy_scan_bytes(). For a normal C (NUL-
        // terminated) string, we are better off using yy_scan_string() and
        // letting flex manage making a copy of it so the original may be a
        // const char (i.e., literal) string.
        YY_BUFFER_STATE buffer = yy_scan_buffer(string, length + 2);
        if (buffer == NULL) {
            // flex returns NULL when the buffer is not doubly NUL-terminated.
            fprintf(stderr, "yy_scan_buffer: could not set up input buffer\n");
            free(string);
            return (EXIT_FAILURE);
        }
    
        // This is a flex source file, for yacc/bison call yyparse()
        // here instead ...
        int token;
        do {
            token = yylex(); // MAY modify the contents of the 'string'.
        } while (token != 0);
    
        // After flex is done, tell it to release the memory it allocated.
        yy_delete_buffer(buffer);
    
        // And now we can release our (now dirty) buffer.
        free(string);
    
        return (EXIT_SUCCESS);
    }
    
    0 讨论(0)
  • 2020-11-29 04:49

    Here is what I needed to do :

    #include <stddef.h>  /* size_t */
    
    /* Forward-declare flex's buffer struct so the handle type can be named
       without including the generated scanner source. */
    struct yy_buffer_state;
    typedef struct yy_buffer_state *YY_BUFFER_STATE;
    extern int yyparse();
    extern YY_BUFFER_STATE yy_scan_buffer(char *, size_t);
    extern void yy_delete_buffer(YY_BUFFER_STATE);
    
    /* Parses one fixed line in place; returns yyparse()'s status, or 1 when
       flex rejects the buffer. */
    int main(int argc, char** argv) {
      (void)argc;
      (void)argv;
    
      /* One explicit \0 plus the literal's implicit terminator give exactly the
         two trailing NULs yy_scan_buffer requires; a third NUL would become
         part of the scanned text. */
      char tstr[] = "line i want to parse\n\0";
      YY_BUFFER_STATE buffer = yy_scan_buffer(tstr, sizeof(tstr));
      if (buffer == NULL) {
        return 1;
      }
      int status = yyparse();
      /* Release the buffer state; tstr itself is a local array. */
      yy_delete_buffer(buffer);
      return status;
    }
    

    You cannot extern the typedef, which makes sense when you think about it.

    0 讨论(0)
  • 2020-11-29 04:50

    Alternatively, you can redefine the YY_INPUT macro in the lex file so that the lexer reads its input from your string, as below:

    #include <stdlib.h>
    #include <string.h>
    
    #undef YY_INPUT
    /* flex invokes YY_INPUT(buf, result, max_size): copy at most max_size bytes
       into buf and store the number of bytes copied in result (0 means EOF). */
    #define YY_INPUT(buf, result, max_size) \
        ((result) = my_yyinput((buf), (size_t)(max_size)))
    
    static char  *lex_src = NULL;  /* private copy of the text being scanned */
    static size_t lex_len = 0;     /* length of lex_src, excluding the NUL */
    static size_t lex_pos = 0;     /* offset of the next byte to hand out */
    
    /* Installs org_str (copied, any length) as the lexer's input and rewinds
       to its start. NULL is treated as the empty string. If the copy cannot
       be allocated, the input is left empty. */
    void set_lexbuf(const char *org_str)
    {
        size_t len;
    
        if (org_str == NULL)
            org_str = "";
        len = strlen(org_str);
    
        free(lex_src);
        lex_src = (char *)malloc(len + 1);
        lex_len = 0;
        lex_pos = 0;
        if (lex_src == NULL)
            return;
    
        memcpy(lex_src, org_str, len + 1);
        lex_len = len;
    }
    
    /* Copies the next chunk of the installed text into buf, never more than
       max_size bytes. Returns the number of bytes copied; 0 once the text is
       exhausted. */
    int my_yyinput(char *buf, size_t max_size)
    {
        size_t n = lex_len - lex_pos;
    
        if (n > max_size)
            n = max_size;
        if (n > 0) {
            memcpy(buf, lex_src + lex_pos, n);
            lex_pos += n;
        }
        return (int)n;
    }
    

    In your main.c, before scanning, you need to set lex's buffer first:

    /* Hand the text to the lexer before scanning starts. */
    set_lexbuf(your_string);
    scanning...
    
    0 讨论(0)
  • 2020-11-29 04:53

    See this section of Flex's manual for information on how to scan in-memory buffers, such as strings.

    0 讨论(0)
  • 2020-11-29 04:59

    Here is a small example of using bison/flex as a parser inside C++ code: it parses a string and rewrites the string's value according to the parse. (A few parts of the code were removed, so some of what remains may be irrelevant.) parser.y:

    %{
    #include "parser.h"
    #include "lex.h"
    #include <math.h> 
    #include <fstream>
    #include <iostream> 
    #include <string>
    #include <vector>
    using namespace std;
     /* Error callback for the generated parser. Prints the first character of
        the message followed by the whole message, then returns 1. The scanner
        and result arguments are accepted only to match the parser's parameter
        list; result is taken by reference so it is not copied on each error. */
     int yyerror(yyscan_t scanner, const std::string &result, const char *s){
        (void)scanner;
        (void)result;
        std::cout << "yyerror : " << *s << " - " << s << std::endl;
        return 1;
      }
        %}
    
    /* Declarations that users of the generated parser need (the scanner
       handle type and the standard headers used by the semantic values). */
    %code requires{
    /* Declare the scanner handle type here; NOTE(review): defining
       YY_TYPEDEF_YY_SCANNER_T presumably keeps lex.h from repeating the
       typedef - confirm against the generated header. */
    #define YY_TYPEDEF_YY_SCANNER_T 
    typedef void * yyscan_t;
    #define YYERROR_VERBOSE 0
    #define YYMAXDEPTH 65536*1024 
    #include <math.h> 
    #include <fstream>
    #include <iostream> 
    #include <string>
    #include <vector>
    }
    /* Generated file names: parser implementation and its header. */
    %output "parser.cpp"
    %defines "parser.h"
    %define api.pure full
    /* The scanner handle is forwarded to yylex(); yyparse() takes the scanner
       handle plus the string that receives the rewritten expression. */
    %lex-param{ yyscan_t scanner }
    %parse-param{ yyscan_t scanner } {std::string & result}
    
    %union {
      std::string *  sval;   /* heap-allocated text, owned by the grammar symbol */
    }
    
    /* Free strings that the parser discards during error recovery or abort. */
    %destructor { delete $$; } <sval>
    
    %token TOKEN_ID TOKEN_ERROR TOKEN_OB TOKEN_CB TOKEN_AND TOKEN_XOR TOKEN_OR TOKEN_NOT
    %type <sval>  TOKEN_ID expression unary_expression binary_expression
    %left BINARY_PRIO
    %left UNARY_PRIO
    %%
    
    /* Every expression value is its own heap string. Composite rules build a
       fresh string and delete their operands, so sibling sub-expressions never
       share storage; 'result' is written once, when the whole input reduces. */
    top:
    expression {result = *$1; delete $1;}
    ;
    expression:
    TOKEN_ID  {$$=$1; }
    | TOKEN_OB expression TOKEN_CB  {$$=$2;}
    | binary_expression  {$$=$1;}
    | unary_expression  {$$=$1;}
    ;
    
    unary_expression:
     TOKEN_NOT expression %prec UNARY_PRIO {$$ = new std::string(" (NOT " + *$2 + " ) "); delete $2;}
    ;
    /* Two adjacent expressions with no operator are an implicit AND. */
    binary_expression:
    expression expression  %prec BINARY_PRIO {$$ = new std::string(" ( " + *$1+ " AND " + *$2 + " ) "); delete $1; delete $2;}
    | expression TOKEN_AND expression %prec BINARY_PRIO {$$ = new std::string(" ( " + *$1+ " AND " + *$3 + " ) "); delete $1; delete $3;} 
    | expression TOKEN_OR expression %prec BINARY_PRIO {$$ = new std::string(" ( " + *$1 + " OR " + *$3 + " ) "); delete $1; delete $3;} 
    | expression TOKEN_XOR expression %prec BINARY_PRIO {$$ = new std::string(" ( " + *$1 + " XOR " + *$3 + " ) "); delete $1; delete $3;} 
    ;
    
    %%
    
    lexer.l : 
    
    %{
    /* parser.h supplies the token codes returned by the rules below. */
    #include <string>
    #include "parser.h"
    
    %}
    /* Generated file names: scanner implementation and its header. */
    %option outfile="lex.cpp" header-file="lex.h"
    %option noyywrap never-interactive
    /* Scanner state is carried in a yyscan_t handle passed to each call. */
    %option reentrant
    /* Lets the rules store semantic values through yylval. */
    %option bison-bridge
    
    %top{
    /* This code goes at the "top" of the generated file. */
    #include <stdint.h>
    }
    
    /* Identifier: a letter followed by any number of letters or digits. */
    id        [a-zA-Z][a-zA-Z0-9]*
    /* Blank, tab or carriage return. */
    white     [ \t\r]
    /* Line feed. */
    newline   \n
    
    %%
    {id} {
        /* Hand the identifier text to the parser as a new heap string. */
        yylval->sval = new std::string(yytext);
        return TOKEN_ID;
    }
    "("         { return TOKEN_OB; }
    ")"         { return TOKEN_CB; }
    "*"         { return TOKEN_AND; }
    "^"         { return TOKEN_XOR; }
    "+"         { return TOKEN_OR; }
    "!"         { return TOKEN_NOT; }
    
    {white}     { /* ignore white spaces */ }
    {newline}   { /* ignore line feeds */ }
    .           { /* anything else is reported as an error token */ return TOKEN_ERROR; }
    
    %%
    
    usage : 
    // Parses the expression held in `function` and replaces it with the
    // rewritten form produced by the parser, padded with one space on each
    // side. If the scanner cannot be created, `function` is left untouched.
    void parse(std::string& function) {
      std::string result;
      yyscan_t scanner;
      // A failed init leaves `scanner` unusable, so stop before touching it.
      if (yylex_init_extra(NULL, &scanner) != 0) {
        return;
      }
      // The scanner works on its own copy of the text.
      YY_BUFFER_STATE state = yy_scan_string(function.c_str(), scanner);
      yyparse(scanner, result);
      // Release the buffer first, then the scanner that owns it.
      yy_delete_buffer(state, scanner);
      yylex_destroy(scanner);
      function = " " + result + " ";
    }
    
    makefile:
    # Tool locations; override on the command line, e.g. `make BISON=bison FLEX=flex`.
    # NOTE: recipe lines must begin with a TAB when copied into a real Makefile.
    BISON ?= /usr/local/bison/2.7.91/bin/bison
    FLEX  ?= /usr/local/flex/2.5.39/bin/flex
    
    # `clean` is a command, not a file to build.
    .PHONY: clean
    
    # Generate the parser and its header from the grammar.
    parser.h parser.cpp: parser.y
        @ $(BISON) -y -d parser.y
    
    
    # Generate the scanner and its header from the lexer specification.
    lex.h lex.cpp: lexer.l
        @ $(FLEX) lexer.l
    
    # Remove object files and every generated source/header.
    clean:
        - \rm -f *.o parser.h parser.cpp lex.h lex.cpp
    
    0 讨论(0)
提交回复
热议问题