语言名为TINY
示例程序:
begin
var x,y:interger;
x:=10;
read(x);
if y<0 then x:=x-y;
x:=x+y;
write(x);
end
TINY语言扫描程序的DFA:
代码
//ExplLexicalAnalyzer.h
// Public interface of the TINY scanner: size limits, the token kinds,
// and the getToken() entry point.
#ifndef EXPLLEXICALANALYZER_H
#define EXPLLEXICALANALYZER_H
// Maximum number of characters kept for one token's text; the scanner's
// tokenString buffer is declared with MAXTOKENLEN + 1 chars.
#define MAXTOKENLEN 40
// Number of entries in the scanner's reserved-word table.
#define MAXRESERVED 13
// Every kind of token getToken() can return.
typedef enum {
// Bookkeeping tokens: end of input, and an unrecognized character.
ENDFILE, ERROR,
// Reserved words.
IF, THEN, ELSE, END, REPEAT, UNTIL, READ, WRITE, VAR, BEGIN, INTEGER, DOUBLE, STRING,
// Tokens whose text varies: identifiers and unsigned digit sequences.
ID, NUM,
// Special symbols, in order:  :=  =  <  +  -  *  /  (  )  ;  ,  :
ASSIGN, EQ, LT, PLUS, MINUS, TIMES, OVER, LPAREN, RPAREN, SEMI, COMMA, DEFINE
} TokenType;
// Unused draft of a richer token record (position, value, name), kept
// commented out.
//typedef struct {
// TokenType kind;
// int row = -1;
// int column = -1;
// double value;
// std::string ID;
//} Token;
// Scans and returns the next token of the input; returns ENDFILE once the
// input is exhausted.
TokenType getToken(void);
#endif // EXPLLEXICALANALYZER_H
1 //ExplLexicalAnalyzer.cpp
2 #include <cstdio>
3 #include <iostream>
4 #include <fstream>
5 #include <cstring>
6 #include "ExplLexicalAnalyzer.h"
7
8 using namespace std;
9
10 typedef enum {
11 START, INASSIGN, INCOMMENT, INNUM, INID, DONE
12 } StateType;
13
14 char tokenString[MAXTOKENLEN + 1];
15
16 #define BUFLEN 256
17
18 static char lineBuf[BUFLEN];
19 static int linepos = 0;
20 static int bufsize = 0;
21 static int EOF_flag = false;
22 static string filename;
23 static fstream get;
24 static int lineno = 0;
25 static int columnpos = 0;
26 bool TraceScan = true;
27 StateType state;
28
29 static struct {
30 const char *str;
31 TokenType tok;
32 } reservedWords[MAXRESERVED]
33 = {{"if", IF},
34 {"then", THEN},
35 {"else", ELSE},
36 {"end", END},
37 {"repeat", REPEAT},
38 {"until", UNTIL},
39 {"read", READ},
40 {"write", WRITE},
41 {"begin", BEGIN},
42 {"var", VAR},
43 {"interger", INTEGER},
44 {"double", DOUBLE},
45 {"string", STRING}};
46
47
48 static char
49 getNextChar() {
50 if (linepos >= bufsize) {
51 lineno = 0;
52 if (state != START)
53 return ' ';
54 if (get.getline(lineBuf, BUFLEN - 1)) {
55 printf("%d: %s\n", columnpos++, lineBuf);
56 bufsize = (int) strlen(lineBuf);
57 linepos = 0;
58 return lineBuf[linepos++];
59 } else {
60 return EOF;
61 }
62 } else return lineBuf[linepos++];
63 }
64
65
66 static TokenType reservedLookup(char *s) {
67 int i;
68 for (i = 0; i < MAXRESERVED; i++)
69 if (!strcmp(s, reservedWords[i].str))
70 return reservedWords[i].tok;
71 return ID;
72 }
73
74 //退回一个字符
75 static void ungetNextChar(void) { if (!EOF_flag) linepos--; }
76
77 //打印分析结果
78 void printToken(TokenType token, const char *tokenString) {
79 switch (token) {
80 case IF:
81 case THEN:
82 case ELSE:
83 case END:
84 case REPEAT:
85 case UNTIL:
86 case READ:
87 case WRITE:
88 case BEGIN:
89 case VAR:
90 case INTEGER:
91 case DOUBLE:
92 case STRING:
93 printf("reserved word: %s\n", tokenString);
94 break;
95 case DEFINE:
96 printf(":\n");
97 break;
98 case COMMA:
99 printf(",\n");
100 break;
101 case ASSIGN:
102 printf(":=\n");
103 break;
104 case LT:
105 printf("<\n");
106 break;
107 case EQ:
108 printf("=\n");
109 break;
110 case LPAREN:
111 printf("(\n");
112 break;
113 case RPAREN:
114 printf(")\n");
115 break;
116 case SEMI:
117 printf(";\n");
118 break;
119 case PLUS:
120 printf("+\n");
121 break;
122 case MINUS:
123 printf("-\n");
124 break;
125 case TIMES:
126 printf("*\n");
127 break;
128 case OVER:
129 printf("/\n");
130 break;
131 case ENDFILE:
132 break;
133 case NUM:
134 printf("NUM, val= %s\n", tokenString);
135 break;
136 case ID:
137 printf("ID, name= %s\n", tokenString);
138 break;
139 case ERROR:
140 printf("ERROR: %s\n", tokenString);
141 break;
142 default:
143 printf("Unknown token: %d\n", token);
144 }
145 }
146
147
148 //词法分析
149 TokenType getToken(void) {
150 int tokenStringIndex = 0;
151 TokenType currentToken;
152 state = START;
153 bool save;
154 while (state != DONE) {
155 char c = getNextChar();
156 save = true;
157 switch (state) {
158 case START:
159 if (isdigit(c))
160 state = INNUM;
161 else if (isalpha(c))
162 state = INID;
163 else if (c == ':')
164 state = INASSIGN;
165 else if ((c == ' ') || (c == '\t') || (c == '\n'))
166 save = false;
167 else if (c == '{') {
168 save = false;
169 state = INCOMMENT;
170 } else {
171 state = DONE;
172 switch (c) {
173 case EOF:
174 return ENDFILE;
175 case ',':
176 currentToken = COMMA;
177 break;
178 case '=':
179 currentToken = EQ;
180 break;
181 case '<':
182 currentToken = LT;
183 break;
184 case '+':
185 currentToken = PLUS;
186 break;
187 case '-':
188 currentToken = MINUS;
189 break;
190 case '*':
191 currentToken = TIMES;
192 break;
193 case '/':
194 currentToken = OVER;
195 break;
196 case '(':
197 currentToken = LPAREN;
198 break;
199 case ')':
200 currentToken = RPAREN;
201 break;
202 case ';':
203 currentToken = SEMI;
204 break;
205 default:
206 currentToken = ERROR;
207 break;
208 }
209 }
210 break;
211 case INCOMMENT:
212 save = false;
213 if (c == EOF) {
214 state = DONE;
215 currentToken = ENDFILE;
216 } else if (c == '}') state = START;
217 break;
218 case INASSIGN:
219 state = DONE;
220 if (c == '=')
221 currentToken = ASSIGN;
222 else {
223 currentToken = DEFINE;
224 ungetNextChar();
225 }
226 break;
227 case INNUM:
228 if (!isdigit(c)) {
229 ungetNextChar();
230 save = false;
231 state = DONE;
232 currentToken = NUM;
233 }
234 break;
235 case INID:
236 if (!isalpha(c)) {
237 tokenString[tokenStringIndex] = '\0';
238 if (!strcmp(tokenString, "begin") || !strcmp(tokenString, "end")) {
239 save = false;
240 state = DONE;
241 currentToken = ID;
242 break;
243 }
244 ungetNextChar();
245 save = false;
246 state = DONE;
247 currentToken = ID;
248 }
249 break;
250 case DONE:
251 break;
252 }
253 if ((save) && (tokenStringIndex <= MAXTOKENLEN) && (state != START && !isspace(c)))
254 tokenString[tokenStringIndex++] = c;
255 if (state == DONE) {
256 tokenString[tokenStringIndex] = '\0';
257 if (currentToken == ID)
258 currentToken = reservedLookup(tokenString);
259 }
260 }
261 if (TraceScan) {
262 printf("\t%d: ", lineno++);
263 printToken(currentToken, tokenString);
264 }
265 return currentToken;
266 }
267
268
269 int
270 main() {
271 if (cin >> filename && filename == "q") {
272 filename = "......";
273 }
274 get.open(filename, ios::in);
275 while (getToken() != ENDFILE);
276 }
运行结果:

来源:https://www.cnblogs.com/INnoVationv2/p/5967015.html