词法分析程序(Lexical Analyzer)要求:
- 从左至右扫描构成源程序的字符流
- 识别出有词法意义的单词(Lexemes)
- 返回单词记录(单词类别,单词本身)
- 滤掉空格
- 跳过注释
- 发现词法错误
程序结构:
输入:字符流(什么输入方式,什么数据结构保存)
处理:
–遍历(什么遍历方式)
–词法规则
输出:单词流(什么输出形式)
–二元组
单词类别:
1.标识符(10)
2.无符号数(11)
3.保留字(一词一码)
4.运算符(一词一码)
5.界符(一词一码)
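| 单词符号 | 种别码 | 单词符号 | 种别码 |
| --- | --- | --- | --- |
| begin | 1 | : | 17 |
| if | 2 | := | 18 |
| then | 3 | < | 20 |
| while | 4 | <= | 21 |
| do | 5 | <> | 22 |
| end | 6 | > | 23 |
| l(l\|d)* | 10 | >= | 24 |
| dd* | 11 | = | 25 |
| + | 13 | ; | 26 |
| - | 14 | ( | 27 |
| * | 15 | ) | 28 |
| / | 16 | # | 0 |

注:其中 l 表示字母,d 表示数字。下面的程序及其打印的种别编码表实际采用 <> 为 21、<= 为 22,与上表中这两项的顺序相反,使用时请以统一的一套编码为准。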
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define max 200
char
pro[max],lin[20];
int
n,i,syn;
char
*word[6]={
"begin"
,
"if"
,
"then"
,
"while"
,
"do"
,
"end"
};
char
ch;
void
scaner(){
int
j=0;
for
(n=0;n<20;n++)
lin[n]=NULL;
ch=pro[i++];
while
(ch==
' '
)
ch=pro[i++];
if
((ch>=
'A'
&&ch<=
'Z'
)||(ch>=
'a'
&&ch<=
'z'
)||(ch>=48&&ch<=57))
{
if
((ch>=
'A'
&&ch<=
'Z'
)||(ch>=
'a'
&&ch<=
'z'
)){
lin[j++]=ch;
ch=pro[i++];
while
((ch>=
'A'
&&ch<=
'Z'
)||(ch>=
'a'
&&ch<=
'z'
)||(ch>=48&&ch<=57))
{
lin[j++]=ch;
ch=pro[i++];
}
lin[j]=
'\0'
;
i--;
syn=10;
for
(n=0;n<6;n++)
if
(strcmp(lin,word[n])==0)
{
syn=n+1;
break
;
}
}
else
{
while
(ch>=48&&ch<=57)
{
lin[j++]=ch;
ch=pro[i++];
}
i--;
lin[j]=
'\0'
;
syn=11;
}
}
else
{
if
(ch==
'<'
)
{
lin[j++]=ch;
ch=pro[i++];
if
(ch==
'>'
)
{
syn=21;
lin[j++]=ch;
lin[j]=
'\0'
;
}
else
if
(ch==
'='
)
{
syn=22;
lin[j++]=ch;
lin[j]=
'\0'
;
}
else
{
syn=20;
lin[j]=
'\0'
;
i--;
}
}
else
if
(ch==
'>'
)
{
lin[j++]=ch;
ch=pro[i++];
if
(ch==
'='
)
{
syn=24;
lin[j++]=ch;
lin[j]=
'\0'
;
}
else
{
syn=23;
lin[j]=
'\0'
;
i--;
}
}
else
if
(ch==
':'
)
{
lin[j++]=ch;
ch=pro[i++];
if
(ch==
'='
)
{
syn=18;
lin[j++]=ch;
lin[j]=
'\0'
;
}
else
{
syn=17;
lin[j]=
'\0'
;
i--;
}
}
else
if
(ch==
'+'
)
{
syn=13;
lin[j++]=ch;
lin[j]=
'\0'
;
}
else
if
(ch==
'-'
)
{
syn=14;
lin[j++]=ch;
lin[j]=
'\0'
;
}
else
if
(ch==
'*'
)
{
syn=16;
lin[j++]=ch;
lin[j]=
'\0'
;
}
else
if
(ch==
'/'
)
{
syn=16;
lin[j++]=ch;
lin[j]=
'\0'
;
}
else
if
(ch==
'='
)
{
syn=25;
lin[j++]=ch;
lin[j]=
'\0'
;
}
else
if
(ch==
';'
)
{
syn=26;
lin[j++]=ch;
lin[j]=
'\0'
;
}
else
if
(ch==
'('
)
{
syn=27;
lin[j++]=ch;
lin[j]=
'\0'
;
}
else
if
(ch==
')'
)
{
syn=28;
lin[j++]=ch;
lin[j]=
'\0'
;
}
else
if
(ch==
'#'
)
{
syn=0;
lin[j++]=ch;
lin[j]=
'\0'
;
}
else
syn=-1;
}
return
;
}
void
read()
{
FILE *fp;
int
k;
fp=fopen(
"shi_yan.txt"
,
"r"
);
for
(k=0;k<max-1;k++)
{
fscanf(fp,
"%c"
,&pro[k]);
}
pro[k]=
'\0'
;
fclose(fp);
}
int
main(
void
)
{
int
i=0;
printf(
"**************************************************************************\n"
);
printf(
"*********************************种别编码*********************************\n"
);
printf(
"**************************************************************************\n"
);
printf(
"(1,begin) (2,if) (3,then) (4,while) (5,do)\n"
);
printf(
"(6,end) (10,基本标示符) (11,数字) (13,+) (14,-)\n"
);
printf(
"(15,*) (16,/) (17,:) (18,:=) (20,<)\n"
);
printf(
"(21,<>) (22,<=) (23,>) (24,>=) (25,=)\n"
);
printf(
"(26,;) (27,() (28,)) (0,#)\n"
);
read();
printf(
"\n\n读入的字符串如下:\n\n"
);
puts(pro);
printf(
"\n词法分析结果如下:\n"
);
printf(
"\n"
);
do
{
scaner();
switch
(syn)
{
case
11:
printf(
"(%d,%s)\n"
,syn,lin);
break
;
case
-1:
printf(
"错误符号\n"
);
break
;
default
:
printf(
"(%d,%s)\n"
,syn,lin);
}
}
while
(syn!=0);
printf(
"\n词法分析结束\n"
);
scanf(
"%c"
,&i);
return
0;
}
来源:https://www.cnblogs.com/226aa/p/11656134.html