词法分析程序(Lexical Analyzer)要求:
- 从左至右扫描构成源程序的字符流
- 识别出有词法意义的单词(Lexemes)
- 返回单词记录(单词类别,单词本身)
- 滤掉空格
- 跳过注释
- 发现词法错误
程序结构:
输入:字符流(什么输入方式,什么数据结构保存)
处理:
–遍历(什么遍历方式)
–词法规则
输出:单词流(什么输出形式)
–二元组
单词类别:
1.标识符(10)
2.无符号数(11)
3.保留字(一词一码)
4.运算符(一词一码)
5.界符(一词一码)
程序代码:
#include<iostream>
#include<cstdio>
#include<cstring>
using namespace std;
// One recognised token: its text and its category code.
// Category codes written by main(): 1 identifier, 2 constant, 3 keyword,
// 4 delimiter, 5 operator.
struct word
{
    char name[10]; // token text, NUL-terminated (at most 9 characters)
    int kind;      // category code
};

// Global token table filled by main(); only the first k entries are in use.
struct word word[1000];

// Number of tokens currently stored in word[].
int k = 0;
// Reserved-word table consulted by cmp(). Holds 34 words; the 35th slot is
// left zero-initialised (an empty string). Every entry fits in 9 characters.
char key[35][10] =
{
    // I/O routines and preprocessor word treated as reserved by this lexer
    "scanf",
    // type names and type-related words
    "short", "int", "long", "float", "double", "char", "struct", "union",
    "printf",
    "typedef", "const", "unsigned", "signed", "extern", "register", "static",
    "volatile", "void",
    // control flow
    "if", "else", "switch", "case", "for", "do", "while", "goto",
    "continue", "break", "default",
    // miscellaneous
    "sizeof", "return", "include", "bool"
};
// Reports whether `a` is one of the reserved words stored in key[].
//
// a: NUL-terminated candidate word; it is only read, never modified.
// Returns true on an exact, case-sensitive match, false otherwise.
bool cmp(const char a[])
{
    // Derive the slot count from the table itself instead of repeating it.
    const int slots = static_cast<int>(sizeof(key) / sizeof(key[0]));
    for (int idx = 0; idx < slots; idx++)
    {
        // Unused trailing slots are empty strings; skip them so that an
        // empty candidate is never reported as a reserved word.
        if (key[idx][0] != '\0' && strcmp(a, key[idx]) == 0)
            return true;
    }
    return false;
}
// Token category codes stored in word[].kind.
enum
{
    KIND_IDENTIFIER = 1,
    KIND_CONSTANT = 2,
    KIND_KEYWORD = 3,
    KIND_DELIMITER = 4,
    KIND_OPERATOR = 5
};

// True for ASCII letters.
static bool is_letter(char c)
{
    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
}

// True for ASCII decimal digits.
static bool is_digit(char c)
{
    return c >= '0' && c <= '9';
}

// True for characters that merely separate tokens.
static bool is_space_char(char c)
{
    return c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '\v' || c == '\f';
}

// Appends one token to the global word[] table and bumps the global count k.
// The text is truncated to fit word[].name; once the table is full further
// tokens are dropped (with a single warning) instead of writing out of bounds.
static void add_token(const char *text, int kind)
{
    static bool overflow_reported = false;
    const int capacity = static_cast<int>(sizeof(word) / sizeof(word[0]));
    if (k >= capacity)
    {
        if (!overflow_reported)
        {
            fprintf(stderr, "警告:单词表已满(%d),后续单词被忽略\n", capacity);
            overflow_reported = true;
        }
        return;
    }
    strncpy(word[k].name, text, sizeof(word[k].name) - 1);
    word[k].name[sizeof(word[k].name) - 1] = '\0';
    word[k].kind = kind;
    k++;
}

// Returns the set of characters that may follow `first` to form a
// two-character operator ("" when `first` is only a one-character operator),
// or NULL when `first` does not start an operator at all.
static const char *operator_followers(char first)
{
    switch (first)
    {
    case '%':
    case '^':
        return "";
    case '+':
        return "+=";
    case '-':
        return "-=";
    case '*':
        return "*=";
    case '/':
        return "/="; // NOTE: "//" is emitted as an operator; comments are not skipped
    case '>':
        return ">=";
    case '<':
        return "<=";
    case '=':
    case '!':
        return "=";
    case '&':
        return "&";
    case '|':
        return "|";
    default:
        return NULL;
    }
}

// Records the one- or two-character operator that starts at line[pos] and
// returns the index of the first character after it.
static int scan_operator(const char *line, int pos, const char *followers)
{
    char text[3] = { line[pos], '\0', '\0' };
    const char next = line[pos + 1];
    // strchr() also matches the terminating NUL, so exclude it explicitly.
    if (next != '\0' && strchr(followers, next) != NULL)
        text[1] = next;
    add_token(text, KIND_OPERATOR);
    return pos + (text[1] != '\0' ? 2 : 1);
}

// Reads source text line by line from stdin, splits it into tokens, then
// prints every token as "(category,text)" followed by per-category totals.
//
// When compiled with LOCAL defined, stdin/stdout are redirected to in.txt and
// out.txt. Characters that start no known token are reported on stderr and
// skipped. Always returns 0.
int main()
{
#ifdef LOCAL
    freopen("in.txt", "r", stdin);
    freopen("out.txt", "w", stdout);
#endif
    char a[1000];      // current input line
    char b[sizeof(a)]; // current token text; as large as a line, so it cannot overflow
    // fgets() is bounded by the buffer size, unlike the removed gets().
    while (fgets(a, sizeof(a), stdin))
    {
        int len = static_cast<int>(strlen(a));
        while (len > 0 && (a[len - 1] == '\n' || a[len - 1] == '\r'))
            a[--len] = '\0';

        int p = 0;
        while (p < len)
        {
            char ch = a[p];
            if (is_space_char(ch))
            {
                p++;
                continue;
            }

            if (is_letter(ch) || ch == '_')
            {
                // Identifier, keyword, or a bare word standing right before a quote.
                int q = 0;
                bool has_digit_or_underscore = false;
                while (is_letter(ch) || is_digit(ch) || ch == '_')
                {
                    if (!is_letter(ch))
                        has_digit_or_underscore = true;
                    b[q++] = ch;
                    ch = a[++p];
                }
                b[q] = '\0';

                int kind = KIND_IDENTIFIER;
                if (!has_digit_or_underscore)
                {
                    // A purely alphabetic word directly followed by a quote is
                    // taken as (the tail of) a character/string constant.
                    if (ch == '\'' || ch == '"')
                        kind = KIND_CONSTANT;
                    else if (cmp(b))
                        kind = KIND_KEYWORD;
                }
                add_token(b, kind);
            }
            else if (is_digit(ch) || ch == '-')
            {
                // A '-' is a sign only when it does not follow a letter/digit
                // and a digit comes next; otherwise it is an operator.
                const bool after_operand = p > 0 && (is_letter(a[p - 1]) || is_digit(a[p - 1]));
                const bool starts_number = is_digit(ch) || (!after_operand && is_digit(a[p + 1]));
                if (starts_number)
                {
                    int q = 0;
                    b[q++] = ch;
                    ch = a[++p];
                    while (is_digit(ch) || ch == '.')
                    {
                        b[q++] = ch;
                        ch = a[++p];
                    }
                    b[q] = '\0';
                    add_token(b, KIND_CONSTANT);
                }
                else
                {
                    p = scan_operator(a, p, operator_followers('-'));
                }
            }
            else if (strchr("()[]{},;:'\"", ch) != NULL)
            {
                const char text[2] = { ch, '\0' };
                add_token(text, KIND_DELIMITER);
                p++;
            }
            else if (const char *followers = operator_followers(ch))
            {
                p = scan_operator(a, p, followers);
            }
            else
            {
                // Unknown character: report it and move on, otherwise the
                // scan position would never advance.
                fprintf(stderr, "词法错误:无法识别的字符 0x%02X\n",
                        static_cast<unsigned>(static_cast<unsigned char>(ch)));
                p++;
            }
        }
    }

    // Report: one line per token, then the totals per category.
    static const char *const labels[] = { "", "(标识符,", "(常量,", "(关键字,", "(界符,", "(运算符," };
    int counts[6] = { 0, 0, 0, 0, 0, 0 };
    for (int i = 0; i < k; i++)
    {
        const int kind = word[i].kind;
        if (kind >= KIND_IDENTIFIER && kind <= KIND_OPERATOR)
        {
            printf("%s", labels[kind]);
            counts[kind]++;
        }
        printf("%s)\n", word[i].name);
    }
    printf("标识符:%d\n常量:%d\n关键字:%d\n界符:%d\n运算符:%d\n",
           counts[KIND_IDENTIFIER], counts[KIND_CONSTANT], counts[KIND_KEYWORD],
           counts[KIND_DELIMITER], counts[KIND_OPERATOR]);
    return 0;
}
程序截图:
#include<iostream>
#include<cstdio>
#include<cstring>
using namespace std;
// One recognised token: its text and its category code.
// Category codes written by main(): 1 identifier, 2 constant, 3 keyword,
// 4 delimiter, 5 operator.
struct word
{
    char name[10]; // token text, NUL-terminated (at most 9 characters)
    int kind;      // category code
};

// Global token table filled by main(); only the first k entries are in use.
struct word word[1000];

// Number of tokens currently stored in word[].
int k = 0;
// Reserved-word table consulted by cmp(). Holds 34 words; the 35th slot is
// left zero-initialised (an empty string). Every entry fits in 9 characters.
char key[35][10] =
{
    // I/O routines and preprocessor word treated as reserved by this lexer
    "scanf",
    // type names and type-related words
    "short", "int", "long", "float", "double", "char", "struct", "union",
    "printf",
    "typedef", "const", "unsigned", "signed", "extern", "register", "static",
    "volatile", "void",
    // control flow
    "if", "else", "switch", "case", "for", "do", "while", "goto",
    "continue", "break", "default",
    // miscellaneous
    "sizeof", "return", "include", "bool"
};
// Reports whether `a` is one of the reserved words stored in key[].
//
// a: NUL-terminated candidate word; it is only read, never modified.
// Returns true on an exact, case-sensitive match, false otherwise.
bool cmp(const char a[])
{
    // Derive the slot count from the table itself instead of repeating it.
    const int slots = static_cast<int>(sizeof(key) / sizeof(key[0]));
    for (int idx = 0; idx < slots; idx++)
    {
        // Unused trailing slots are empty strings; skip them so that an
        // empty candidate is never reported as a reserved word.
        if (key[idx][0] != '\0' && strcmp(a, key[idx]) == 0)
            return true;
    }
    return false;
}
// Token category codes stored in word[].kind.
enum
{
    KIND_IDENTIFIER = 1,
    KIND_CONSTANT = 2,
    KIND_KEYWORD = 3,
    KIND_DELIMITER = 4,
    KIND_OPERATOR = 5
};

// True for ASCII letters.
static bool is_letter(char c)
{
    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
}

// True for ASCII decimal digits.
static bool is_digit(char c)
{
    return c >= '0' && c <= '9';
}

// True for characters that merely separate tokens.
static bool is_space_char(char c)
{
    return c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '\v' || c == '\f';
}

// Appends one token to the global word[] table and bumps the global count k.
// The text is truncated to fit word[].name; once the table is full further
// tokens are dropped (with a single warning) instead of writing out of bounds.
static void add_token(const char *text, int kind)
{
    static bool overflow_reported = false;
    const int capacity = static_cast<int>(sizeof(word) / sizeof(word[0]));
    if (k >= capacity)
    {
        if (!overflow_reported)
        {
            fprintf(stderr, "警告:单词表已满(%d),后续单词被忽略\n", capacity);
            overflow_reported = true;
        }
        return;
    }
    strncpy(word[k].name, text, sizeof(word[k].name) - 1);
    word[k].name[sizeof(word[k].name) - 1] = '\0';
    word[k].kind = kind;
    k++;
}

// Returns the set of characters that may follow `first` to form a
// two-character operator ("" when `first` is only a one-character operator),
// or NULL when `first` does not start an operator at all.
static const char *operator_followers(char first)
{
    switch (first)
    {
    case '%':
    case '^':
        return "";
    case '+':
        return "+=";
    case '-':
        return "-=";
    case '*':
        return "*=";
    case '/':
        return "/="; // NOTE: "//" is emitted as an operator; comments are not skipped
    case '>':
        return ">=";
    case '<':
        return "<=";
    case '=':
    case '!':
        return "=";
    case '&':
        return "&";
    case '|':
        return "|";
    default:
        return NULL;
    }
}

// Records the one- or two-character operator that starts at line[pos] and
// returns the index of the first character after it.
static int scan_operator(const char *line, int pos, const char *followers)
{
    char text[3] = { line[pos], '\0', '\0' };
    const char next = line[pos + 1];
    // strchr() also matches the terminating NUL, so exclude it explicitly.
    if (next != '\0' && strchr(followers, next) != NULL)
        text[1] = next;
    add_token(text, KIND_OPERATOR);
    return pos + (text[1] != '\0' ? 2 : 1);
}

// Reads source text line by line from stdin, splits it into tokens, then
// prints every token as "(category,text)" followed by per-category totals.
//
// When compiled with LOCAL defined, stdin/stdout are redirected to in.txt and
// out.txt. Characters that start no known token are reported on stderr and
// skipped. Always returns 0.
int main()
{
#ifdef LOCAL
    freopen("in.txt", "r", stdin);
    freopen("out.txt", "w", stdout);
#endif
    char a[1000];      // current input line
    char b[sizeof(a)]; // current token text; as large as a line, so it cannot overflow
    // fgets() is bounded by the buffer size, unlike the removed gets().
    while (fgets(a, sizeof(a), stdin))
    {
        int len = static_cast<int>(strlen(a));
        while (len > 0 && (a[len - 1] == '\n' || a[len - 1] == '\r'))
            a[--len] = '\0';

        int p = 0;
        while (p < len)
        {
            char ch = a[p];
            if (is_space_char(ch))
            {
                p++;
                continue;
            }

            if (is_letter(ch) || ch == '_')
            {
                // Identifier, keyword, or a bare word standing right before a quote.
                int q = 0;
                bool has_digit_or_underscore = false;
                while (is_letter(ch) || is_digit(ch) || ch == '_')
                {
                    if (!is_letter(ch))
                        has_digit_or_underscore = true;
                    b[q++] = ch;
                    ch = a[++p];
                }
                b[q] = '\0';

                int kind = KIND_IDENTIFIER;
                if (!has_digit_or_underscore)
                {
                    // A purely alphabetic word directly followed by a quote is
                    // taken as (the tail of) a character/string constant.
                    if (ch == '\'' || ch == '"')
                        kind = KIND_CONSTANT;
                    else if (cmp(b))
                        kind = KIND_KEYWORD;
                }
                add_token(b, kind);
            }
            else if (is_digit(ch) || ch == '-')
            {
                // A '-' is a sign only when it does not follow a letter/digit
                // and a digit comes next; otherwise it is an operator.
                const bool after_operand = p > 0 && (is_letter(a[p - 1]) || is_digit(a[p - 1]));
                const bool starts_number = is_digit(ch) || (!after_operand && is_digit(a[p + 1]));
                if (starts_number)
                {
                    int q = 0;
                    b[q++] = ch;
                    ch = a[++p];
                    while (is_digit(ch) || ch == '.')
                    {
                        b[q++] = ch;
                        ch = a[++p];
                    }
                    b[q] = '\0';
                    add_token(b, KIND_CONSTANT);
                }
                else
                {
                    p = scan_operator(a, p, operator_followers('-'));
                }
            }
            else if (strchr("()[]{},;:'\"", ch) != NULL)
            {
                const char text[2] = { ch, '\0' };
                add_token(text, KIND_DELIMITER);
                p++;
            }
            else if (const char *followers = operator_followers(ch))
            {
                p = scan_operator(a, p, followers);
            }
            else
            {
                // Unknown character: report it and move on, otherwise the
                // scan position would never advance.
                fprintf(stderr, "词法错误:无法识别的字符 0x%02X\n",
                        static_cast<unsigned>(static_cast<unsigned char>(ch)));
                p++;
            }
        }
    }

    // Report: one line per token, then the totals per category.
    static const char *const labels[] = { "", "(标识符,", "(常量,", "(关键字,", "(界符,", "(运算符," };
    int counts[6] = { 0, 0, 0, 0, 0, 0 };
    for (int i = 0; i < k; i++)
    {
        const int kind = word[i].kind;
        if (kind >= KIND_IDENTIFIER && kind <= KIND_OPERATOR)
        {
            printf("%s", labels[kind]);
            counts[kind]++;
        }
        printf("%s)\n", word[i].name);
    }
    printf("标识符:%d\n常量:%d\n关键字:%d\n界符:%d\n运算符:%d\n",
           counts[KIND_IDENTIFIER], counts[KIND_CONSTANT], counts[KIND_KEYWORD],
           counts[KIND_DELIMITER], counts[KIND_OPERATOR]);
    return 0;
}