词法分析程序(Lexical Analyzer)要求:
- 从左至右扫描构成源程序的字符流
- 识别出有词法意义的单词(Lexemes)
- 返回单词记录(单词类别,单词本身)
- 滤掉空格
- 跳过注释
- 发现词法错误
程序结构:
输入:字符流(什么输入方式,什么数据结构保存)
处理:
–遍历(什么遍历方式)
–词法规则
输出:单词流(什么输出形式)
–二元组
单词类别:
1.标识符(10)
2.无符号数(11)
3.保留字(一词一码)
4.运算符(一词一码)
5.界符(一词一码)
| 单词符号 | 种别码 | 单词符号 | 种别码 |
| --- | --- | --- | --- |
| begin | 1 | : | 17 |
| if | 2 | := | 18 |
| then | 3 | < | 20 |
| while | 4 | <= | 21 |
| do | 5 | <> | 22 |
| end | 6 | > | 23 |
| `l(l\|d)*` | 10 | >= | 24 |
| `dd*` | 11 | = | 25 |
| + | 13 | ; | 26 |
| - | 14 | ( | 27 |
| * | 15 | ) | 28 |
| / | 16 | # | 0 |
首先说一下我的思路:先建立 map<string, string> m,用来存储单词符号及其种别码。
// Lookup table from a fixed lexeme (reserved word, operator or delimiter)
// to its category code. Codes are stored as text.
map<string, string> m;

// Populates `m` with every fixed lexeme of the language.
// Re-running it simply overwrites the same keys with the same values.
void init(){
    // {lexeme, category code} pairs.
    static const char* const kEntries[][2] = {
        {"begin", "1"},  {"if", "2"},   {"then", "3"},
        {"while", "4"},  {"do", "5"},   {"end", "6"},
        {"+", "13"},     {"-", "14"},   {"*", "15"},  {"/", "16"},
        {":", "17"},     {":=", "18"},
        {"<", "20"},     {"<=", "21"},  {"<>", "22"},
        {">", "23"},     {">=", "24"},  {"=", "25"},
        {";", "26"},     {"(", "27"},   {")", "28"},
        {"#", "0"}
    };
    const unsigned entryCount = sizeof(kEntries) / sizeof(kEntries[0]);
    for (unsigned n = 0; n < entryCount; ++n) {
        m[kEntries[n][0]] = kEntries[n][1];
    }
}
而后进行字符串录入以#作为结束录入的标志。在c++中,cin用于string类型录入时会将空格作为结束符从而导致空格后的内容无法录入,故采用getline()函数来进行字符串的录入。
string str;   // whole source text; lines are joined with '\n'
string s;     // line currently being read
cout<<"请输入字符串:"<<endl;
// Read line by line until a line ends with the '#' terminator.
// The loop also stops when input is exhausted, so EOF without '#' cannot
// spin forever, and the emptiness check keeps a blank first line from
// indexing before the start of the string.
while(getline(cin,s)){
    str+=s;
    if(!str.empty() && str[str.size()-1]=='#'){
        break;
    }
    str+="\n";
}
而后对字符串进行处理:在运算符、界符等特殊符号的两侧插入空格,使其与标识符分隔开。
// Insert blanks around every operator/delimiter, remove /* ... */ comments
// and turn line breaks and tabs into blanks, so that the text can afterwards
// be split on ' ' alone. `row` tracks the current 1-based source line.
for(string::size_type i=0;i<str.size();i++){
    // Length of the operator/delimiter starting at i; 0 means nothing to pad.
    string::size_type width=0;
    switch(str[i]){
    case '+': case '-': case '*': case '=':
    case ';': case '(': case ')': case '#':
        width=1;
        break;
    case '/':
        if(i+1<str.size() && str[i+1]=='*'){
            bool closed=false;     // tracked per comment, never carried over
            int skippedLines=0;    // line breaks swallowed by this comment
            for(string::size_type k=i+2;k<str.size();k++){
                if(str[k]=='\n')skippedLines++;
                // The closing "*/" must lie entirely after the opening "/*",
                // so "/*/" alone does not count as a finished comment.
                if(k>=i+3 && str[k]=='/' && str[k-1]=='*'){
                    str.replace(i,k-i+1," ");
                    closed=true;
                    break;
                }
            }
            if(!closed){
                cout<<"注释方式错误不完整,错误位置第"<<row<<"行"<<endl ;
                exit(1);   // non-zero status: the input was rejected
            }
            row+=skippedLines;
        }
        else{
            width=1;
        }
        break;
    case ':':
    case '>':
        // ":=" and ">=" are single two-character lexemes.
        width=(i+1<str.size() && str[i+1]=='=')?2:1;
        break;
    case '<':
        // "<=" and "<>" are single two-character lexemes.
        width=(i+1<str.size() && (str[i+1]=='=' || str[i+1]=='>'))?2:1;
        break;
    case '\n':
        row++;
        str[i]=' ';   // a kept '\n' would later surface as a bogus token
        break;
    case '\t':
    case '\r':
        str[i]=' ';
        break;
    }
    if(width>0){
        str.insert(i," ");
        str.insert(i+1+width," ");
        i+=width;   // the loop's i++ then steps onto the trailing blank
    }
}
之后再将处理过的字符串按空格切分。与 C# 不同,C++ 标准库没有现成的字符串切割函数,只能自己编写:
// Splits `str` into tokens.
//   str   - text to split
//   delim - set of separator characters; each character is a separator on
//           its own, and an empty set yields the whole text as one token
// Returns the non-empty pieces in order of appearance. Runs of separators
// never produce empty tokens, and an empty `str` yields an empty vector.
// Works directly on std::string, so no heap buffers are allocated or leaked.
vector<string> split(const string& str, const string& delim) {
    vector<string> res;
    string::size_type start = str.find_first_not_of(delim);
    while (start != string::npos) {
        const string::size_type stop = str.find_first_of(delim, start);
        if (stop == string::npos) {
            res.push_back(str.substr(start));   // last token runs to the end
            break;
        }
        res.push_back(str.substr(start, stop - start));
        start = str.find_first_not_of(delim, stop);
    }
    return res;
}
而后得到的vector<string>遍历从map中找出对应值即可。
//输出对应序列
for(int i = 0; i < str_list.size(); i++)
{
if (m.find(str_list[i]) != m.end())
cout<<"\t< "<<str_list[i] <<","<<m[str_list[i]]<<" >"<<endl;
else if(AllisNum(str_list[i]))cout<<"\t< "<<str_list[i] <<","<<11<<" >"<<endl;
else cout<<"\t< "<<str_list[i] <<","<<10<<" >"<<endl;
}
最后附上完整代码
#include <string.h>

#include <iostream>
#include <map>
#include <string>
#include <vector>

#include <ext/hash_map>
using std::map;
using namespace __gnu_cxx;
using namespace std;
// Lookup table from a fixed lexeme (reserved word, operator or delimiter)
// to its category code. Codes are stored as text.
map<string, string> m;

// Populates `m` with every fixed lexeme of the language.
// Re-running it simply overwrites the same keys with the same values.
void init(){
    // {lexeme, category code} pairs.
    static const char* const kEntries[][2] = {
        {"begin", "1"},  {"if", "2"},   {"then", "3"},
        {"while", "4"},  {"do", "5"},   {"end", "6"},
        {"+", "13"},     {"-", "14"},   {"*", "15"},  {"/", "16"},
        {":", "17"},     {":=", "18"},
        {"<", "20"},     {"<=", "21"},  {"<>", "22"},
        {">", "23"},     {">=", "24"},  {"=", "25"},
        {";", "26"},     {"(", "27"},   {")", "28"},
        {"#", "0"}
    };
    const unsigned entryCount = sizeof(kEntries) / sizeof(kEntries[0]);
    for (unsigned n = 0; n < entryCount; ++n) {
        m[kEntries[n][0]] = kEntries[n][1];
    }
}
// Splits `str` into tokens.
//   str   - text to split
//   delim - set of separator characters; each character is a separator on
//           its own, and an empty set yields the whole text as one token
// Returns the non-empty pieces in order of appearance. Runs of separators
// never produce empty tokens, and an empty `str` yields an empty vector.
// Works directly on std::string, so no heap buffers are allocated or leaked.
vector<string> split(const string& str, const string& delim) {
    vector<string> res;
    string::size_type start = str.find_first_not_of(delim);
    while (start != string::npos) {
        const string::size_type stop = str.find_first_of(delim, start);
        if (stop == string::npos) {
            res.push_back(str.substr(start));   // last token runs to the end
            break;
        }
        res.push_back(str.substr(start, stop - start));
        start = str.find_first_not_of(delim, stop);
    }
    return res;
}
// Reports whether `str` consists solely of the decimal digits '0'..'9'.
// Returns true when no other character occurs, which includes the empty
// string; returns false as soon as any non-digit is present.
bool AllisNum(string str)
{
    return str.find_first_not_of("0123456789") == string::npos;
}
// Entry point of the lexical analyzer.
//  1. Reads source text from standard input until a line ends with '#'
//     (or input is exhausted).
//  2. Pads operators/delimiters with blanks, strips /* ... */ comments and
//     turns line breaks and tabs into blanks.
//  3. Echoes the padded text, splits it on blanks and prints one
//     "< lexeme,code >" line per token.
// Returns 0 on success and 1 when a comment is never closed.
// NOTE(review): a token that is neither in the table nor all digits is
// reported as an identifier (10) without checking the l(l|d)* shape.
int main() {
    int row=1;   // current 1-based source line, used in the error message
    init();

    string str;  // whole source text; lines are joined with '\n'
    string s;    // line currently being read
    cout<<"请输入字符串:"<<endl;
    // Stopping on a failed getline keeps EOF without '#' from looping
    // forever; the emptiness check protects against a blank first line.
    while(getline(cin,s)){
        str+=s;
        if(!str.empty() && str[str.size()-1]=='#'){
            break;
        }
        str+="\n";
    }

    // Separate every operator/delimiter from its neighbours with blanks.
    for(string::size_type i=0;i<str.size();i++){
        // Length of the operator/delimiter starting at i; 0 means no padding.
        string::size_type width=0;
        switch(str[i]){
        case '+': case '-': case '*': case '=':
        case ';': case '(': case ')': case '#':
            width=1;
            break;
        case '/':
            if(i+1<str.size() && str[i+1]=='*'){
                bool closed=false;     // tracked per comment, never carried over
                int skippedLines=0;    // line breaks swallowed by this comment
                for(string::size_type k=i+2;k<str.size();k++){
                    if(str[k]=='\n')skippedLines++;
                    // The closing "*/" must lie entirely after the opening
                    // "/*", so "/*/" alone is not a finished comment.
                    if(k>=i+3 && str[k]=='/' && str[k-1]=='*'){
                        str.replace(i,k-i+1," ");
                        closed=true;
                        break;
                    }
                }
                if(!closed){
                    cout<<"注释方式错误不完整,错误位置第"<<row<<"行"<<endl ;
                    return 1;   // non-zero status: the input was rejected
                }
                row+=skippedLines;
            }
            else{
                width=1;
            }
            break;
        case ':':
        case '>':
            // ":=" and ">=" are single two-character lexemes.
            width=(i+1<str.size() && str[i+1]=='=')?2:1;
            break;
        case '<':
            // "<=" and "<>" are single two-character lexemes.
            width=(i+1<str.size() && (str[i+1]=='=' || str[i+1]=='>'))?2:1;
            break;
        case '\n':
            row++;
            str[i]=' ';   // a kept '\n' would be printed as a bogus identifier
            break;
        case '\t':
        case '\r':
            str[i]=' ';
            break;
        }
        if(width>0){
            str.insert(i," ");
            str.insert(i+1+width," ");
            i+=width;   // the loop's i++ then steps onto the trailing blank
        }
    }

    // Echo the padded text before tokenising it.
    cout<<str<<endl;

    // Split on blanks.
    vector<string> str_list=split(str," ");

    // Print the <lexeme,code> pair of every token.
    for(vector<string>::size_type i = 0; i < str_list.size(); i++)
    {
        const string& word = str_list[i];
        const map<string, string>::const_iterator hit = m.find(word);
        if (hit != m.end())
            cout<<"\t< "<<word<<","<<hit->second<<" >"<<endl;
        else if(AllisNum(word))
            cout<<"\t< "<<word<<","<<11<<" >"<<endl;
        else
            cout<<"\t< "<<word<<","<<10<<" >"<<endl;
    }
    return 0;
}