单词种类 | 单词符号 | 种别码 | 单词种类 | 单词符号 | 种别码 |
---|---|---|---|---|---|
整型常数 | digit digit* | 1 | 运算符 | * | 20 |
字符串(标识符ID) | letter(letter\|digit)* | 2 | 运算符 | / | 21 |
关键字 | main | 3 | 运算符 | = | 22 |
关键字 | if | 4 | 运算符 | >= | 23 |
关键字 | else | 5 | 运算符 | < | 24 |
关键字 | do | 6 | 运算符 | <= | 25 |
关键字 | while | 7 | 运算符 | == | 26 |
关键字 | for | 8 | 界符 | ; | 27 |
关键字 | switch | 9 | 界符 | " | 28 |
关键字 | case | 10 | 运算符 | ++ | 29 |
关键字 | int | 11 | 运算符 | -- | 30 |
关键字 | double | 12 | | /* | 31 |
关键字 | float | 13 | | */ | 32 |
关键字 | long | 14 | 界符 | { | 33 |
关键字 | void | 15 | 界符 | } | 34 |
界符 | ( | 16 | 界符 | , | 35 |
界符 | ) | 17 | 运算符 | ! | 36 |
运算符 | + | 18 | 运算符 | != | 37 |
运算符 | - | 19 | | EOF | -1 |
运算符 | * | 20 | 界符 | # | -2 |
其他 | | -10 | | | |
我写的包括编译预处理,宏替换,生成新文件new.txt
#include <stdio.h>
#include <string.h>
#include <cstdio>
#include <cstring>
#include <fstream>
#include <iostream>
#include <stack>
#include <string>
using namespace std;
// Size constant; not referenced anywhere in the visible part of this file.
const int max_word = 505;
// Text of the token most recently scanned by judge_token(); cleared by init_token().
char token[12];
// Not referenced anywhere in the visible part of this file.
char in[105];
// fin: stream the lexer reads with getc(); opened in main().
// fout: not referenced in the visible code (preprocess() declares its own local named fout).
FILE *fin,*fout;
// token_num: write index into token used by judge_token().
// cnt: not referenced in the visible code.
int cnt = 0,token_num = 0;
// Current line number; judge_token() increments it for every '\n' it skips.
int row = 1;
// 0 until judge_token() has read its first character, 1 afterwards.
int flag = 0;
// One-character lookahead kept between successive judge_token() calls.
char ch;
// Keyword table that judge_token() compares scanned identifiers against.
const char keyWord[13][20] = {"main","if","else","do","while","for","switch",
"case","int","double","float","long","void"};
// Forward declarations of the preprocessing passes defined further down.
string defination(string content);
string removezhushi(string oldpath);
string removespace(string content);
// Clear the global token buffer: every byte becomes '\0'.
void init_token(){
    memset(token, '\0', sizeof(token));
}
// Preprocessing driver.
// Runs the three passes in order (strip comments -> expand #define ->
// collapse spaces) on the file at oldpath, echoes the result to stdout,
// writes it to "new.txt" and returns it.
string preprocess(string oldpath)
{
    string processed = removespace(defination(removezhushi(oldpath)));

    cout << processed << endl;

    // Persist the preprocessed text; the stream is closed when it goes out of scope.
    ofstream newFile("new.txt");
    newFile << processed;

    return processed;
}
// Collapse every run of consecutive spaces into a single space.
//
// content: text to clean up (taken by value, the caller's copy is untouched).
// Returns the text with no two adjacent ' ' characters. Only the space
// character is affected; tabs and newlines are copied through unchanged.
//
// The previous implementation erased a run of exactly two spaces entirely
// (gluing neighbouring tokens together) and indexed out of range on an
// empty string because length()-1 wrapped around.
string removespace(string content)
{
    string result;
    result.reserve(content.size());

    bool previousWasSpace = false;
    for (char c : content)
    {
        const bool isSpace = (c == ' ');
        if (isSpace && previousWasSpace)
        {
            continue; // drop the 2nd, 3rd, ... space of a run
        }
        result += c;
        previousWasSpace = isSpace;
    }
    return result;
}
// Strip comments from a source file.
//
// oldpath: path of the file to read.
// Returns the file's text with
//   * everything from the first "//" on each line removed,
//   * the lines joined into one string, each followed by a single space,
//   * every complete "/* ... */" comment overwritten with spaces
//     (so the length of the text does not change in that pass).
// If the file cannot be opened a message is printed and an empty string is
// returned. If a "/*" has no matching "*/", "illegal notation format!" is
// printed and the text from that point on is left untouched.
//
// Note: comment markers inside string literals are not recognised as such.
string removezhushi(string oldpath)
{
    string content;
    ifstream oldfile(oldpath);
    if (oldfile.is_open())
    {
        // std::getline has no fixed line-length limit, unlike a char buffer.
        string line;
        while (getline(oldfile, line))
        {
            const string::size_type commentStart = line.find("//");
            if (commentStart != string::npos)
            {
                line.erase(commentStart); // cut the single-line comment
            }
            content += line;
            content += " ";
        }
    }
    else
    {
        cout << "源文件无法打开或无法创建新文件,请重试!\n";
    }

    // Blank out block comments. "i + 1 < length" is safe for empty text,
    // whereas "i < length - 1" wraps around for an unsigned length of 0.
    string::size_type i = 0;
    while (i + 1 < content.length())
    {
        if (content[i] == '/' && content[i + 1] == '*')
        {
            // Search for the terminator after the opening "/*" so "/*/" is not closed.
            const string::size_type closing = content.find("*/", i + 2);
            if (closing == string::npos)
            {
                cout << "illegal notation format!" << endl;
                break;
            }
            const string::size_type commentLength = closing + 2 - i;
            content.replace(i, commentLength, commentLength, ' ');
            i = closing + 2;
        }
        else
        {
            ++i;
        }
    }
    return content;
}
// True when c may appear inside an identifier (letter, digit or '_').
static bool isMacroNameChar(char c)
{
    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
           (c >= '0' && c <= '9') || c == '_';
}

// Macro expansion.
//
// content: source text in which line breaks have already been replaced by
//          spaces, so every directive has the form "#define NAME VALUE;".
// Returns the text with each directive removed and every whole-word
// occurrence of NAME replaced by VALUE.
//
// Details:
//   * NAME is the run of non-space characters after "#define"; VALUE is the
//     text from the next non-space character up to (not including) the
//     next ';'.
//   * The directive is erased together with its ';' and one following space.
//   * Occurrences are replaced throughout the text, and the search resumes
//     after the inserted VALUE so a VALUE that contains NAME cannot loop
//     forever.
//   * NAME is only replaced when it is not part of a longer identifier
//     (e.g. macro "n" does not touch "int").
//   * A malformed directive (missing name, value or ';') stops processing
//     and the remaining text is returned unchanged.
string defination(string content)
{
    const string directive = "#define";

    string::size_type pos = content.find(directive);
    while (pos != string::npos)
    {
        // Locate "NAME VALUE;" with bounded searches only.
        const string::size_type nameBegin =
            content.find_first_not_of(' ', pos + directive.length());
        if (nameBegin == string::npos) break;
        const string::size_type nameEnd = content.find(' ', nameBegin);
        if (nameEnd == string::npos) break;
        const string::size_type valueBegin = content.find_first_not_of(' ', nameEnd);
        if (valueBegin == string::npos) break;
        const string::size_type semicolon = content.find(';', valueBegin);
        if (semicolon == string::npos) break;

        const string name = content.substr(nameBegin, nameEnd - nameBegin);
        const string value = content.substr(valueBegin, semicolon - valueBegin);

        // Remove the directive, its ';' and at most one separating space.
        string::size_type eraseEnd = semicolon + 1;
        if (eraseEnd < content.length() && content[eraseEnd] == ' ')
        {
            ++eraseEnd;
        }
        content.erase(pos, eraseEnd - pos);

        // Replace every whole-word occurrence of the macro name.
        string::size_type hit = content.find(name);
        while (hit != string::npos)
        {
            const string::size_type after = hit + name.length();
            const bool startsWord = (hit == 0) || !isMacroNameChar(content[hit - 1]);
            const bool endsWord =
                (after >= content.length()) || !isMacroNameChar(content[after]);
            if (startsWord && endsWord)
            {
                content.replace(hit, name.length(), value);
                hit = content.find(name, hit + value.length()); // skip inserted text
            }
            else
            {
                hit = content.find(name, hit + 1);
            }
        }

        pos = content.find(directive);
    }
    return content;
}
// Store c in the global token buffer unless it is full. The last byte is
// never written, so token always stays NUL-terminated; over-long lexemes
// are truncated instead of overflowing the buffer.
static void append_to_token(char c){
    if(token_num < static_cast<int>(sizeof(token)) - 1){
        token[token_num++] = c;
    }
}

// Shared handling for an operator whose first character is already in token
// and that may be followed by one specific second character.
// Returns pairCode (and consumes the second character) when the next input
// character equals second, otherwise singleCode.
static int match_pair(char second, int pairCode, int singleCode){
    ch = getc(fin);
    if(ch == second){
        append_to_token(ch);
        ch = getc(fin);
        return pairCode;
    }
    return singleCode;
}

// Scan the next token from fin.
//
// On return the global token holds the token text and ch holds the first
// character after it (one-character lookahead). row is incremented for every
// '\n' skipped.
//
// Return value is the token code from the table at the top of this file:
//    1        number (digits, '.' is accepted inside)
//    2        identifier
//    3..15    keyword: 3 + index in keyWord (main=3 ... void=15)
//    16 (  17 )  18 +  19 -  20 *  21 /  22 =  23 >=  24 <  25 <=  26 ==
//    27 ;  28 "  29 ++  30 --  31 /* (also returned for //)  32 */
//    33 {  34 }  35 ,  36 !  37 !=
//    38       '>' - the table has no entry for it; 38 is the first unused
//             code (previously '>' shared 23 with "==")
//    -2       '#'
//    -1       end of input (or read error)
//    -10      any other character
int judge_token(){
    init_token();
    token_num = 0;

    // Prime the lookahead on the very first call.
    if(flag == 0){
        ch = getc(fin);
    }
    flag = 1;

    // Skip blanks, counting lines.
    while(ch == ' ' || ch == '\t' || ch == '\n'){
        if(ch == '\n'){
            row++;
        }
        ch = getc(fin);
    }

    // ch always holds the result of the latest getc(), so the stream flags
    // tell whether that result was EOF. Comparing the char itself with EOF
    // fails where char is unsigned and misreads byte 0xFF where it is signed.
    if(feof(fin) || ferror(fin)){
        return -1;
    }

    // Identifier or keyword: letter (letter | digit)*
    if((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')){
        while((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || (ch >= '0' && ch <= '9')){
            append_to_token(ch);
            ch = getc(fin);
        }
        token[token_num++] = '\0';
        for(int i = 0;i < 13;i++){
            if(strcmp(token,keyWord[i]) == 0){
                return 3 + i; // keyWord is ordered like the code table
            }
        }
        return 2;
    }

    // Number: digit (digit | '.')*
    if(ch >= '0' && ch <= '9'){
        while((ch >= '0' && ch <= '9') || ch == '.'){
            append_to_token(ch);
            ch = getc(fin);
        }
        token[token_num] = '\0';
        return 1;
    }

    // Operators and delimiters.
    const char first = ch;
    append_to_token(first);
    switch(first){
        case '(': ch = getc(fin); return 16;
        case ')': ch = getc(fin); return 17;
        case '{': ch = getc(fin); return 33;
        case '}': ch = getc(fin); return 34;
        case ';': ch = getc(fin); return 27;
        case '"': ch = getc(fin); return 28;
        case ',': ch = getc(fin); return 35;
        case '#': ch = getc(fin); return -2;
        case '+': return match_pair('+', 29, 18);
        case '-': return match_pair('-', 30, 19);
        case '*': return match_pair('/', 32, 20);
        case '=': return match_pair('=', 26, 22);
        case '>': return match_pair('=', 23, 38);
        case '<': return match_pair('=', 25, 24);
        case '!': return match_pair('=', 37, 36);
        case '/':
            ch = getc(fin);
            if(ch == '*' || ch == '/'){
                // Both comment openers keep the code they had before.
                append_to_token(ch);
                ch = getc(fin);
                return 31;
            }
            return 21;
        default: ch = getc(fin); return -10;
    }
}
// Lexer driver: pull tokens from judge_token() until it reports end of input
// (-1). An unrecognised character (-10) is reported with the current line
// number; every other token is printed as "(code,text)". The final value of
// row is printed as the line count.
void getWord(){
    for(int code = judge_token(); code != -1; code = judge_token()){
        if(code == -10){
            cout<<"第 "<<row<<" 行出现错误."<<endl;
        }
        else{
            cout<<"("<<code<<","<<token<<")"<<endl;
        }
    }
    cout<<"一共有"<<row<<"行"<<endl;
}
// Program entry point.
// Opens "compiler.txt" for the lexer, runs the preprocessor on the same file
// (which prints the preprocessed text and writes "new.txt"), then prints the
// token stream of "compiler.txt".
// Returns 0 on success, 1 when "compiler.txt" cannot be opened.
int main(){
    fin = fopen("compiler.txt","r");
    if(fin == nullptr){
        // Without this check getc() in judge_token() would be handed a null stream.
        perror("compiler.txt");
        return 1;
    }

    // Called for its side effects only; the returned text is not needed here.
    preprocess("compiler.txt");

    getWord();

    fclose(fin);
    fin = nullptr;
    return 0;
}