1. 以文件形式输入待编译的源代码,主程序文件为 comp.cpp:
#include<iostream>
#include"globalvar.h"
#include "semantic.h"
using namespace std;
// Input stream for the source being compiled; opened by Compiler() below.
FILE* file;
// Default input file name that main() passes to Compiler().
char filename[] = "test.txt";
/*
 * Open the source file `filename` for reading and run the parser on it.
 *
 * The opened stream is stored in the global `file`. If the file cannot be
 * opened, a message is printed and the process exits with a non-zero status
 * (the previous exit(0) reported success to the caller for a failure).
 * When program() returns while the stream is still open, it is closed here
 * so the handle is not leaked.
 */
void Compiler(char filename[])
{
    file = fopen(filename, "r");
    if (!file)
    {
        cout << "打开文件失败!" << endl;
        exit(1); // failure must not look like success to the invoking shell
    }

    cout << "打开文件成功!开始读取文件。。。" << endl;
    program();

    // `file` is only non-NULL here if nothing closed and cleared it during parsing.
    if (file)
    {
        fclose(file);
        file = NULL;
    }
}
/* Entry point: compile the default input file and report success. */
int main()
{
    Compiler(filename);

    return 0;
}
2. program() 是语法分析的入口。语法分析器需要逐个取得有效单词,而单词的识别由词法分析完成,因此语法分析过程会调用词法分析器,并使用其返回值。
3. 词法分析器:Lexical.cpp
#include"globalvar.h"
#include<iostream>
// Input stream defined in another translation unit; scan() reads from it.
extern FILE* file;
#define BUFLEN 80//size of the line buffer in bytes
char* lineLen = NULL;//result of the last fgets() in scan(): NULL when no data was read (despite the name, not a length)
int readPos = -1;//index in `line` of the most recently read character
char line[BUFLEN];//line buffer filled by fgets()
int lineNum = 1;//current line number
int colNum = 0;//current column number
int error;//error-termination flag (per the original comment); not referenced in the code visible here
char lastch = ' ';//previous character
char ch = ' ';//current character
int f=0;//scan() return value as recorded by tokenize()
/*
 * Read the next character of the input into the global `ch`.
 *
 * Characters are served from `line`, which is refilled with fgets() once the
 * previous call has handed out the buffer's terminating 0. That 0 is itself
 * returned to the caller as a character, so callers see ch == 0 between
 * buffer fills. `lineNum`/`colNum` are updated as characters are consumed.
 *
 * Returns 0 when a character was read. Returns -1 at end of input; the stream
 * is then closed, `file` is set to NULL and `ch` holds the end marker
 * (char)-1. Further calls keep returning -1.
 *
 * NOTE(review): fgets() stores at most BUFLEN-1 characters per fill, so a
 * source line longer than that is delivered in several pieces with a 0
 * character between them.
 */
int scan()
{
    // Written as (char)-1 so the comparison below also holds where plain char is unsigned.
    const char eofMark = (char)-1;

    // Stream not open, or already closed at end of input: do not walk into
    // stale buffer contents and never hand a NULL stream to fgets().
    if (file == NULL)
    {
        ch = eofMark;
        return -1;
    }

    if (ch == 0) // previous call returned the buffer terminator: refill
    {
        lineLen = fgets(line, BUFLEN, file);
        if (lineLen == NULL) // nothing more to read
        {
            line[0] = eofMark; // plant the end marker as the next character
        }
        readPos = -1; // restart at the beginning of the buffer
    }

    readPos++;
    ch = line[readPos];

    if (lastch == '\n') // the previous character ended a line
    {
        lineNum++;
        colNum = 0;
    }

    if (ch == eofMark) // end of input: close the stream automatically
    {
        fclose(file);
        file = NULL;
        return -1;
    }

    // Only real source characters occupy a column; the buffer terminator
    // (0) and the newline do not.
    if (ch != '\n' && ch != 0)
    {
        colNum++;
    }

    lastch = ch;
    return 0;
}
enum Tag token = NW_NULL;//category of the most recently recognised symbol
char idname[idLen + 1];//text of the current identifier
int val = 0;//value of the current number
char str[stringLen + 1];//storage for a string (not written by the code visible here)
char letter = 0;//storage for a character (not written by the code visible here)
/* True when the current character `ch` is a decimal digit '0'..'9'. */
bool digit()
{
    return '0' <= ch && ch <= '9';
}
/* True when the current character `ch` is an ASCII letter (a-z or A-Z). */
bool cha()
{
    const bool isLower = 'a' <= ch && ch <= 'z';
    const bool isUpper = 'A' <= ch && ch <= 'Z';
    return isLower || isUpper;
}
// Number of reserved words in the two tables below.
#define reservedNum 18
// Reserved-word spellings. checkKeyword() binary-searches this table with
// strcmp(), so the entries must stay in ascending strcmp() order.
static char reservedTable[reservedNum][idLen] = {
"break",
"case","char","cin","continue","cout",
"default","do",
"else","extern",
"for",
"if","int",
"return",
"string","switch",
"void",
"while"};
// Token tags parallel to reservedTable: reservedSymbol[i] is the tag
// checkKeyword() assigns when idname equals reservedTable[i].
static enum Tag reservedSymbol[reservedNum] = {
NW_BREAK,
NW_CASE,NW_CHAR,NW_CIN,NW_CONTINUE,NW_COUT,
NW_DEFAULT,NW_DO,
NW_ELSE,NW_EXTERN,
NW_FOR,
NW_IF,NW_INT,
NW_RETURN,
NW_STRING,NW_SWITCH,
NW_VOID,
NW_WHILE
};
/*
 * Decide whether the text in `idname` is a reserved word.
 *
 * Binary-searches reservedTable; on a match `token` becomes the matching
 * entry of reservedSymbol, otherwise `token` becomes ID.
 */
void checkKeyword()
{
    int lo = 0;
    int hi = reservedNum - 1;

    while (lo <= hi)
    {
        const int mid = (lo + hi) / 2;
        const int order = strcmp(idname, reservedTable[mid]);

        if (order == 0) // exact match: it is a keyword
        {
            token = reservedSymbol[mid];
            return;
        }

        if (order < 0)
            hi = mid - 1; // idname sorts before the probe
        else
            lo = mid + 1; // idname sorts after the probe
    }

    token = ID; // search range exhausted: ordinary identifier
}
/*
 * Recognise the next symbol starting at the current character `ch` and store
 * its category in the global `token`.
 *
 * Side outputs: identifiers/keywords leave their text in `idname` (truncated
 * to idLen characters, with a lexerror(id2long) when longer); numbers leave
 * their value in `val` (only the first numLen digits are accumulated, with a
 * lexerror(num2long) when longer). Comments and a lone '|' yield NW_NULL.
 *
 * Returns -1 with token == NW_NULL when the last recorded scan() result was
 * end of input. The identifier and number branches return the result of their
 * final scan() call; the punctuation branch returns 0, or -1 for the end
 * marker character.
 *
 * NOTE(review): GET_CHAR is a macro defined outside the code visible here.
 * It presumably advances `ch` via scan() and leaves this function on end of
 * input; the comment-skipping loops below have no other exit at end of
 * input — confirm against its definition.
 *
 * Fix relative to the earlier version: a block comment whose terminator is
 * preceded by an extra '*' (for example one ending in "**" followed by '/')
 * is now closed correctly. Previously the character after a '*' was never
 * re-tested for being '*', so such a terminator was skipped.
 */
int tokenize()
{
    // Skip blanks, newlines, tabs and the 0 character, recording the last scan() result.
    while (ch == ' ' || ch == '\n' || ch == '\t' || ch == 0)
    {
        f = scan();
    }
    if (f == -1)
    {
        token = NW_NULL;
        return -1;
    }

    // Identifier or keyword
    if (cha() || ch == '_')
    {
        int idCount = 0; // characters stored in idname
        int reallen = 0; // characters actually consumed
        // NOTE(review): this branch has always used a local for the scan()
        // result, so the global `f` is not updated here (unlike the number
        // branch). Behaviour kept as-is — confirm whether that is intended.
        int rc;
        do {
            reallen++;
            if (idCount < idLen)
            {
                idname[idCount] = ch;
                idCount++;
            }
            rc = scan();
        } while (cha() || ch == '_' || digit());
        idname[idCount] = 0; // terminate the identifier text
        if (reallen > idLen) // identifier too long
        {
            lexerror(id2long, 0);
        }
        checkKeyword(); // sets token to a keyword tag or ID
        return rc;
    }
    // Number
    else if (digit())
    {
        token = NUM;
        int numCount = 0; // digits folded into val
        val = 0;
        int reallen = 0; // digits actually consumed
        do {
            reallen++;
            if (numCount < numLen)
            {
                val = ch - '0' + val * 10;
                numCount++;
            }
            f = scan();
        } while (digit());
        if (reallen > numLen) // number too long
        {
            lexerror(num2long, 0);
        }
        return f;
    }
    else
    {
        // Operators and delimiters
        switch (ch) {
        case '+':
            token = ADD;
            GET_CHAR;
            break;
        case '-':
            token = SUB;
            GET_CHAR;
            break;
        case '*':
            token = MUL;
            GET_CHAR;
            break;
        case '/':
            GET_CHAR;
            token = DIV;
            if (ch == '/') { // line comment: skip to the end of the line
                token = NW_NULL;
                while (ch != '\n')
                    GET_CHAR;
                GET_CHAR;
            }
            else if (ch == '*') { // block comment
                token = NW_NULL;
                GET_CHAR; // first character of the comment body
                for (;;) {
                    if (ch != '*') {
                        GET_CHAR;
                        continue;
                    }
                    GET_CHAR; // character following a '*'
                    if (ch == '/') {
                        break;
                    }
                    // Not '/': loop without advancing so that this character
                    // is itself tested for '*' (handles "**" before '/').
                }
                GET_CHAR; // step past the closing '/'
            }
            break;
        case '>':
            token = GT;
            GET_CHAR;
            if (ch == '=')
            {
                token = GE;
                GET_CHAR;
            }
            else if (ch == '>')
            {
                token = INPUT;
                GET_CHAR;
            }
            break;
        case '<':
            token = LT;
            GET_CHAR;
            if (ch == '=')
            {
                token = LE;
                GET_CHAR;
            }
            else if (ch == '<')
            {
                token = OUTPUT;
                GET_CHAR;
            }
            break;
        case '=':
            token = ASSIGN;
            GET_CHAR;
            if (ch == '=')
            {
                token = EQU;
                GET_CHAR;
            }
            break;
        case '&':
            token = LEA;
            GET_CHAR;
            if (ch == '&')
            {
                token = AND;
                GET_CHAR;
            }
            break;
        case '|':
            token = NW_NULL; // a single '|' is not given a token of its own
            GET_CHAR;
            if (ch == '|')
            {
                token = OR;
                GET_CHAR;
            }
            break;
        case '!':
            token = NOT;
            GET_CHAR;
            if (ch == '=')
            {
                token = NEQU;
                GET_CHAR;
            }
            break;
        case ',':
            token = COMMA;
            GET_CHAR;
            break;
        case ':':
            token = COLON;
            GET_CHAR;
            break;
        case ';':
            token = SEMICON;
            GET_CHAR;
            break;
        case '(':
            token = LPAREN;
            GET_CHAR;
            break;
        case ')':
            token = RPAREN;
            GET_CHAR;
            break;
        case '[':
            token = LBRACK;
            GET_CHAR;
            break;
        case ']':
            token = RBRACK;
            GET_CHAR;
            break;
        case '{':
            token = LBRACE;
            GET_CHAR;
            break;
        case '}':
            token = RBRACE;
            GET_CHAR;
            break;
        case -1: // end marker reached without the end-of-input check above firing
            lexerror(charwrong, 0);
            return -1;
        default:
            token = EXCEP;
            lexerror(excpchar, ch);
            // A lexical error, but it does not block parsing or semantic
            // analysis, so it is treated as a warning for now.
            GET_CHAR;
        }
    }
    return 0;
}