词法分析代码
## 待分析源程序
int main(){
int36 a=A7,b=5TU,z=a+b;
int c=10;
for(int i=0;i<10;i++){
if(c%2==0) z=z+10;
else z=z+1Z;
c=c+1;}
}
### 词法分析器实现
#define _CRT_SECURE_NO_WARNINGS 1
#include <cstdio>
#include <iostream>
#include <string>
#include <string.h>
#include <Windows.h>
using namespace std;
/* Token categories assigned by the lexer. Values are consecutive from 0. */
enum TokenCode
{
    TK_UNDEF  = 0,   /* unrecognised symbol */
    TK_KW     = 1,   /* keyword */
    TK_OP     = 2,   /* operator */
    TK_DL     = 3,   /* delimiter */
    TK_INT    = 4,   /* integer constant */
    TK_DOUBLE = 5,   /* floating-point constant */
    TK_CHAR   = 6,   /* character constant */
    TK_IDENT  = 7,   /* identifier */
    TK_STRING = 8,   /* string constant */
    TK_INT36  = 9,   /* int36 constant */
    TK_INT16  = 10   /* "hexadecimal" constant (label used by print) */
};
/****************************************** Global state *****************************************************/
TokenCode code = TK_UNDEF;  // category of the token most recently recognised
int row = 1;                // line counter; lexicalAnalysis increments it for every '\n' it skips
string token = "";          // text of the token currently being built / printed
string token1 = "";         // scratch buffer used while scanning int36 declarations and assignments
string int36[10];           // names of variables recorded from `int36` declarations

// Reserved words recognised by isKey/getKeyID. Each row is 10 bytes wide, so an
// entry may hold at most 9 characters plus the terminating NUL.
char keyWord[][10] = { "auto", "break", "case", "char",
                       "const", "continue", "default", "do",
                       "double", "else", "enum", "extern",
                       "float", "for", "goto", "if",
                       "int", "long", "register", "return",
                       "short", "signed", "sizeof", "static",
                       "struct", "switch", "typedef", "union",
                       "unsigned", "void", "volatile", "while", "int36" };

// Number of entries in keyWord. Derived from the table itself so that adding or
// removing a keyword can never leave the count stale (a stale count would either
// skip keywords or read past the end of the table).
const int MAX = static_cast<int>(sizeof(keyWord) / sizeof(keyWord[0]));
/**********************************************函数*****************************************************/
/********************************************
* Purpose: print one line of lexer output for the current token.
* code:  category of the token; selects the label, the printed
*        category number and the console colour.
* Reads the global `token` for the token text.
* The category number written to the console is the digit embedded
* in the tag string below; it is not the numeric value of `code`.
* The console colour is left set after the call.
* Categories not listed below produce no output.
*********************************************/
void print(TokenCode code)
{
    const char* label = nullptr;   // text printed before the token
    const char* tag = nullptr;     // "\t(<number>, " printed after the token
    unsigned short colour = 0;     // console text attribute

    switch (code)
    {
    case TK_UNDEF:   /* unrecognised symbol: dark red */
        colour = FOREGROUND_RED;
        label = "未识别的符号\t\t";
        tag = "\t(0, ";
        break;
    case TK_KW:      /* keyword: bright red */
        colour = FOREGROUND_INTENSITY | FOREGROUND_RED;
        label = "关键字\t\t";
        tag = "\t(1, ";
        break;
    case TK_OP:      /* operator: bright blue */
        colour = FOREGROUND_INTENSITY | FOREGROUND_BLUE;
        label = "运算符\t\t";
        tag = "\t(3, ";
        break;
    case TK_DL:      /* delimiter: bright blue */
        colour = FOREGROUND_INTENSITY | FOREGROUND_BLUE;
        label = "界符\t\t";
        tag = "\t(6, ";
        break;
    case TK_INT:     /* numeric constants (integer and floating point): bright red + green */
    case TK_DOUBLE:
        colour = FOREGROUND_INTENSITY | FOREGROUND_RED | FOREGROUND_GREEN;
        label = "数值常量\t\t";
        tag = "\t(4, ";
        break;
    case TK_CHAR:    /* character constant: bright red + green */
        colour = FOREGROUND_INTENSITY | FOREGROUND_RED | FOREGROUND_GREEN;
        label = "字符常量\t\t";
        tag = "\t(5, ";
        break;
    case TK_IDENT:   /* identifier: bright blue */
        colour = FOREGROUND_INTENSITY | FOREGROUND_BLUE;
        label = "标识符\t\t";
        tag = "\t(2, ";
        break;
    case TK_STRING:  /* string constant: green */
        colour = FOREGROUND_GREEN;
        label = "字符串\t\t";
        tag = "\t(7, ";
        break;
    case TK_INT36:   /* int36 constant: green */
        colour = FOREGROUND_GREEN;
        label = "int36型常量\t\t";
        tag = "\t(8, ";
        break;
    case TK_INT16:   /* "hexadecimal" constant: blue + green */
        colour = FOREGROUND_BLUE | FOREGROUND_GREEN;
        label = "16进制常量\t\t";
        tag = "\t(9, ";
        break;
    default:         /* unknown category: print nothing, leave the colour alone */
        return;
    }

    SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE), colour);
    cout << label << token << tag << token << ")" << endl;
}
/********************************************
* Purpose: tell whether a word is a reserved keyword.
* token:   the word to look up.
* Returns true when `token` equals one of the first MAX
* entries of the keyWord table, false otherwise.
*********************************************/
bool isKey(string token)
{
    int idx = 0;
    // Advance until a matching entry is found or the table is exhausted.
    while (idx < MAX && token != keyWord[idx])
        ++idx;
    return idx < MAX;
}
/********************************************
* Purpose: return the category id of a keyword.
* token:   the word to look up.
* Returns 1 when `token` equals one of the first MAX entries
* of the keyWord table, -1 otherwise.
* NOTE(review): the result is the constant 1 for every keyword,
* not a per-keyword value such as "table index + 1". If a distinct
* id per keyword is wanted, this function needs changing - confirm
* with its callers first.
*********************************************/
int getKeyID(string token)
{
    int idx = 0;
    while (idx < MAX)
    {
        if (token == keyWord[idx])
            return 1;
        ++idx;
    }
    return -1;
}
/********************************************
* Purpose: tell whether a character is a lower-case letter.
* letter:  the character to test.
* Returns true for 'a'..'z', false for everything else.
*********************************************/
bool isLLetter(char letter)
{
    return 'a' <= letter && letter <= 'z';
}
/********************************************
* Purpose: tell whether a character is an upper-case letter.
* letter:  the character to test.
* Returns true for 'A'..'Z', false for everything else.
*********************************************/
bool isMLetter(char letter)
{
    return 'A' <= letter && letter <= 'Z';
}
/********************************************
* Purpose: tell whether a character is a decimal digit.
* digit:   the character to test.
* Returns true for '0'..'9', false for everything else.
*********************************************/
bool isDigit(char digit)
{
    return '0' <= digit && digit <= '9';
}
/********************************************
* Lexical analysis: the entry point lexicalAnalysis() plus the
* file-local helpers it is built from.
*
* All helpers work on the globals `token`, `token1`, `int36`
* and `code`, and on a seekable FILE* opened for reading.
* Characters are kept in an `int` so that the EOF value returned
* by fgetc can never be confused with a real byte.
*********************************************/

/* Un-reads the lookahead character so the main loop sees it again.
 * Nothing is done when the lookahead was EOF: fgetc did not advance the
 * stream in that case, so stepping back would re-deliver the last real
 * character and the lexer would never terminate. */
static void stepBack(FILE* fp, int lookahead)
{
    if (lookahead != EOF)
        fseek(fp, -1L, SEEK_CUR);
}

/* True for characters that may appear in an identifier: a-z, 0-9 or '_'. */
static bool isIdentChar(int ch)
{
    if (ch == EOF)
        return false;
    const char c = static_cast<char>(ch);
    return isLLetter(c) || isDigit(c) || c == '_';
}

/* True for characters that may appear in an int36 literal: A-Z or 0-9. */
static bool isInt36Char(int ch)
{
    if (ch == EOF)
        return false;
    const char c = static_cast<char>(ch);
    return isMLetter(c) || isDigit(c);
}

/* Appends `ch` and every following character accepted by `accept` to the
 * global `token`. Returns the first rejected character (the lookahead),
 * which may be EOF. */
static int appendRun(FILE* fp, int ch, bool (*accept)(int))
{
    while (accept(ch))
    {
        token.push_back(static_cast<char>(ch));
        ch = fgetc(fp);
    }
    return ch;
}

/* Completes a one- or two-character operator: if the next character is
 * `expected` it is appended to `token`, otherwise it is put back. */
static void acceptSecond(FILE* fp, char expected)
{
    const int next = fgetc(fp);
    if (next == expected)
        token.push_back(expected);
    else
        stepBack(fp, next);
}

/* Called right after the keyword `int36` has been read. Looks ahead to the
 * terminating ';' and stores, in int36[0..], every run of lower-case letters
 * that is immediately followed by '='. Slots beyond those written keep their
 * previous contents. The stream position is restored afterwards so the
 * declaration is tokenised normally. Stops at EOF if no ';' is found and
 * never writes past the end of the int36 array. */
static void recordInt36Names(FILE* fp)
{
    const size_t slots = sizeof(int36) / sizeof(int36[0]);
    const long resume = ftell(fp);
    if (resume < 0)          // position unknown: cannot look ahead safely
        return;

    size_t next = 0;
    int ch = fgetc(fp);
    if (ch == ' ')           // a single blank after the keyword is skipped
        ch = fgetc(fp);

    while (ch != ';' && ch != EOF)
    {
        token1 = "";
        while (ch != EOF && isLLetter(static_cast<char>(ch)))
        {
            token1.push_back(static_cast<char>(ch));
            ch = fgetc(fp);
        }
        if (ch == '=' && next < slots)
            int36[next++] = token1;
        if (ch == ';' || ch == EOF)
            break;
        ch = fgetc(fp);
    }

    fseek(fp, resume, SEEK_SET);   // also clears the EOF indicator if it was hit
}

/* Decides whether a digits-only literal belongs to an int36 variable.
 * `lookahead` is the character read after the literal (possibly EOF).
 * The input is scanned backwards, starting at the lookahead, for the nearest
 * '=' or '<'; the single character in front of it is compared with the
 * recorded int36 names. When the lookahead itself is the operator, the
 * operator character is what gets compared, which never matches a name.
 * Returns false when the start of the file is reached without finding an
 * operator. The stream position is restored before returning.
 * NOTE(review): the backward scan is not limited to the current statement
 * and only a one-character name can match - confirm this is intended. */
static bool followsInt36Name(FILE* fp, int lookahead)
{
    const size_t slots = sizeof(int36) / sizeof(int36[0]);
    token1 = "";
    const long resume = ftell(fp);
    if (resume < 0)
        return false;

    int ch = lookahead;
    bool found = true;
    while (ch != '=' && ch != '<')
    {
        if (fseek(fp, -1L, SEEK_CUR) != 0)      // already at the start of the file
        {
            found = false;
            break;
        }
        ch = fgetc(fp);
        if (ch == EOF || fseek(fp, -1L, SEEK_CUR) != 0)
        {
            found = false;
            break;
        }
    }

    if (found)
    {
        fseek(fp, -1L, SEEK_CUR);
        const int left = fgetc(fp);
        if (left != EOF)
            token1.push_back(static_cast<char>(left));
    }

    bool matched = false;
    if (!token1.empty())     // empty slots in int36 must not count as a match
    {
        for (size_t i = 0; i < slots && !matched; ++i)
            matched = (token1 == int36[i]);
    }

    fseek(fp, resume, SEEK_SET);
    return matched;
}

/********************************************
* Purpose: split the input into tokens and print each one.
* fp:      seekable stream opened for reading; must not be NULL.
* For every token the globals `token` (text) and `code` (category)
* are set and print(code) is called. `row` is incremented for every
* newline skipped between tokens.
* Token rules:
*   "..."                      string constant; an unterminated string is
*                              reported as an unrecognised symbol
*   a-z then [a-z0-9_]*        keyword or identifier
*   A-Z then [A-Z0-9]*         int36 constant
*   0-9 then [A-Z0-9]*         int36 constant if it contains a letter,
*                              otherwise TK_INT16 when it follows an int36
*                              variable, else integer constant
*   _ then [a-z0-9_]*          identifier
*   'x'                        character constant (x is a letter or digit and
*                              the closing quote is present), else unrecognised
*   + ++ - -- * / % = == < <= > >= & && | ||     operators
*   ( ) [ ] { } , ;            delimiters
*   anything else              unrecognised symbol
*********************************************/
void lexicalAnalysis(FILE* fp)
{
    int ch;
    while ((ch = fgetc(fp)) != EOF)
    {
        const char first = static_cast<char>(ch);
        token = first;

        if (ch == ' ' || ch == '\t' || ch == '\n')     // skip blanks, tabs and newlines
        {
            if (ch == '\n')
                row++;
            continue;
        }
        else if (ch == '\"')
        {
            ch = fgetc(fp);
            while (ch != '\"' && ch != EOF)
            {
                token.push_back(static_cast<char>(ch));
                ch = fgetc(fp);
            }
            if (ch == EOF)
                code = TK_UNDEF;                       // string never closed
            else
            {
                token.push_back('\"');
                code = TK_STRING;
            }
        }
        else if (isLLetter(first))                     // keyword or identifier
        {
            token = "";
            ch = appendRun(fp, ch, isIdentChar);
            stepBack(fp, ch);
            if (isKey(token))
            {
                code = TK_KW;
                if (token.compare("int36") == 0)
                    recordInt36Names(fp);
            }
            else
                code = TK_IDENT;
        }
        else if (isMLetter(first))                     // int36 constant starting with a letter
        {
            token = "";
            ch = appendRun(fp, ch, isInt36Char);
            stepBack(fp, ch);
            code = TK_INT36;
        }
        else if (isDigit(first))                       // constant starting with a digit
        {
            token = "";
            bool hasLetter = false;                    // set once an upper-case letter is part of the literal
            while (isInt36Char(ch))
            {
                token.push_back(static_cast<char>(ch));
                ch = fgetc(fp);
                if (ch != EOF && isMLetter(static_cast<char>(ch)))
                    hasLetter = true;
            }
            if (hasLetter)
                code = TK_INT36;
            else
                code = followsInt36Name(fp, ch) ? TK_INT16 : TK_INT;
            stepBack(fp, ch);
        }
        else if (ch == '_')                            // identifier starting with an underscore
        {
            token = "";
            ch = appendRun(fp, ch, isIdentChar);
            stepBack(fp, ch);
            code = TK_IDENT;
        }
        else if (ch == '\'')                           // character constant: quote, one character, quote
        {
            token = "";
            for (int i = 0; i < 3 && ch != EOF; i++)
            {
                token.push_back(static_cast<char>(ch));
                ch = fgetc(fp);
            }
            stepBack(fp, ch);
            code = TK_UNDEF;
            if (token.size() == 3 && token[2] == '\'')
            {
                const char body = token[1];
                if (isLLetter(body) || isMLetter(body) || isDigit(body))
                    code = TK_CHAR;
            }
        }
        else
        {
            switch (ch)
            {
            /* operators */
            case '+': acceptSecond(fp, '+'); code = TK_OP; break;    // + or ++
            case '-': acceptSecond(fp, '-'); code = TK_OP; break;    // - or --
            case '=':                                                // = or ==
            case '<':                                                // < or <=
            case '>':                                                // > or >=
                acceptSecond(fp, '=');
                code = TK_OP;
                break;
            case '&': acceptSecond(fp, '&'); code = TK_OP; break;    // & or &&
            case '|': acceptSecond(fp, '|'); code = TK_OP; break;    // | or ||
            case '*':
            case '/':
            case '%':
                code = TK_OP;
                break;
            /* delimiters */
            case '(':
            case ')':
            case '[':
            case ']':
            case '{':
            case '}':
            case ',':
            case ';':
                code = TK_DL;
                break;
            /* anything else */
            default:
                code = TK_UNDEF;
                break;
            }
        }
        print(code);
    }
}
/* Program entry point: opens the fixed input file "0-1背包.txt", runs the
 * lexer over it and prints the result to the console.
 * Returns 0 on success, 1 when the input file cannot be opened. */
int main()
{
    FILE* fp = fopen("0-1背包.txt", "r");
    if (fp == NULL)
    {
        // Stop here: passing a null stream to the lexer or to fclose is undefined behaviour.
        cout << "路径输入错误!" << endl;
        return 1;
    }
    cout << "/=***************************词法分析结果***************************=/" << endl;
    lexicalAnalysis(fp);
    fclose(fp);
    return 0;
}