【任务介绍】根据给定源语言的构词规则,从任意字符串中识别出该语言所有合法的单词符号,并以等长的二元组形式输出。
【输入】字符串形式的源程序。
【输出】单词符号所构成的串(流),单词以等长的二元组形式呈现。
【题目】设计一个程序,根据给定源语言的构词规则,从任意字符串中识别出该语言所有合法的单词符号,并以等长的二元组形式输出。
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <iostream>
using namespace std;
// Global keyword table: the 32 reserved words of the C language, listed in
// alphabetical order. A keyword's category code is its index plus one, so the
// codes run from <auto, 1> to <while, 32>.
static char keyword[32][10] = {
    "auto",     "break",    "case",     "char",
    "const",    "continue", "default",  "do",
    "double",   "else",     "enum",     "extern",
    "float",    "for",      "goto",     "if",
    "int",      "long",     "register", "return",
    "short",    "signed",   "sizeof",   "static",
    "struct",   "switch",   "typedef",  "union",
    "unsigned", "void",     "volatile", "while"
};
// Operator / delimiter table (36 entries; more may be appended).
// The category code of an entry is 33 plus its index, so the codes run from
// 33 for "+" up to 68 for "!". The trailing comment on each row gives the
// code range covered by that row.
static char operatorOrDelimiter[36][10] = {
    "+",  "-",  "*",  "/",        // 33-36
    "<",  "<=", ">",  ">=",       // 37-40
    "=",  "==", "!=",             // 41-43
    ";",  "(",  ")",  "^",  ",",  // 44-48
    "\"", "'",  "#",              // 49-51
    "&",  "&&", "|",  "||",       // 52-55
    "%",  "~",                    // 56-57
    "<<", ">>",                   // 58-59: left shift, right shift
    "[",  "]",  "{",  "}",        // 60-63
    "\\", ".",  "?",  ":",  "!"   // 64-68
};
// Category code 99 is used for numeric constants: <constant, 99>.
// Category code 100 is used for identifiers: <identifier, 100>.
// Identifier table: 1000 slots of up to 39 characters plus terminator.
// The whole table starts zeroed, so an empty string marks a free slot.
static char identifier[1000][40] = {};
// Keyword lookup.
// Compares s against each of the 32 entries of the given keyword table.
// Returns the 1-based category code (1-32) of the first exact match, or -1
// when s is not a keyword (the caller then treats it as an identifier).
int searchKeyword(char keyword[][10], char s[])
{
    int index = 0;
    // Advance until a match is found or the table is exhausted.
    while (index < 32 && strcmp(keyword[index], s) != 0)
    {
        ++index;
    }
    return (index < 32) ? index + 1 : -1;
}
// Returns true when the character may appear in an identifier as a "letter":
// an ASCII lowercase letter, an ASCII uppercase letter, or an underscore.
bool isLetter(char letter)
{
    const bool lowerCase = ('a' <= letter) && (letter <= 'z');
    const bool upperCase = ('A' <= letter) && (letter <= 'Z');
    return lowerCase || upperCase || (letter == '_');
}
// Returns true when the character is one of the ASCII decimal digits '0'-'9'.
bool isDigit(char digit)
{
    return ('0' <= digit) && (digit <= '9');
}
// Records the operator/delimiter whose category code is `code` (33-68):
// stores the code in syn, copies the operator's spelling from
// operatorOrDelimiter into token, and advances p past every character of it.
static void acceptOperator(int code, int &syn, char token[], int &p)
{
    syn = code;
    strcpy(token, operatorOrDelimiter[code - 33]);
    p += static_cast<int>(strlen(token));
}

// Core of the lexer: scans exactly one token (following the state-transition
// diagram of the language).
//
// Parameters:
//   syn          - out: category code of the token that was read:
//                  1-32 keyword, 33-68 operator/delimiter, 99 numeric
//                  constant, 100 identifier, 0 for the '$' end marker.
//   resourceCode - NUL-terminated source text.
//   token        - out: spelling of the token. The buffer must hold at least
//                  20 chars; its first 20 bytes are cleared on every call and
//                  spellings longer than 19 characters are truncated.
//   p            - in/out: index into resourceCode where scanning starts; on
//                  return it points just past the token. It is NOT advanced
//                  past the '$' end marker.
//
// On a character that starts no known token, prints a message and terminates
// the process with a non-zero exit status.
void scanner(int &syn, char resourceCode[], char token[], int &p)
{
    const int tokenCapacity = 20; // size of the caller's token buffer
    int count = 0;                // next free position in token[]

    // Skip blanks, tabs and line breaks ('\r' included so CRLF files work).
    while (resourceCode[p] == ' ' || resourceCode[p] == '\n'
           || resourceCode[p] == '\t' || resourceCode[p] == '\r')
    {
        p++;
    }
    const char ch = resourceCode[p];

    // Clear the token buffer before collecting; this also guarantees that the
    // (at most 19) characters stored below are always NUL-terminated.
    for (int i = 0; i < tokenCapacity; i++)
    {
        token[i] = '\0';
    }

    // Identifier or keyword: a letter followed by letters and digits.
    if (isLetter(ch))
    {
        while (isLetter(resourceCode[p]) || isDigit(resourceCode[p]))
        {
            // Consume the whole word, but never write past the token buffer.
            if (count < tokenCapacity - 1)
            {
                token[count++] = resourceCode[p];
            }
            p++;
        }
        syn = searchKeyword(keyword, token);
        if (syn == -1) // not a keyword, so it is an identifier
        {
            syn = 100;
        }
        return;
    }

    // Numeric constant. A '-' counts as the sign of a constant only when a
    // digit follows it directly; any other '-' is the minus operator handled
    // further down. (ch is not NUL when it is '-', so index p + 1 is still
    // inside the NUL-terminated source.)
    if (isDigit(ch) || (ch == '-' && isDigit(resourceCode[p + 1])))
    {
        do
        {
            if (count < tokenCapacity - 1)
            {
                token[count++] = resourceCode[p];
            }
            p++; // ends one past the number: the start of the next token
        } while (isDigit(resourceCode[p]));
        syn = 99;
        return;
    }

    // Look-ahead character for the two-character operators; never read beyond
    // the terminating NUL.
    const char next = (ch != '\0') ? resourceCode[p + 1] : '\0';

    switch (ch)
    {
    case '<': // <=, <<, <
        acceptOperator(next == '=' ? 38 : (next == '<' ? 58 : 37), syn, token, p);
        return;
    case '>': // >=, >>, >
        acceptOperator(next == '=' ? 40 : (next == '>' ? 59 : 39), syn, token, p);
        return;
    case '=': // ==, =
        acceptOperator(next == '=' ? 42 : 41, syn, token, p);
        return;
    case '!': // !=, !
        acceptOperator(next == '=' ? 43 : 68, syn, token, p);
        return;
    case '&': // &&, &
        acceptOperator(next == '&' ? 53 : 52, syn, token, p);
        return;
    case '|': // ||, |
        acceptOperator(next == '|' ? 55 : 54, syn, token, p);
        return;
    case '$': // end marker: category code 0, p stays on the marker
        syn = 0;
        return;
    default:
        break;
    }

    // Remaining single-character operators and delimiters: find the
    // one-character table entry equal to ch. (The one-character entries that
    // can start a longer operator were all handled by the switch above.)
    for (int i = 0; i < 36; i++)
    {
        if (operatorOrDelimiter[i][0] == ch && operatorOrDelimiter[i][1] == '\0')
        {
            acceptOperator(33 + i, syn, token, p);
            return;
        }
    }

    // Nothing matched: report the offending character and stop with failure.
    cout << "无法识别的字符>>>>> " << ch << endl;
    exit(1);
}
// Program entry point.
// Reads the source text from input.txt up to the '$' end marker, echoes it to
// standard output, then calls scanner() repeatedly and writes every token as a
// "<code, spelling>" pair to both standard output and output.txt.
// Returns 0 on success and 1 when either file cannot be opened.
int main(void)
{
    const int sourceCapacity = 10000;
    char resourceCode[sourceCapacity]; // holds the source text
    char token[20] = { 0 };            // spelling of the current token
    int syn = -1;                      // category code of the current token
    int p = 0;                         // index into resourceCode

    FILE *fp1 = fopen("input.txt", "r");
    if (fp1 == NULL)
    {
        cout << "无法打开文件!" << endl;
        return 1;
    }

    // Copy the file into the buffer up to (not including) the '$' marker.
    // fgetc() returns int so EOF can be told apart from real characters; the
    // loop also stops at EOF or when the buffer is full, so a file without
    // '$' can neither loop forever nor overrun the array. Two bytes are kept
    // in reserve for the marker and the terminating NUL added below.
    int c = 0;
    while (p < sourceCapacity - 2 && (c = fgetc(fp1)) != EOF && c != '$')
    {
        resourceCode[p++] = static_cast<char>(c);
    }
    fclose(fp1);
    resourceCode[p++] = '$'; // always end with the marker scanner() stops at
    resourceCode[p] = '\0';

    cout << "input.txt 源程序为:" << endl;
    cout << resourceCode << endl;

    FILE *fp2 = fopen("output.txt", "w+");
    if (fp2 == NULL)
    {
        cout << "无法写入!" << endl;
        return 1;
    }

    // Rewind and scan from the beginning until the end marker (code 0).
    p = 0;
    while (syn != 0)
    {
        scanner(syn, resourceCode, token, p);

        // Pick the text to print for this token; NULL means "print nothing"
        // (the end marker, or a code outside the known ranges).
        const char *text = NULL;
        if (syn == 100) // identifier
        {
            // Record the identifier in the table unless it is already there;
            // the first empty slot marks the end of the used part.
            for (int i = 0; i < 1000; i++)
            {
                if (strcmp(identifier[i], token) == 0)
                    break; // already recorded
                if (strcmp(identifier[i], "") == 0)
                {
                    strcpy(identifier[i], token);
                    break;
                }
            }
            text = token;
        }
        else if (syn == 99) // numeric constant
        {
            text = token;
        }
        else if (syn >= 1 && syn <= 32) // keyword
        {
            text = keyword[syn - 1];
        }
        else if (syn >= 33 && syn <= 68) // operator or delimiter
        {
            text = operatorOrDelimiter[syn - 33];
        }

        if (text != NULL)
        {
            cout << "<" << syn << ", " << text << ">" << endl;
            fprintf(fp2, "<%d, %s>\n", syn, text);
        }
    }

    fclose(fp2);
    system("pause");
    return 0;
}
运行结果展示
处理后(部分截图)