在基本要求的基础上,增加了"姓名"、"学号"以及学生本人姓名("董楚梁")这几个基本字。
#include<stdbool.h>
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#define MAXSIZE 999 //size of the buffer (str) that holds the input file contents
#define norw 13 //number of reserved words
#define txmax 999 //capacity of the name table
#define al 16 //maximum symbol length
//The three literals below are compared against non-ASCII tokens in getsym();
//a token equal to any of them is classified as a reserved word.
#define NAME "姓名"
#define STUNUM "学号"
#define REALNAME "董楚梁"
//Token kinds; nul marks a token that cannot be recognized.
//The enumerator order matters: print() classifies tokens by numeric range.
enum symbol
{
//invalid, identifier, number
nul, ident, number,
//operators
plus, minus, times, slash, eql, neq, lss, leq, gtr, geq, becomes,
//delimiters
lparen, rparen, comma, semicolon, period,
//13 reserved words + 1 extra kind (yessym) used for the custom keywords NAME, STUNUM and REALNAME
beginsym, endsym, ifsym, thensym, whilesym, writesym, readsym, dosym, callsym, oddsym, constsym, varsym, procsym, yessym
};
enum symbol wsym[norw+1]; //token kind of each reserved word, plus one slot for the custom keyword kind
enum symbol ssym[256]; //token kind of each single character, indexed by character code
//名字表结构
struct tablestruct {
char name[al + 1]; //符号
symbol sym; //符号种别
}table[txmax];
char str[MAXSIZE]; //holds the contents of the input file
FILE* fin; //input stream; opened in main(), consumed and closed in read()
int t = 0; //number of entries written to table so far (index of the next free entry)
//reserved words
char word[norw][al] = { "begin","call","const","do","end","if","odd",
"procedure","read","then","var","while","write" };//kept in sorted order so getsym() can binary-search it
//Reports whether a byte has its high bit set, i.e. is part of a non-ASCII
//(multi-byte) character rather than a plain 7-bit ASCII character.
//  ch: the byte to test
//  returns: true for byte values 0x80..0xFF, false for 0x00..0x7F
bool IsChinese(char ch)
{
    //Inspect the byte as unsigned: the answer must not depend on whether plain
    //char is signed, nor on how a negative value behaves under right shift.
    return (unsigned char)ch >= 0x80;
}
//bool IsChinese(char ch)
//{
// if(ch>=0&&ch<=127)
// return true;
// return false;
//}
//读入并保存文件内容,保存到str[MAXSIZE]中
void read() {
char ch;
strcpy(str, "");
int i = 0;
if (!fin)
{
printf("打开文件失败!\n");
exit(-1);
}
while ((ch = fgetc(fin)) != EOF)
{
if (ch != '\n' && ch != '\t')
str[i++] = ch;
else //换行和Tab均保存为空格
str[i++] = ' ';
}//读入文件中内容
str[i] = '\0';
//printf("%s", str);
fclose(fin);
}
//初始化
void init() {
read();
//设置单字符符号
for (int i = 0; i < 256; i++)
{
ssym[i] = nul;
}
ssym['+'] = plus;
ssym['-'] = minus;
ssym['*'] = times;
ssym['/'] = slash;
ssym['('] = lparen;
ssym[')'] = rparen;
ssym['='] = eql;
ssym[','] = comma;
ssym['.'] = period;
ssym['#'] = neq;
ssym[';'] = semicolon;
//保留字对应符号
wsym[0] = beginsym;
wsym[1] = callsym;
wsym[2] = constsym;
wsym[3] = dosym;
wsym[4] = endsym;
wsym[5] = ifsym;
wsym[6] = oddsym;
wsym[7] = procsym;
wsym[8] = readsym;
wsym[9] = thensym;
wsym[10] = varsym;
wsym[11] = whilesym;
wsym[12] = writesym;
wsym[13] = yessym;
}
//获取符号种别,并且存储到结构体数组中
void getsym() {
int len = strlen(str);
//printf("符号长度%d\n", len);
int j = 0, k=0, pos = 0;
char s[al + 1];//临时符号
while (pos < len && j < len)
{
strcpy(s, "");//s初始化
j = k = 0;
//空格直接跳过
while (str[pos] == ' ')
{
pos++;
}
//以字母开头,可能是标识符或保留字,英文或者字符
if (!IsChinese(str[pos])&&pos<len)//英文打头
{
if (str[pos] >= 'a' && str[pos] <= 'z')
{
j = pos;
int flag=0;
do
{
if (flag< al)
{
s[k++] = str[j++];
flag++;
}
else
{
j++;
}
} while ((str[j] >= 'a' && str[j] <= 'z') || (str[j] >= '0' && str[j] <= '9'));
s[k] = '\0';
int m = 0, n = norw - 1;
//折半查找,看字符串是否是保留字
do {
k = (m + n) / 2;
if (strcmp(s, word[k]) <= 0)
{
n = k - 1;
}
if (strcmp(s, word[k]) >= 0)
{
m = k + 1;
}
} while (m <= n);
strcpy(table[t].name, s);
if (m - 1 > n) //基本字
{
table[t].sym = wsym[k];
}
else //标识符
{
table[t].sym = ident;
}
t++;
pos = j;
}
else
{
if (str[pos] >= '0' && str[pos] <= '9') //数字
{
j = pos;
table[t].sym = number;
while (str[j] >= '0' && str[j] <= '9')
s[k++] = str[j++];
s[k] = '\0';
strcpy(table[t].name, s);
pos = j;
t++;
}
else
{
if (str[pos] == ':') {
if (str[pos + 1] == '=') {//:=运算符
table[t].sym = becomes;
strcpy(table[t].name, ":=");
pos += 2;
}
else {//非法字符,设置为nul
table[t].sym = nul;
s[0] = ':'; s[1] = '\0';
strcpy(table[t].name, s);
pos++;
}
t++;
}
else
{
if (str[pos] == '<') {
if (str[pos + 1] == '=') {// <=
table[t].sym = leq;
strcpy(table[t].name, "<=");
pos += 2;
}
else {// <
table[t].sym = lss;
strcpy(table[t].name, "<");
pos++;
}
t++;
}
else
{
if (str[pos] == '>') {
if (str[pos + 1] == '=') {// >=
table[t].sym = geq;
strcpy(table[t].name, ">=");
pos += 2;
}
else {// >
table[t].sym = gtr;
strcpy(table[t].name, ">");
pos++;
}
t++;
}
else
{//其它单个字符
table[t].sym = ssym[str[pos]];
s[0] = str[pos]; s[1] = '\0';
strcpy(table[t].name, s);
t++;
pos++;
}
}
}
}
}
}
else//中文
{
j = pos;
int flag = 0;
do
{
if (flag < al)
{
s[k++] = str[j++];
flag++;
}
else
{
j++;
}
} while ((str[j] >> 8) == -1|| (str[j] >= '0' && str[j] <= '9'));
s[k] = '\0';
strcpy(table[t].name, s);
int ret1 = strcmp(s, STUNUM);
int ret2 = strcmp(s, NAME);
int ret3 = strcmp(s, REALNAME);
if (!ret1 || !ret2 || !ret3)
{
table[t].sym = wsym[13];
}
t++;
pos = j;
}
}
}
//输出表内容
void print() {
int i;
for (i = 0; i < t; i++)
{
printf("%-15s ", table[i].name);
if (table[i].sym == 0)
{
printf("非法");
}
else if (table[i].sym == 1)
{
printf("标识符");
}
else if (table[i].sym == 2)
{
printf("数字");
}
else if (table[i].sym >= 3 && table[i].sym <= 13)
{
printf("运算符");
}
else if (table[i].sym >= 14 && table[i].sym <= 18)
{
printf("界符");
}
else
{
printf("基本字");
}
printf("\n");
}
}
//Entry point: tokenizes the file "a.txt" and prints the resulting token table.
int main()
{
fin = fopen("a.txt", "r"); //the result is not checked here; read() tests fin and exits if it is NULL
init(); //loads the file into str (via read()) and fills the ssym/wsym lookup tables
getsym(); //scans str and fills table
print(); //prints every token together with its category
system("pause"); //runs the shell command "pause"; NOTE(review): presumably keeps a Windows console window open - confirm
return 0;
}
const a=10;
var b,c;
procedure p;
begin
c:=b+a;
end;
begin
姓名:董楚梁1
end.
学号:202002156023
实验结果: