本文概述
本文讲述了算符优先文法的原理及如何用C语言实现。并在给出的代码中实现了FirstVt集,LastVt集的计算输出,构造优先关系表并输出,和对任意输入字符串的分析
实验目的
了解用算符优先法对表达进行语法分析的方法,掌握自顶向下的预测语法分析程序的构造方法。
实验内容
(一)实验原理
算符优先分析法(Operator Precedence Parsing)是一种自底向上的语法分析方法,主要用于解析算术表达式和其他类似的文法。这种方法通过比较算符之间的优先级和结合性来决定何时进行归约,从而逐步将输入字符串转换为文法的句柄。
优先关系
算符优先分析法通过定义算符之间的优先关系来决定归约的顺序。这些优先关系包括:
- a > b:表示算符 a 的优先级高于算符 b。
- a < b:表示算符 a 的优先级低于算符 b。
- a = b:表示算符 a 和算符 b 的优先级相等。
文法要求
算符优先分析法适用于算符文法,并且两算符之间最多只有一个优先关系。
Firstvt集和LastVT集
FIRSTVT(A):集合包含所有可以从非终结符 A 导出的字符串的第一个终结符。
LASTVT(A):集合包含所有可以从非终结符 A 导出的字符串的最后一个终结符。
构造方法:
优先关系表
构造算法:
优先关系表示例:
分析过程
算符优先分析器的工作过程如下:
- 初始化栈:分析栈和关系栈中最初包含一个结束标志符 # 。
- 读取输入:读取输入字符串的第一个符号b。
- 比较优先关系:
如果从栈顶开始第一个终结符是 a,且当前输入符号是 b,则根据优先关系表进行以下操作:- 如果 a < b,则把a压入分析栈,继续读取下一个输入符号。
- 如果 a > b,则尝试进行归约。
- 如果 a = b,则把a压入分析栈,继续读取下一个输入符号。
- 归约:
如果栈顶符号是非终结符 A,且当前栈中的符号序列可以归约为某个产生式的右侧,则进行归约,将产生式的左侧符号压入栈中。 - 结束条件:
- 如果栈中只剩下文法的起始符号和底部标志符 #,且输入字符串已完全读取,则分析成功。
- 如果优先关系表中没有记录a与b之间的关系,则报告输入字符串非法。
- 分析过程示例:
(二)程序结构及功能介绍
程序总流程为:从文件中读取文法 -> 输出文法信息 -> 计算FirstVt集并输出 -> 计算LastVt集并输出 -> 判断是否是算符优先文法 -> 构造算符优先表并输出 -> 手动输入要分析的字符串 -> 输出分析过程和结果
读取文件
文法从文件中读取,事先准备一个规则.txt文件,如下:
非终结符要求必须是大写字母,一行只能有一条规则,可用‘|’表示或,空用$表示
程序读取后输出如下:
//产生式的读取,非终结符必须为大写字母,$表示空
void read_rule(char* rulefile, noterminal* h_noter, terminal* h_ter);
//产生式输出
void print_rule(noterminal* h_noter, terminal* h_ter);
FirstVt,LastVt集的计算输出
程序会计算出每个非终结符对应的两个相关集,为构造算符优先表做准备:
void make_firstvt(noterminal* h_noter, terminal* h_ter); //计算FirstVt集
void make_lastvt(noterminal* h_noter, terminal* h_ter); //计算lastvt集
判断是否为算符优先文法
在FIrstVt和LastVt出来后就可以构造预测分析表了,在构造表的同时进行是否是算符优先文法的判断。如果一对终结符之间不止一个算符关系,即在把算符关系填入单元格时发现单元格中已经有了,就说明此文法不是算符优先文法。此时程序会输出判断结果然后直接退出:
void error_treat(); //错误处理
构造算符优先表
根据产生式,FirstVt集,LastVt集构造算符优先表:
void make_patable(noterminal* h_noter, terminal* h_ter); //构造优先关系表
输入与分析
最后,手动输入字符串,程序会进行分析并输出:
int sf_analyse(noterminal* h_noter, terminal* h_ter,char*st); //算符优先分析
char guiyue(char* st, noterminal* h_noter); //归约处理
(三)完整代码
#include<stdio.h>
#include <string.h>
#include <stdlib.h>
//非终结符
typedef struct noterminal
{
char ch;
char* FirstVt; //对应firstvt集
char* Lastvt; //对应lastvt集
char* guize[10]; //对应产生式
int size = 0; //产生式个数
struct noterminal* next;
}noterminal;
//终结符
typedef struct terminal
{
int number;
char ch;
char* patable; //优先关系表
struct terminal* next;
}terminal;
//头指针
noterminal* h_noter;
terminal* h_ter;
noterminal* check_noterminal(char ch, noterminal* h_noter); //检查非终结符是否已拥有
terminal* check_terminal(char ch, terminal* h_ter); //检查终结符是否已拥有
bool in_char(char* string, char ch); //检查字符串string中是否有字符ch
int combin(char* ch, char* bech); //连接字符串ch和bech于ch
void read_rule(char* rulefile, noterminal* h_noter, terminal* h_ter); //产生式的读取,非终结符必须为大写字母,$表示空
void print_rule(noterminal* h_noter, terminal* h_ter); //产生式输出
void free_list(noterminal* h_noter, terminal* h_ter); //释放链表空间
void make_firstvt(noterminal* h_noter, terminal* h_ter); //计算FirstVt集
void make_lastvt(noterminal* h_noter, terminal* h_ter); //计算lastvt集
void make_patable(noterminal* h_noter, terminal* h_ter); //构造优先关系表
void error_treat(); //错误处理
int sf_analyse(noterminal* h_noter, terminal* h_ter,char*st); //算符优先分析
char guiyue(char* st, noterminal* h_noter); //归约处理
int main() {
char rulefile[100] = "D:\\新建文件夹\\Desktop\\编译原理\\实验二\\规则4.txt";
//初始化
h_noter = (noterminal*)malloc(sizeof(noterminal));
h_noter->next = NULL;
h_noter->FirstVt = NULL;
h_noter->Lastvt = NULL;
h_noter->size = 0;
h_ter = (terminal*)malloc(sizeof(terminal));
h_ter->next = NULL;
//读取文法
read_rule(rulefile, h_noter, h_ter);
//输出文法
print_rule(h_noter, h_ter);
//求firstvt集
make_firstvt(h_noter, h_ter);
//求lastvt集
make_lastvt(h_noter, h_ter);
//求算法优先表
make_patable(h_noter, h_ter);
//对输入符号串进行分析
char st[100];
int h = 1;
while (h) {
printf("\n\n\t请输入要分析的字符串:");
scanf_s("%s", st, 99);
int sign = sf_analyse(h_noter, h_ter, st);
if (sign == 0) printf("\t非法输入!\n\n");
else printf("\t合法输入!\n\n");
printf("\t是否继续输入(0退出,1继续):");
scanf_s("%d", &h);
}
free_list(h_noter, h_ter);
return 1;
}
noterminal* check_noterminal(char ch, noterminal* h_noter) {
int sign = 0;
noterminal* h_n = h_noter;
while (h_n->next != NULL)
{
h_n = h_n->next;
if (h_n->ch == ch) {
sign = 1;
break;
}
};
if (sign == 0) return NULL;
else return h_n;
}
terminal* check_terminal(char ch, terminal* h_ter) {
int sign = 0;
terminal* h_n = h_ter;
while (h_n->next != NULL)
{
h_n = h_n->next;
if (h_n->ch == ch) {
sign = 1;
break;
}
};
if (sign == 0) return NULL;
else return h_n;
}
bool in_char(char* string, char ch) {
int coutn = strlen(string);
int i = 0;
for (; i < coutn; i++) {
if (string[i] == ch) break;
}
return coutn == i ? false : true;
}
int combin(char* ch, char* bech) {
int cont = strlen(ch);
int sign = 0;
for (int i = 0; bech[i] != 0; i++) {
if (bech[i] != '$' && !in_char(ch, bech[i])) {
ch[cont++] = bech[i];
sign = 1;
}
}
return sign;
}
void free_list(noterminal* h_noter, terminal* h_ter) {
noterminal* noter = h_noter;
terminal* ter = h_ter;
while (h_noter != NULL) {
h_noter = h_noter->next;
free(noter);
noter = h_noter;
}
while (h_ter != NULL) {
h_ter = h_ter->next;
free(ter);
ter = h_ter;
}
}
void read_rule(char* rulefile, noterminal* h_noter, terminal* h_ter) {
FILE* yuan = NULL;
noterminal* noter, * r_noter = h_noter;
terminal* ter, * r_ter = h_ter;
//非终结符加入'#'
ter = (terminal*)malloc(sizeof(terminal));
ter->ch = '#';
ter->number = 0;
ter->next = NULL;
r_ter->next = ter;
r_ter = ter;
char ch;
int size = 1; //终结符个数
if (fopen_s(&yuan, rulefile, "r") != 0) {
printf("规则 文件打开失败!");
exit(1);
}
while ((ch = fgetc(yuan)) != -1) {
if (64 < ch && ch < 91) {
noterminal* h_n = check_noterminal(ch, h_noter);
//增加非终结符结点
if (h_n == NULL) {
noter = (noterminal*)malloc(sizeof(noterminal));
noter->ch = ch;
noter->next = NULL;
noter->FirstVt = NULL;
noter->Lastvt = NULL;
noter->size = 0;
r_noter->next = noter;
r_noter = noter;
}
else {
noter = h_n;
}
//读取产生式
char chsh[10];
for (int i = 0; i < 2; i++) ch = fgetc(yuan);
int quit = 0;
while ((ch = fgetc(yuan))) {
if (ch == '|' || ch == '\n' || ch == -1) {
//存储产生式
chsh[quit] = '\0';
quit = 0;
noter->guize[noter->size++] = (char*)malloc(sizeof(char[10]));
int s = 0;
do {
noter->guize[noter->size - 1][s] = chsh[s];
} while (chsh[s++] != '\0');
if (ch == '\n' || ch == -1) {
break;
}
}
else {
chsh[quit++] = ch;
//增加终结符
if (ch < 65 || ch>90) {
terminal* h_t = check_terminal(ch, h_ter);
if (h_t == NULL) {
ter = (terminal*)malloc(sizeof(terminal));
ter->ch = ch;
ter->next = NULL;
ter->number = size;
r_ter->next = ter;
r_ter = ter;
size++;
}
}
}
}
}
else {
printf("产生式错误");
}
}
//初始化优先关系表
ter = h_ter;
while (ter->next != NULL) {
ter = ter->next;
ter->patable = (char*)malloc(size+1);
for (int i = 0; i < size; i++) ter->patable[i] = ' ';
ter->patable[size] = 0;
}
fclose(yuan);
}
void print_rule(noterminal* h_noter, terminal* h_ter) {
printf("\t产生式:\n");
noterminal* noter = h_noter;
int i = 0;
while (noter->next != NULL) {
noter = noter->next;
for (int j = 0; j < noter->size; j++) {
printf("\t%-10d %-10c %-10s\n", ++i, noter->ch, noter->guize[j]);
}
}
printf("\n\t非终结符:\n\t");
noter = h_noter;
while (noter->next != NULL) {
noter = noter->next;
printf("%-10c", noter->ch);
}
printf("\n\n\t终结符:\n\t");
terminal* ter = h_ter;
while (ter->next != NULL) {
ter = ter->next;
printf("%-10c", ter->ch);
}
printf("\n\n\n");
}
void make_firstvt(noterminal* h_noter,terminal* h_ter) {
int sign = 0;
char ch;
noterminal* noter = h_noter;
do {
noter = h_noter;
sign = 0;
//循环终结符
while (noter->next != NULL) {
noter = noter->next;
//初始化
if (noter->FirstVt == NULL) {
noter->FirstVt = (char*)malloc(sizeof(char[20]));
for (int i = 0; i < 20; i++) noter->FirstVt[i] = 0;
}
//循环每个终结符的产生式
for (int i = 0; i<noter->size; i++) {
ch = noter->guize[i][0];
//为终结符
if (ch < 63 || ch>90) {
if (!in_char(noter->FirstVt, ch)) {
noter->FirstVt[strlen(noter->FirstVt)] = ch;
sign = 1;
}
}
//为非终结符
else {
noterminal* r_noter = check_noterminal(ch, h_noter);
if (r_noter->FirstVt != NULL) {
sign += combin(noter->FirstVt, r_noter->FirstVt);
}
ch = noter->guize[i][1];
if (ch < 63 || ch>90) {
if (!in_char(noter->FirstVt, ch) && ch !=0) {
noter->FirstVt[strlen(noter->FirstVt)] = ch;
sign = 1;
}
}
else {
error_treat();
}
}
}
}
} while (sign);
//输出firstvt
noter = h_noter;
printf("\tFIRSTVT集:\n\n");
while (noter->next != NULL) {
noter = noter->next;
printf("\t%-10c", noter->ch);
printf("firstvt=%-10s\n", noter->FirstVt);
}
printf("\n\n");
}
void make_lastvt(noterminal* h_noter, terminal* h_ter) {
int sign = 0;
char ch;
noterminal* noter = h_noter;
do {
noter = h_noter;
sign = 0;
//循环终结符
while (noter->next != NULL) {
noter = noter->next;
//初始化
if (noter->Lastvt == NULL) {
noter->Lastvt = (char*)malloc(sizeof(char[20]));
for (int i = 0; i < 20; i++) noter->Lastvt[i] = 0;
}
//循环每个终结符的产生式
for (int i = 0; i < noter->size; i++) {
ch = noter->guize[i][strlen(noter->guize[i]) - 1];
//为终结符
if (ch < 63 || ch>90) {
if (!in_char(noter->Lastvt, ch)) {
noter->Lastvt[strlen(noter->Lastvt)] = ch;
sign = 1;
}
}
//为非终结符
else {
noterminal* r_noter = check_noterminal(ch, h_noter);
if (r_noter->Lastvt != NULL) {
sign += combin(noter->Lastvt, r_noter->Lastvt);
}
if (strlen(noter->guize[i]) == 1) continue;
ch = noter->guize[i][strlen(noter->guize[i]) - 2];
if (ch < 63 || ch>90) {
if (!in_char(noter->Lastvt, ch)) {
noter->Lastvt[strlen(noter->Lastvt)] = ch;
sign = 1;
}
}
else {
error_treat();
}
}
}
}
} while (sign);
//输出Lastvt
noter = h_noter;
printf("\tLASTVT集:\n\n");
while (noter->next != NULL) {
noter = noter->next;
printf("\t%-10c", noter->ch);
printf("lastvt=%-10s\n", noter->Lastvt);
}
printf("\n\n");
}
void make_patable(noterminal* h_noter, terminal* h_ter) {
noterminal* noter = h_noter,*r_noter;
terminal* ter1, * ter2;
while (noter->next != NULL) {
noter = noter->next;
for (int i = 0; i < noter->size; i++) {
for (int j = 0; j < strlen(noter->guize[i])-1; j++) {
if (noter->guize[i][j] < 63 || noter->guize[i][j]>90) {
ter1 = check_terminal(noter->guize[i][j],h_ter);
if (noter->guize[i][j + 1] < 63 || noter->guize[i][j + 1]>90) {
ter2 = check_terminal(noter->guize[i][j + 1], h_ter);
if (ter1->patable[ter2->number] != ' ')
error_treat();
ter1->patable[ter2->number] = '=';
}
else {
r_noter = check_noterminal(noter->guize[i][j + 1], h_noter);
for (int q = 0; q < strlen(r_noter->FirstVt); q++) {
ter2 = check_terminal(r_noter->FirstVt[q], h_ter);
if (ter1->patable[ter2->number] != ' ')
error_treat();
ter1->patable[ter2->number] = '<';
}
if (j < strlen(noter->guize[i]) - 2 && (noter->guize[i][j + 2] < 63 || noter->guize[i][j + 2]>90)) {
ter2 = check_terminal(noter->guize[i][j + 2],h_ter);
if (ter1->patable[ter2->number] != ' ')
error_treat();
ter1->patable[ter2->number] = '=';
}
}
}
else {
if (noter->guize[i][j + 1] < 63 || noter->guize[i][j + 1]>90) {
ter1 = check_terminal(noter->guize[i][j + 1], h_ter);
r_noter = check_noterminal(noter->guize[i][j], h_noter);
for (int q = 0; q < strlen(r_noter->Lastvt); q++) {
ter2 = check_terminal(r_noter->Lastvt[q], h_ter);
if (ter2->patable[ter1->number] != ' ')
error_treat();
ter2->patable[ter1->number] = '>';
}
}
else {
error_treat();
}
}
}
}
}
//初始化'#'关系表
noter = h_noter->next;
ter1 = h_ter->next;
for (int e = 0; e < strlen(noter->FirstVt); e++) {
ter2 = check_terminal(noter->FirstVt[e], h_ter);
ter1->patable[ter2->number] = '<';
}
for (int e = 0; e < strlen(noter->Lastvt); e++) {
ter2 = check_terminal(noter->Lastvt[e], h_ter);
ter2->patable[ter1->number] = '>';
}
ter1->patable[0] = '=';
//输出优先关系表
printf("\t优先关系表:\n\n\t%-10c",' ');
terminal* ter = h_ter;
while (ter->next != NULL) {
ter = ter->next;
printf("%-10c", ter->ch);
}
ter = h_ter;
int size = strlen(ter->next->patable);
while (ter->next != NULL) {
ter = ter->next;
printf("\n\n\t");
printf("%-10c", ter->ch);
for (int w = 0; w < size; w++) {
printf("%-10c", ter->patable[w]);
}
}
printf("\n\n\n");
}
void error_treat() {
printf("\t-----------------此文法不是算符优先文法-----------------\n\n\n");
free_list(h_noter, h_ter);
exit(-1);
}
int sf_analyse(noterminal* h_noter, terminal* h_ter, char* st) {
printf("\t分析栈 关系 输入串 动作\n");
char anal[16] = {0}; //分析栈
int topa = 0;
anal[topa++] = '#';
char rela[16] = { 0 }; //关系栈
int topr = 0;
rela[topr++] = '#';
st[strlen(st) + 1] = 0; //输入串
st[strlen(st)] = '#';
int tops = 0;
char a;
terminal* ter1, * ter2;
while (true)
{
//取出输入串字符
a = st[tops];
//st[tops++] = ' ';
int j;
//取出分析栈第一个终结符
if (anal[topa - 1] < 63 || anal[topa - 1]>90) j = topa - 1;
else j = topa - 2;
//动作判断和处理
ter1 = check_terminal(anal[j], h_ter);
ter2 = check_terminal(a, h_ter);
if (ter2 == NULL) {
rela[topr++] = '?';
rela[topr++] = a;
printf("\t%-15s %-15s %-15s 无优先关系\n", anal, rela, st);
return 0;
}
char re = ter1->patable[ter2->number];
//接受
if (a == '#' && anal[j] == '#') {
rela[topr++] = '=';
rela[topr++] = '#';
printf("\t%-15s %-15s %-15s 接受\n", anal, rela, st);
return 1;
}
//小于或等于-移进
if (re == '<'|| re == '=') {
rela[topr++] = re;
rela[topr++] = a;
printf("\t%-15s %-15s %-15s 移进\n", anal, rela, st);
anal[topa++] = a;
st[tops++] = ' ';
}
//大于-归约
else if (re == '>') {
rela[topr++] = re;
rela[topr++] = a;
printf("\t%-15s %-15s %-15s ", anal, rela, st);
//归约串提取
char gy[10] = { 0 };
char w;
int p = 1; //关系栈变数
terminal* m1, * m2;
do {
w = anal[j];
p++;
if (anal[j - 1] < 63 || anal[j - 1] > 90)j--;
else j -= 2;
m1 = check_terminal(anal[j], h_ter);
m2 = check_terminal(w, h_ter);
} while (m1->patable[m2->number] != '<');
for (int e = j + 1; e < topa; e++) {
gy[e - j - 1] = anal[e];
anal[e] = 0;
}
//归约
char n = guiyue(gy, h_noter);
if(n ==0){
printf("无匹配产生式\n");
return 0;
}
topa = j + 1;
anal[topa++] = n;
for (int r = 0; r < 2 * p; r++)rela[(topr--) - 1] = 0;
printf("归约\n");
}
//无关系
else {
rela[topr++] = '?';
rela[topr++] = a;
printf("\t%-15s %-15s %-15s 无优先关系\n", anal, rela, st);
return 0;
}
}
}
char guiyue(char* st, noterminal* h_noter) {
noterminal* noter = h_noter;
while (noter->next != NULL) {
noter = noter->next;
for (int i = 0; i < noter->size; i++) {
int le = strlen(noter->guize[i]);
char a;
int e = 0;
for (; e <= le; e++) {
a = noter->guize[i][e];
if (a > 62 && a < 91) {
if (st[e] > 62 && st[e] < 91)continue;
else break;
}
else {
if (st[e] == a)continue;
else break;
}
}
if (e == le + 1) return noter->ch;
}
}
return 0;
}
(四)代码的不足
- 输入的字符串最好不要超过15个字符,不然输出的分析过程的排版不会对齐。
- 如果代码还有其他不足之处,欢迎在评论区指出。