制作一个简单的C语言词法分析程序

 代码仓库:JJLi0427/compiler (github.com)

1.分析组成

C语言的程序中,有很多单词符号和保留字。一些单词符号还有对应的左线性文法。所以我们需要先做出一个单词字符表,给出对应的识别码,然后根据对应的表格来写出程序

2.程序设计

程序主要由循环判断构成。不需推理即可产生的符号我们可以把它包装在函数中,返回值为对应的识别码即可。但是有线性文法的则需要单独的一遍推导才可以得出词法分析结果。对于测试样例我们可以存储到txt文件中,使用循环读写可以更高效地测试和输出词法分析结果。最终的结果我们用二元式的形式来表示,存储在txt文件中

3.完整程序

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Return 1 when ch is an ASCII letter (a-z or A-Z), otherwise 0. */
int english(char ch) {
	int is_lower = (ch >= 'a' && ch <= 'z');
	int is_upper = (ch >= 'A' && ch <= 'Z');

	return is_lower || is_upper;
}
/* Return 1 when ch is an ASCII decimal digit ('0'..'9'), otherwise 0. */
int number(char ch) {
	return !(ch < '0' || ch > '9');
}
/*
 * Look up str in the reserved-word table.
 *
 * Returns the keyword's code (3 for "void" up to 13 for "return", in table
 * order) on an exact, case-sensitive match, or 1 when str is not a keyword,
 * i.e. it is an ordinary identifier.
 */
int reserved(char str[]) {
	/* Table order matters: the code of keywords[k] is first_code + k. */
	static const char *const keywords[] = {
		"void", "int", "float", "double", "if", "else",
		"for", "do", "while", "break", "return"
	};
	const int first_code = 3;
	const int count = (int)(sizeof keywords / sizeof keywords[0]);

	for (int k = 0; k < count; k++) {
		if (strcmp(str, keywords[k]) == 0) {
			return first_code + k;
		}
	}
	return 1;
}
/*
 * Map a single-character delimiter to its token code.
 *
 * Returns a code in the range 14..27 for the delimiters listed below, or 0
 * when ch is not one of them.
 */
int symbol(char ch) {
	switch (ch) {
	case ';':  return 14;
	case ',':  return 15;
	case '(':  return 16;
	case ')':  return 17;
	case '{':  return 18;
	case '}':  return 19;
	case '[':  return 20;
	case ']':  return 21;
	case '%':  return 22;
	case '?':  return 23;
	case ':':  return 24;
	case '\'': return 25;
	case '\"': return 26;
	case '.':  return 27;
	default:   return 0;
	}
}

/* Size of the lexeme buffer, including the terminating NUL. */
enum { MAX_LEXEME = 256 };

/*
 * Collect a run of digits (and, when allow_letters is non-zero, letters)
 * into buf, starting with ch, which the caller has already classified.
 *
 * At most cap - 1 characters are stored and buf is always NUL-terminated;
 * characters beyond that limit are still consumed from fp but dropped, so an
 * over-long lexeme is truncated instead of overflowing the buffer.
 *
 * Returns the first character that does not belong to the run (may be EOF).
 */
static int scan_run(FILE *fp, int ch, int allow_letters, char *buf, size_t cap) {
	size_t len = 0;

	while (ch != EOF &&
	       (number((char)ch) || (allow_letters && english((char)ch)))) {
		if (len + 1 < cap) {
			buf[len++] = (char)ch;
		}
		ch = fgetc(fp);
	}
	buf[len] = '\0';
	return ch;
}

/*
 * Tokenize the text read from fp and write one "(code, lexeme) " pair per
 * token to fw. Newlines in the input are copied to the output so that the
 * result keeps the line structure of the source.
 *
 * Returns 0 when the whole input was consumed, or -1 after writing
 * "Undefined symbol!" to fw when a character is met that no rule accepts.
 */
static int analyze_stream(FILE *fp, FILE *fw) {
	char lexeme[MAX_LEXEME];
	/* int, not char: EOF must stay distinguishable from every real byte. */
	int ch = fgetc(fp);

	while (ch != EOF) {
		if (ch == ' ' || ch == '\t' || ch == '\r') {
			/* '\r' is skipped so CRLF files are not rejected. */
			ch = fgetc(fp);
		}
		else if (ch == '\n') {
			fprintf(fw, "\n");
			ch = fgetc(fp);
		}
		else if (english((char)ch)) {
			/* Identifier or reserved word: letter (letter | digit)* */
			ch = scan_run(fp, ch, 1, lexeme, sizeof lexeme);
			fprintf(fw, "(%d, %s) ", reserved(lexeme), lexeme);
		}
		else if (number((char)ch)) {
			/* Unsigned integer constant: digit+ */
			ch = scan_run(fp, ch, 0, lexeme, sizeof lexeme);
			fprintf(fw, "(2, %s) ", lexeme);
		}
		else if (symbol((char)ch) != 0) {
			fprintf(fw, "(%d, %c) ", symbol((char)ch), ch);
			ch = fgetc(fp);
		}
		else if (ch == '>') {
			ch = fgetc(fp);
			if (ch == '=') {
				fprintf(fw, "(29, >=) ");
				ch = fgetc(fp);
			}
			else if (ch == '>') {
				fprintf(fw, "(30, >>) ");
				ch = fgetc(fp);
			}
			else fprintf(fw, "(28, >) ");
		}
		else if (ch == '<') {
			ch = fgetc(fp);
			if (ch == '=') {
				fprintf(fw, "(32, <=) ");
				ch = fgetc(fp);
			}
			else if (ch == '<') {
				fprintf(fw, "(33, <<) ");
				ch = fgetc(fp);
			}
			else fprintf(fw, "(31, <) ");
		}
		else if (ch == '!') {
			ch = fgetc(fp);
			if (ch == '=') {
				fprintf(fw, "(35, !=) ");
				ch = fgetc(fp);
			}
			else fprintf(fw, "(34, !) ");
		}
		else if (ch == '=') {
			ch = fgetc(fp);
			if (ch == '=') {
				fprintf(fw, "(37, ==) ");
				ch = fgetc(fp);
			}
			else fprintf(fw, "(36, =) ");
		}
		else if (ch == '/') {
			ch = fgetc(fp);
			if (ch == '*') {
				// Block comment: skip to the closing star-slash pair.
				// Remembering the previous character (instead of peeking
				// one ahead after every '*') also recognises a terminator
				// that is preceded by extra stars.
				int prev = 0;

				fprintf(fw, "(Start annotate, /*) ");
				for (;;) {
					ch = fgetc(fp);
					if (ch == EOF) {
						// Unterminated comment: stop at end of input
						// rather than looping forever.
						break;
					}
					if (prev == '*' && ch == '/') {
						fprintf(fw, "(End annotate, */) ");
						ch = fgetc(fp);
						break;
					}
					prev = ch;
				}
			}
			else if (ch == '/') {
				// Line comment: skip to end of line, or to EOF when the
				// last line has no trailing newline.
				fprintf(fw, "(annotate, //) ");
				do {
					ch = fgetc(fp);
				} while (ch != '\n' && ch != EOF);
				fprintf(fw, "\n");
				if (ch != EOF) {
					ch = fgetc(fp);
				}
			}
			else if (ch == '=') {
				fprintf(fw, "(39, /=) ");
				ch = fgetc(fp);
			}
			else fprintf(fw, "(38, /) ");
		}
		else if (ch == '&') {
			ch = fgetc(fp);
			if (ch == '&') {
				fprintf(fw, "(41, &&) ");
				ch = fgetc(fp);
			}
			else fprintf(fw, "(40, &) ");
		}
		else if (ch == '|') {
			ch = fgetc(fp);
			if (ch == '|') {
				fprintf(fw, "(43, ||) ");
				ch = fgetc(fp);
			}
			else fprintf(fw, "(42, |) ");
		}
		else if (ch == '+') {
			ch = fgetc(fp);
			if (ch == '=') {
				fprintf(fw, "(46, +=) ");
				ch = fgetc(fp);
			}
			else if (ch == '+') {
				/* NOTE(review): unlike every other pair this one has no
				 * comma after the code. Kept byte-for-byte so existing
				 * expected outputs still match; change it together with
				 * those outputs if the format is ever normalised. */
				fprintf(fw, "(45 ++) ");
				ch = fgetc(fp);
			}
			else fprintf(fw, "(44, +) ");
		}
		else if (ch == '-') {
			ch = fgetc(fp);
			if (ch == '=') {
				fprintf(fw, "(49, -=) ");
				ch = fgetc(fp);
			}
			else if (ch == '-') {
				fprintf(fw, "(48, --) ");
				ch = fgetc(fp);
			}
			else fprintf(fw, "(47, -) ");
		}
		else if (ch == '*') {
			ch = fgetc(fp);
			if (ch == '=') {
				fprintf(fw, "(51, *=) ");
				ch = fgetc(fp);
			}
			else fprintf(fw, "(50, *) ");
		}
		else if (ch == '\\') {
			/* A backslash emits nothing; a following 'n' (the "\n" escape
			 * inside string literals) is swallowed with it. Any other
			 * following character is tokenized normally. */
			ch = fgetc(fp);
			if (ch == 'n') {
				ch = fgetc(fp);
			}
		}
		else {
			fprintf(fw, "Undefined symbol!");
			return -1;
		}
	}
	return 0;
}

/*
 * Run the lexer over test1.txt .. test4.txt, writing the token pairs for
 * testN.txt to analyzeN.txt and printing a one-line status per file.
 * A file that cannot be opened is reported on stderr and skipped.
 */
int main(void) {
	for (int i = 1; i <= 4; i++) {
		char in_name[32];
		char out_name[32];

		/* Bounded formatting: the names are built in buffers that are
		 * large enough, instead of being appended to exactly-sized arrays. */
		snprintf(in_name, sizeof in_name, "test%d.txt", i);
		snprintf(out_name, sizeof out_name, "analyze%d.txt", i);

		FILE *fp = fopen(in_name, "r");
		if (fp == NULL) {
			perror(in_name);
			continue;
		}
		/* The output is only written, so plain "w" (truncate/create) is
		 * enough and is a standard mode string. */
		FILE *fw = fopen(out_name, "w");
		if (fw == NULL) {
			perror(out_name);
			fclose(fp);
			continue;
		}

		int status = analyze_stream(fp, fw);

		fclose(fp);
		if (fclose(fw) != 0) {
			/* fclose flushes; a failure here means output was lost. */
			perror(out_name);
		}

		if (status != 0) {
			printf("test%d: Undefined symbol!\n", i);
		}
		else {
			printf("test%d: Finish analyzing.\n", i);
		}
	}
	return 0;
}

 4.测试运行

其中一个测试样例:(剩余的大家可以自主编写) 

void test1() {
    int a = 5, b = 10;
    if (a > b) {
        printf("a is greater than b");
    } else if (a < b) {
        printf("b is greater than a");
    } else {
        printf("a and b are equal");
    }

    for (int i = 0; i < 5; i++) {
        printf("%d ", i);
    }
    printf("\n");

    int arr[5] = {1, 2, 3, 4, 5};
    int sum = 0;
    for (int i = 0; i < 5; i++) {
        sum += arr[i];
    }
    printf("The sum of the array is: %d\n", sum);

    return 0;
}

 运行结果:

(3, void) (1, test1) (16, () (17, )) (18, {) 
(4, int) (1, a) (36, =) (2, 5) (15, ,) (1, b) (36, =) (2, 10) (14, ;) 
(7, if) (16, () (1, a) (28, >) (1, b) (17, )) (18, {) 
(1, printf) (16, () (26, ") (1, a) (1, is) (1, greater) (1, than) (1, b) (26, ") (17, )) (14, ;) 
(19, }) (8, else) (7, if) (16, () (1, a) (31, <) (1, b) (17, )) (18, {) 
(1, printf) (16, () (26, ") (1, b) (1, is) (1, greater) (1, than) (1, a) (26, ") (17, )) (14, ;) 
(19, }) (8, else) (18, {) 
(1, printf) (16, () (26, ") (1, a) (1, and) (1, b) (1, are) (1, equal) (26, ") (17, )) (14, ;) 
(19, }) 

(9, for) (16, () (4, int) (1, i) (36, =) (2, 0) (14, ;) (1, i) (31, <) (2, 5) (14, ;) (1, i) (45 ++) (17, )) (18, {) 
(1, printf) (16, () (26, ") (22, %) (1, d) (26, ") (15, ,) (1, i) (17, )) (14, ;) 
(19, }) 
(1, printf) (16, () (26, ") (26, ") (17, )) (14, ;) 

(4, int) (1, arr) (20, [) (2, 5) (21, ]) (36, =) (18, {) (2, 1) (15, ,) (2, 2) (15, ,) (2, 3) (15, ,) (2, 4) (15, ,) (2, 5) (19, }) (14, ;) 
(4, int) (1, sum) (36, =) (2, 0) (14, ;) 
(9, for) (16, () (4, int) (1, i) (36, =) (2, 0) (14, ;) (1, i) (31, <) (2, 5) (14, ;) (1, i) (45 ++) (17, )) (18, {) 
(1, sum) (46, +=) (1, arr) (20, [) (1, i) (21, ]) (14, ;) 
(19, }) 
(1, printf) (16, () (26, ") (1, The) (1, sum) (1, of) (1, the) (1, array) (1, is) (24, :) (22, %) (1, d) (26, ") (15, ,) (1, sum) (17, )) (14, ;) 

(13, return) (2, 0) (14, ;) 
(19, }) 

  • 4
    点赞
  • 23
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论
以下是一个简单的 C 语言词法分析程序,它可以读取 C 语言源代码并输出每个标识符、关键字、常量和运算符: ```c #include <stdio.h> #include <ctype.h> #include <string.h> #define MAX_LENGTH 100 int main() { char ch, buffer[MAX_LENGTH]; FILE *fp; int i, j, k, len; fp = fopen("example.c", "r"); if (fp == NULL) { printf("Error opening file!"); return 0; } while ((ch = fgetc(fp)) != EOF) { if (isalpha(ch)) { i = 0; buffer[i++] = ch; while (isalnum(ch = fgetc(fp))) buffer[i++] = ch; buffer[i] = '\0'; fseek(fp, -1, SEEK_CUR); if (strcmp(buffer, "if") == 0 || strcmp(buffer, "else") == 0 || strcmp(buffer, "while") == 0 || strcmp(buffer, "for") == 0) printf("%s is a keyword\n", buffer); else printf("%s is an identifier\n", buffer); } else if (isdigit(ch)) { i = 0; buffer[i++] = ch; while (isdigit(ch = fgetc(fp))) buffer[i++] = ch; buffer[i] = '\0'; fseek(fp, -1, SEEK_CUR); printf("%s is a constant\n", buffer); } else if (ch == '+' || ch == '-' || ch == '*' || ch == '/' || ch == '>' || ch == '<' || ch == '=' || ch == '&' || ch == '|') { if (ch == '>' || ch == '<' || ch == '=') { if ((ch = fgetc(fp)) == '=') printf("%c%c is a relational operator\n", buffer[0], ch); else { fseek(fp, -1, SEEK_CUR); printf("%c is an arithmetic operator\n", buffer[0]); } } else if (ch == '&' || ch == '|') { if ((ch = fgetc(fp)) == buffer[0]) printf("%c%c is a logical operator\n", buffer[0], ch); else { fseek(fp, -1, SEEK_CUR); printf("%c is an invalid operator\n", buffer[0]); } } else printf("%c is an arithmetic operator\n", ch); } } fclose(fp); return 0; } ``` 该程序使用标准 C 库中的函数来读取源代码并从中提取标识符、关键字、常量和运算符。程序通过逐个字符读取源代码并根据字符的类型进行处理。如果字符是字母,则将其存储在缓冲区中,并读取后续的字母或数字,直到遇到非字母数字字符为止。然后,程序将缓冲区中的内容与关键字列表进行比较,以确定是否为关键字。如果不是关键字,则认为它是标识符。 如果字符是数字,则程序将其存储在缓冲区中,并读取后续的数字,直到遇到非数字字符为止。然后,程序将缓冲区中的内容视为常量。 如果字符是运算符,则程序将其与可能的后续字符进行比较以确定其是何种类型的运算符,例如算术运算符、关系运算符或逻辑运算符。

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

哆啦叮当

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值