【编译原理】PL/0编译程序之词法分析 | 实现词法分析器

视频

视频解释

简单的手写,理解原理 + 花时间 = 写出它

核心

  1. 遍历字符流,不需回溯
  2. 没用DFA,全程if-else,思路很好理解
  3. 识别单词的时候分为:1-标识符关键字,2-整数,3-复合运算符,4-单独字符
  4. 类别码从0开始依次递增(SYM_NULL=0,SYM_IDENTIFIER=1,SYM_NUMBER=2,…),用enum在头文件里定义了。
  5. 单独字符的类型码统一定义在ssym数组里(与csym数组按下标一一对应),关键字的类型码统一定义在wsym数组里(与word数组按下标一一对应)。因为它们是一一对应的,用数组查表比较简洁。
  6. 报错做的很简陋,几乎没有
  7. 识别到token就直接输出了,万事从简(懒)…理解思想和方法就可再改进😃
  8. C 库函数 int isalpha(int c):判断字符是否是字母;int isdigit(int c):判断字符是否是数字。注意参数必须是 unsigned char 范围内的值或 EOF(fgetc 的返回值正好满足)。当然也可以直接用ASCII码比较。
  9. C 库函数 int strcmp(const char *str1, const char *str2) 把 str1 所指向的字符串和 str2 所指向的字符串进行比较。
  10. 如果视频看不懂就是我没讲清楚,有点口齿不清…sorry🤧

my.c

#include "my.h"

/*
 * Print the diagnostic for error code n to stdout.
 *
 * n indexes the err_msg table. Codes outside the table are reported as
 * unknown instead of reading past the end of the array.
 */
void error(int n)
{
	int count = (int)(sizeof err_msg / sizeof err_msg[0]);

	if (n < 0 || n >= count)
	{
		printf("Error %3d: (unknown error code)\n", n);
		return;
	}
	printf("Error %3d: %s\n", n, err_msg[n]);
}

void lexer(FILE *fp)
{
	ch = fgetc(fp);
	while (ch != EOF)
	{
		while (ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n')
		{
			ch = fgetc(fp);
		}
		if (isalpha(ch)) // 当前输入为字母,则应该为关键字或标识符
		{
			char a[MAXIDLEN + 1]; // 当前读取到的单词
			int k = 0;
			for (; (isalpha(ch) || isdigit(ch)) && k < MAXIDLEN; k++)
			{
				a[k] = ch;
				ch = fgetc(fp);
			}

			a[k] = '\0'; // 字符数组和字符串的区别就是结尾少了\0,一定要加上!
			// 检查是否为关键字
			int i = 1;
			for (; i <= NRW; i++)
			{
				if (strcmp(a, word[i]) == 0)
					break;
			}
			if (i <= NRW)
			{
				sym = wsym[i]; // symbol is a reserved word
			}
			else
			{
				sym = SYM_IDENTIFIER; // symbol is an identifier
			}
			printf("(%d,%s)\n", sym, a);
		}
		else if (isdigit(ch))
		{ // symbol is a number.
			sym = SYM_NUMBER;
			int k = 0;
			int num = 0;
			while (isdigit(ch))
			{
				num = num * 10 + ch - '0';
				ch = fgetc(fp);
				k++;
			}
			if (k > MAXNUMLEN)
				error(25); // The number is too great.
			else
			{
				printf("(%d,%d)\n", sym, num);
			}
		}
		else if (ch == ':')
		{
			ch = fgetc(fp);
			if (ch == '=')
			{
				sym = SYM_BECOMES; // :=
				ch = fgetc(fp);
				printf("(%d,:=)\n", sym);
			}
			else
			{
				sym = SYM_NULL; // illegal?
			}
		}
		else if (ch == '>')
		{
			ch = fgetc(fp);
			if (ch == '=')
			{
				sym = SYM_GEQ; // >=
				ch = fgetc(fp);
				printf("(%d,>=)\n", sym);
			}
			else
			{
				sym = SYM_GTR; // >
				printf("(%d,=)\n", sym);
			}
		}
		else if (ch == '<')
		{
			ch = fgetc(fp);
			if (ch == '=')
			{
				sym = SYM_LEQ; // <=
				ch = fgetc(fp);
				printf("(%d,<=)\n", sym);
			}
			else if (ch == '>')
			{
				sym = SYM_NEQ; // <>
				ch = fgetc(fp);
			}
			else
			{
				sym = SYM_LES; // <
				printf("(%d,<)\n", sym);
			}
		}
		else if (ch == '{')
		{ //忽略注释
			int end = 1;
			while (end)
			{
				ch = fgetc(fp);
				if (ch == '}')
					end = 0;
			}
			ch = fgetc(fp);
		}
		else
		{ // other tokens : '+', '-', '*', '/', '(', ')', '=', ',', '.', ';'
			//代码和识别关键字那里类似
			int i = 1;
			for (; i <= NSYM; i++)
			{
				if (ch == csym[i])
					break;
			}
			if (i <= NSYM)
			{
				sym = ssym[i];
				printf("(%d,%c)\n", sym, ch);
				ch = fgetc(fp);
			}
			//不应该出现的字符
			else
			{
				printf("Fatal Error: Unknown character.\n");
				exit(1);
			}
		}
	}
	printf("—————文件读取结束—————");
}

/*
 * Open source1.txt in the current directory and run the lexer over it.
 * Returns 0 on success, 1 if the file cannot be opened.
 */
int main()
{
	FILE *fp = fopen("source1.txt", "r");
	if (fp == NULL)
	{
		// Passing a NULL stream to fgetc is undefined behaviour.
		perror("source1.txt");
		return 1;
	}
	lexer(fp);
	fclose(fp);
	return 0;
}

my.h

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

#define NRW        11     // number of reserved words (word[] and wsym[] have NRW + 1 entries; index 0 is a placeholder)
#define MAXNUMLEN  14     // maximum number of digits in a number literal
#define NSYM       10     // number of single-character symbols (csym[] and ssym[] have NSYM + 1 entries; index 0 is a placeholder)
#define MAXIDLEN   10     // maximum number of characters kept for an identifier

int  ch;         // last character read; int rather than char so it can hold fgetc's EOF distinctly from every real byte
int  sym;        // last symbol read (one of the SYM_* codes)
// char line[80];   // input buffer, would allow echoing the offending line in error reports

// Single-character tokens. csym[i] is the character and ssym[i] its symbol code.
// Index 0 is an unused placeholder because lookups start at 1.
char csym[NSYM + 1] = {
	[0]  = ' ',
	[1]  = '+',
	[2]  = '-',
	[3]  = '*',
	[4]  = '/',
	[5]  = '(',
	[6]  = ')',
	[7]  = '=',
	[8]  = ',',
	[9]  = '.',
	[10] = ';'
};

// Reserved words. word[i] is the spelling and wsym[i] its symbol code.
char *word[NRW + 1] = {
	[0]  = "", /* place holder */
	[1]  = "begin",
	[2]  = "call",
	[3]  = "const",
	[4]  = "do",
	[5]  = "end",
	[6]  = "if",
	[7]  = "odd",
	[8]  = "procedure",
	[9]  = "then",
	[10] = "var",
	[11] = "while"
};
// Symbol (token category) codes, numbered consecutively from 0.
enum symtype {
	SYM_NULL = 0,
	SYM_IDENTIFIER,
	SYM_NUMBER,
	/* arithmetic operators and odd */
	SYM_PLUS,
	SYM_MINUS,
	SYM_TIMES,
	SYM_SLASH,
	SYM_ODD,
	/* relational operators */
	SYM_EQU,
	SYM_NEQ,
	SYM_LES,
	SYM_LEQ,
	SYM_GTR,
	SYM_GEQ,
	/* punctuation */
	SYM_LPAREN,
	SYM_RPAREN,
	SYM_COMMA,
	SYM_SEMICOLON,
	SYM_PERIOD,
	SYM_BECOMES,
	/* reserved words */
	SYM_BEGIN,
	SYM_END,
	SYM_IF,
	SYM_THEN,
	SYM_WHILE,
	SYM_DO,
	SYM_CALL,
	SYM_CONST,
	SYM_VAR,
	SYM_PROCEDURE
};
// Symbol codes of the reserved words, in the same order as word[].
int wsym[NRW + 1] = {
	[0]  = SYM_NULL,      /* place holder */
	[1]  = SYM_BEGIN,
	[2]  = SYM_CALL,
	[3]  = SYM_CONST,
	[4]  = SYM_DO,
	[5]  = SYM_END,
	[6]  = SYM_IF,
	[7]  = SYM_ODD,
	[8]  = SYM_PROCEDURE,
	[9]  = SYM_THEN,
	[10] = SYM_VAR,
	[11] = SYM_WHILE
};
// Symbol codes of the single-character tokens, in the same order as csym[].
int ssym[NSYM + 1] = {
	[0]  = SYM_NULL,      /* place holder */
	[1]  = SYM_PLUS,
	[2]  = SYM_MINUS,
	[3]  = SYM_TIMES,
	[4]  = SYM_SLASH,
	[5]  = SYM_LPAREN,
	[6]  = SYM_RPAREN,
	[7]  = SYM_EQU,
	[8]  = SYM_COMMA,
	[9]  = SYM_PERIOD,
	[10] = SYM_SEMICOLON
};

// Error messages, indexed by error code. Codes 0 and 26..31 have an empty message.
char *err_msg[] =
{
	[0]  = "",
	[1]  = "Found ':=' when expecting '='.",
	[2]  = "There must be a number to follow '='.",
	[3]  = "There must be an '=' to follow the identifier.",
	[4]  = "There must be an identifier to follow 'const', 'var', or 'procedure'.",
	[5]  = "Missing ',' or ';'.",
	[6]  = "Incorrect procedure name.",
	[7]  = "Statement expected.",
	[8]  = "Follow the statement is an incorrect symbol.",
	[9]  = "'.' expected.",
	[10] = "';' expected.",
	[11] = "Undeclared identifier.",
	[12] = "Illegal assignment.",
	[13] = "':=' expected.",
	[14] = "There must be an identifier to follow the 'call'.",
	[15] = "A constant or variable can not be called.",
	[16] = "'then' expected.",
	[17] = "';' or 'end' expected.",
	[18] = "'do' expected.",
	[19] = "Incorrect symbol.",
	[20] = "Relative operators expected.",
	[21] = "Procedure identifier can not be in an expression.",
	[22] = "Missing ')'.",
	[23] = "The symbol can not be followed by a factor.",
	[24] = "The symbol can not be as the beginning of an expression.",
	[25] = "The number is too great.",
	[26] = "",
	[27] = "",
	[28] = "",
	[29] = "",
	[30] = "",
	[31] = "",
	[32] = "There are too many levels."
};

source1.txt

const a=10;    {常量声明}
const b=20;
var c;         {变量声明}
procedure p;   {过程声明}
     begin
          c:=b+a
     end;

begin
     call p
end.
  • 6
    点赞
  • 15
    收藏
    觉得还不错? 一键收藏
  • 7
    评论
C-Minus 的词法规则 (1)关键字: if else int return void while (2)专用符号: + - * / < <= > >= == ~= = ; , ( ) [ ] { } /* */ (3)其他标记为 ID 和 NUM ,通过下列正则表达式定义: ID = letter letter* NUM = digit digit* letter = a|..|z|A|..|Z digit = 0|..|9 (4)空格由空白、换行符、制表符组成。 (5)注释由 /*...*/ 围起来,不能嵌套。 C-Minus 的语法规则 C-Minus 的 BNF 语法如下: 1. program -> declaration_list 2. declaration_list -> declaration_list declaration | declaration 3. declaration -> var_declaration | fun_declaration 4. var_declaration -> type_specifier ID; | type_specifier ID [ NUM ]; 5. type_specifier -> int | void 6. fun_declaration -> type_specifier ID ( params ) compound_stmt 7. params -> param_list | void 8. param_list -> param_list , param | param 9. param -> type_specifier ID | type_specifier ID [ ] 10. compound_stmt -> { local_declarations statement_list } 11. local_declarations -> local_declarations var_declaration | empty 12. statement_list -> statement_list statement | empty 13. statement -> expression_stmt | compound_stmt | selection_stmt | iteration_stmt | return_stmt 14. expression_stmt -> expression ; | ; 15. selection_stmt -> if ( expression ) statement | if ( expression ) statement else statement 16. iteration_stmt -> while ( expression ) statement 17. return_stmt -> return | return expression 18. expression -> var = expression | simple_expression 19. var -> ID | ID [ expression ] 20. simple_expression -> additive_expression relop additive_expression | additive_expression 21. relop -> <= | < | > | >= | == | ~= 22. additive_expression -> additive_expression addop term | term 23. addop -> + | - 24. term -> term mulop factor | factor 25. mulop -> * | / 26. factor -> ( expression ) | var | call | NUM 27. call -> ID ( args ) 28. args -> arg_list | empty 29. arg_list -> arg_list , expression | expression 对以上每条文法规则,给出了相关语义的简短解释。 1. program -> declaration_list 2. declaration_list -> declaration_list declaration | declaration 3. dec

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 7
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值