关于词法分析器的原理
词法分析是编译器的第一步,是对读取进来的源程序进行初步处理的阶段,主要完成对关键字、标识符、数字和其他一些特殊字符的识别与保存。
词法分析器编写
看了一下其他博客写的程序,有些没有加上识别注释,有些比较臃肿看不太懂,所以借鉴了这篇博客的代码,加上注释的读取功能,记录一下。最后面会说到一个bug,大家要是能够找出来可以帮忙评论一下,我自己找了好久还是找不出来。
源代码:
#include<stdio.h>
#include<conio.h>
#include<limits.h>
#include<math.h>
#include<string.h>
#include<stdlib.h>
/*
 * Shared lexer state.
 *   i    - cursor into test
 *   row  - number of entries currently stored in number[]
 *   line - number of entries currently stored in mark[]
 */
int i, row = 0, line = 0;
char test[1000];    /* characters of the input file, terminated by a '#' sentinel */
int number[100];    /* constant table */
/*
 * Identifier table. Each row must be able to hold a whole identifier plus
 * its terminating NUL; the previous width of 5 was overflowed by any name
 * longer than four characters (e.g. "include").
 */
char mark[100][32];
int count = 1;      /* current line number, reported in error messages */
/*
 * Returns non-zero when `pos` is at the end of the usable text in `test`:
 * the '#' sentinel, a NUL byte, or the last position that still leaves
 * room for the sentinel and the terminating NUL.
 */
static int guolv_at_end(size_t pos)
{
    return pos >= sizeof test - 2 || test[pos] == '#' || test[pos] == '\0';
}

/*
 * Filter: strip comments from the global `test` buffer in place.
 *
 * Line comments are removed up to (not including) their newline. Block
 * comments are removed entirely, except that the newlines inside them are
 * kept so that line numbers counted later still match the input file.
 * The result is written back to `test`, terminated by '#' followed by NUL.
 *
 * Note: the first '#' in the input is treated as the end of the text, and
 * comment markers inside string literals are not recognised as literals.
 */
void guolv() {
    char temp[sizeof test];
    size_t src = 0;
    size_t dst = 0;   /* dst never exceeds src, so temp cannot overflow */

    while (!guolv_at_end(src)) {
        /* Line comment: skip to the newline, which is copied on the next pass. */
        if (test[src] == '/' && test[src + 1] == '/') {
            while (!guolv_at_end(src) && test[src] != '\n') {
                src++;
            }
            continue;
        }
        /* Block comment: skip until the two-character terminator is found. */
        if (test[src] == '/' && test[src + 1] == '*') {
            src += 2;
            while (!guolv_at_end(src) &&
                   !(test[src] == '*' && test[src + 1] == '/')) {
                if (test[src] == '\n') {
                    temp[dst++] = '\n';
                }
                src++;
            }
            /* Only step over the terminator if the comment was actually closed. */
            if (!guolv_at_end(src)) {
                src += 2;
            }
            continue;
        }
        temp[dst++] = test[src++];
    }
    temp[dst++] = '#';
    temp[dst] = '\0';   /* strcpy needs a terminated source */
    strcpy(test, temp);
}
//词法分析
int wordanalysis()
{
//标识符和保留字
if ((test[i] >= 'A'&&test[i] <= 'Z') || (test[i] >= 'a'&&test[i] <= 'z'))
{
char word[10];
//保留字表
char pro[100][100] = { "PROGRAM", "BEGIN", "END", "VAR", "INTEGER", "WHILE",
"IF", "THEN", "ELSE", "DO", "PROCEDURE" ,"char",
"int","if","else","var" ,"return","break",
"do","while","for","double","float","short" };
int n = 0;
word[n++] = test[i++];
while ((test[i] >= 'A'&&test[i] <= 'Z') || (test[i] >= '0' && test[i] <= '9') || (test[i] >= 'a'&&test[i] <= 'z'))
{
word[n++] = test[i++];
}
word[n] = '\0';
i--;
//判断该标识符是否为保留字
for (n = 0; n < 100; n++)
{
if (strcmp(word, pro[n]) == 0)
{
printf(">> %s\t(%d,-) 保留字\n", pro[n], n + 1);
return 3;
}
}
//判断该标识符是否在标识符表中
int m = 0;
if (line != 0)
{
int q = 0;
while (q<line)
{
if (strcmp(word, mark[q++]) == 0)
{
printf(">> %s\t(25,%d) 标识符\n", word, q);
return 3;
}
}
}
//将该标识符保存到标识符表中
strcpy(mark[line], word);
printf(">> %s\t(25, %d) 标识符\n", word, line + 1);
line++;
return 3;
}
//数字
else if (test[i] >= '0' && test[i] <= '9')
{
char x[100];
int n = 0;
x[n++] = test[i++];
while (test[i] >= '0' && test[i] <= '9')
{
x[n++] = test[i++];
}
x[n] = '\0';
i--;
int num = atoi(x); //将字符串转换成int型
//判断该常数是否存在于常数表中
if (row != 0)
{
for (int y = 0; y<row; y++)
{
if (number[y] == num)
{
printf(">> %d\t(26,%d)\n", num, y + 1);
return 3;
}
}
}
//将该常数保存到标识符表中
number[row] = num;
int line = row;
printf(">> %d\t(26,%d)\n", num, line + 1);
row++;
return 3;
}
//各种符号
else
switch (test[i])
{
case ' ':
return -1;
case '\n':count++;
return -1;
case '#': return 0;
case '=':printf(">> =\t(27,-)\n"); return 3;
case '<':
i++;
if (test[i] == '=')
{
printf(">> <= \t(28,-)\n");
return 3;
}
else if (test[i] == '>')
{
printf(">> <>\t(29,-)\n");
return 3;
}
else
{
i--;
printf(">> <\t(30,-)\n");
return 3;
}
case '>':
i++;
if (test[i] == '=')
{
printf(">> >=\t(31,-)\n");
return 3;
}
else
{
i--;
printf(">> >\t(32,-)\n");
return 3;
}
case '+': printf(">> +\t(33,-)\n"); return 3;
case '-': printf(">> -\t(34,-)\n"); return 3;
case '*': printf(">> *\t(35,-)\n"); return 3;
case '/':
i++;
if (test[i] != '/') {
i--;
printf(">> /\t(36,-)\n"); return 3;
}
else {
while (1) {
if (test[i++] == '\n')
return -1;
}
printf(">> //\t(37,-)\n"); return 3;
}
case ':': printf(">> :\t(38,-)\n"); return 3;
case ';': printf(">> ;\t(39,-)\n"); return 3;
case '(': printf(">> (\t(40,-)\n"); return 3;
case ')': printf(">> )\t(41,-)\n"); return 3;
case '{': printf(">> {\t(42,-)\n"); return 3;
case '}': printf(">> }\t(43,-)\n"); return 3;
case '[': printf(">> [\t(44,-)\n"); return 3;
case ']': printf(">> ]\t(45,-)\n"); return 3;
case '|': printf(">> |\t(46,-)\n"); return 3;
case '"': printf(">> \"\t(47,-)\n"); return 3;
case ',': printf(">> ,\t(48,-)\n"); return 3;
case '\'': printf(">> '\t(49,-)\n"); return 3;//单引号
case '&':
i++;
if (test[i] != '&') {
i--;
printf(">> &\t(50,-)\n"); return 3;
}
else {
printf(">> &&\t(51,-)\n"); return 3;
}
case '\\': printf(">> \\\t(52,-)\n"); return 3;
default:
printf(">> %c error in %d row\n", test[i], count);
return 3;
}
}
/*
 * Entry point: load D:\yes.txt into the global `test` buffer, echo it,
 * strip comments with guolv(), echo the filtered text, then run
 * wordanalysis() repeatedly until it reports the end of input.
 *
 * Returns 0 on success, EXIT_FAILURE if the input file cannot be opened.
 */
int main()
{
    /* Leave room for the '#' sentinel and a terminating NUL. */
    const int limit = (int)sizeof test - 2;
    int c = 0;
    int m;
    int ch;

    /* Open the input file */
    FILE *fp = fopen("D:\\yes.txt", "r");
    if (fp == NULL)
    {
        printf("can't open file!\n");
        return EXIT_FAILURE;
    }

    /*
     * Read the file into test. The result of fgetc is checked directly:
     * looping on !feof() stores the EOF return value as one extra bogus
     * character, which the lexer then reports as an error on the last line.
     */
    while (c < limit && (ch = fgetc(fp)) != EOF)
    {
        test[c++] = (char)ch;
    }
    if (c == limit && fgetc(fp) != EOF)
    {
        fprintf(stderr, "warning: input truncated to %d characters\n", c);
    }
    fclose(fp);
    test[c] = '#';
    test[c + 1] = '\0';

    /* Echo the raw input */
    for (i = 0; test[i] != '#'; i++)
    {
        printf("%c", test[i]);
    }
    printf("\n");

    printf("------------过滤后的-------------\n");
    guolv();
    for (i = 0; test[i] != '#'; i++)
    {
        printf("%c", test[i]);
    }
    printf("\n");

    printf("输出文法分析后的:\n");
    i = 0;
    do
    {
        m = wordanalysis();
        /* wordanalysis leaves i on the last consumed character. */
        i++;
    } while (m != 0);
    return 0;
}
文件内容:
include <stdio.h>
//int main
/*
fdafsd
*/
int main(){
int a;
scanf("%d", &a);
}
编译输出:
include <stdio.h>
//int main
/*
fdafsd
*/
int main(){
int a;
scanf("%d", &a);
}
------------过滤后的-------------
include <stdio.h>
int main(){
int a;
scanf("%d", &a);
}
输出文法分析后的:
>> include (25, 1) 标识符
>> < (30,-)
>> stdio (25, 2) 标识符
>> . error in 1 row
>> h (25, 3) 标识符
>> > (32,-)
>> int (13,-) 保留字
>> main (25, 4) 标识符
>> ( (40,-)
>> ) (41,-)
>> { (42,-)
>> int (13,-) 保留字
>> a (25, 5) 标识符
>> ; (39,-)
>> scanf (25, 6) 标识符
>> ( (40,-)
>> " (47,-)
>> % error in 6 row
>> d (25, 7) 标识符
>> " (47,-)
>> , (48,-)
>> & (50,-)
>> a (25,5) 标识符
>> ) (41,-)
>> ; (39,-)
>> } (43,-)
>> error in 7 row
--------------------------------
Process exited after 0.1993 seconds with return value 0
请按任意键继续. . .
bug分析:
这里好像没有什么问题,但是看运行的最后,有个错误,应该是个其他符号,但是当我在文件内容最后加上#这个字符的时候,却不会出现这个错误,其实即使不加上也不应该出现这个错误的,觉得很奇怪,想了很久,debug了很久也没解决。