词法分析
1.主要能够识别部分C语言的关键字、运算符、分界符、标识符、常量(包括整型常量,浮点数常量),并能处理注释、部分复合运算符(如>=等)。
1)标识符:可由字母、数字和下划线组成。标识符必须以字母或下划线开头,大、小写字母被视为两个不同的字符。
常量:包括整型常量和浮点数常量。
2)标识符(id)和常量(num)通过以下正规式定义:
id=(letter|dline)(letter|digit|dline)*
num=digit digit* ( . digit* | ε )
2.单词符号的种别编码方案:
单词符号 | 种别码 | 单词符号 | 种别码 | 单词符号 | 种别码 | 单词符号 | 种别码 |
auto | 1 | short | 23 | . | 45 | ? | 67 |
break | 2 | signed | 24 | ! | 46 | | | 68 |
case | 3 | sizeof | 25 | ~ | 47 | , | 69 |
char | 4 | static | 26 | + | 48 | ; | 70 |
const | 5 | struct | 27 | ++ | 49 | : | 71 |
continue | 6 | switch | 28 | - | 50 | \ | 72 |
default | 7 | typedef | 29 | -- | 51 | -= | 73 |
do | 8 | union | 30 | * | 52 | += | 74 |
double | 9 | unsigned | 31 | & | 53 | *= | 75 |
else | 10 | void | 32 | && | 54 | /= | 76 |
enum | 11 | volatile | 33 | / | 55 | %= | 77 |
extern | 12 | while | 34 | % | 56 | >>= | 78 |
float | 13 | _bool | 35 | < | 57 | <<= | 79 |
for | 14 | _Complex | 36 | > | 58 | &= | 80 |
goto | 15 | _Imaginary | 37 | << | 59 | ^= | 81 |
if | 16 | ( | 38 | >> | 60 | |= | 82 |
inline | 17 | ) | 39 | <= | 61 | = | 83 |
int | 18 | [ | 40 | >= | 62 | 标识符(id) | 84 |
long | 19 | ] | 41 | == | 63 | 常量(num) | 85 |
register | 20 | { | 42 | != | 64 | ||
restrict | 21 | } | 43 | ^ | 65 | ||
return | 22 | -> | 44 | || | 66 |
3.词法分析程序的算法思想
从字符串表示的源程序中识别出具有独立意义的单词符号,其基本思想是根据扫描到的单词符号的第一个字符的种类,拼出相应的单词符号。
语法分析:
1.所识别的C 语言上下文无关文法用扩充的BNF表示如下:
E_E –> id = E ;
E –> T { +T | -T }
T –> F { *F | /F }
F –> ( E ) | id
S –> P Q ;
P –> int | float | double | long
Q –> id { , id }
2.语法分析程序的算法思想
有顺序地扫描具有独立意义的单词符号,对于每一个单词按照上面文法进行归约,如果不符合上述文法的单词则报错并做相应的错误恢复,直至结束。
语义分析:
1.语义分析程序算法思想
采用递归下降语法制导翻译的方法,对算术表达式、赋值语句在语法分析的基础上对于每一个文法的归约做相应的语义处理。在声明语句里做的主要动作是将遇到的每一个新的变量填入变量表中;在简单赋值语句里做的主要语义处理是当遇到变量时即查变量表,对每次归约做相应的加、减、乘、除的动作并生成相应的四元式序列。
可以执行程序代码:
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
/* Sentinel entry that marks the end of the keyword table rwtab[]. */
#define _KEY_WORD_END "waiting for your expanding"
/* Cleared to 0 by the parser routines whenever they report an error;
 * main() prints "success" only if it is still 1 after parsing. */
int flag = 1;
/* Quadruple store written by emit(): one entry per generated quadruple,
 * each field holding a short NUL-terminated string. */
struct
{
char result[10];
char ag1[10];
char op[10];
char ag2[10];
} quad[100];
/* One scanned token: its category code and a pointer to its spelling. */
typedef struct
{
int typenum;
char *word;
}WORD;
/* Current lookahead token shared by all parser routines.
 * It starts out as NULL: main() assigns the first real token from scaner()
 * before any parser routine dereferences it, so no placeholder object is
 * needed (the former placeholder allocation was never released). */
WORD *oneword = NULL;
/* Source text to analyse; filled by the scanf call in main(). */
char input[255];
/* Spelling of the identifier/number currently being assembled by concat(). */
char token[255]="";
/* Index of the next character of input[] to be read by m_getch(). */
int p_input;
/* Index of the next free position in token[]. */
int p_token;
/* Most recently read input character. */
char ch;
/* Keyword table searched by reserve(); a keyword's category code is its
 * position in this table plus one. The last entry is the end sentinel.
 * NOTE(review): "_bool" differs from the C keyword spelling "_Bool" - confirm
 * which spelling is intended. */
char * rwtab[] = {"auto","break","case","char","const","continue",
"default","do","double","else","enum","extern","float",
"for","goto","if","inline","int","long","register","restrict",
"return","short","signed","sizeof","static","struct","switch","typedef","union",
"unsigned","void","volatile","while","_bool","_Complex","_Imaginary",_KEY_WORD_END};
/* Lexical analyser entry point, defined further down in this file. */
WORD *scaner();
//----------------------------------------------------------------------------------- semantic analysis
/* Symbol table: names of the declared variables, in declaration order.
 * Each slot holds at most 9 characters plus the terminating NUL. */
char table[100][10];
/* Number of names currently stored in table[]. */
int t_n=0;

/*
 * Record a declared variable name in the symbol table.
 *
 * t_a: NUL-terminated name. Names longer than a slot are truncated to the
 *      slot size instead of overrunning it. When the table is already full
 *      the name is dropped and a diagnostic is written to stderr.
 */
void add(char *t_a)
{
    if (t_a == NULL)
        return;
    if (t_n >= (int)(sizeof table / sizeof table[0])) {
        fprintf(stderr, "symbol table full, '%s' ignored\n", t_a);
        return;
    }
    snprintf(table[t_n], sizeof table[t_n], "%s", t_a);
    t_n++;
}

/*
 * Check whether a name has been recorded with add().
 *
 * Only as many leading characters as a table slot can hold are compared, so
 * a long name that add() had to truncate is still found.
 *
 * Returns 1 if the name is present, 0 otherwise.
 */
int lookup(char *name)
{
    int i;

    if (name == NULL)
        return 0;
    for (i = 0; i < t_n; i++) {
        if (strncmp(name, table[i], sizeof table[i] - 1) == 0)
            return 1;
    }
    return 0;
}
/*
 * Write the decimal representation of k1 into m1 as a NUL-terminated string.
 *
 * k1: value to convert; zero yields "0" and negative values get a leading '-'.
 * m1: destination buffer supplied by the caller; it must be large enough for
 *     the sign, every digit and the terminating NUL.
 */
void itoa(int k1,char* m1)
{
    /* Three decimal digits per byte is a safe upper bound for any int width. */
    char digits[3 * sizeof(int) + 1];
    /* Work on the magnitude as unsigned so the most negative int is safe. */
    unsigned int mag = (k1 < 0) ? 0u - (unsigned int)k1 : (unsigned int)k1;
    int n = 0;
    int out = 0;

    /* Collect digits least-significant first; do/while so 0 gives one digit. */
    do {
        digits[n++] = (char)('0' + (int)(mag % 10u));
        mag /= 10u;
    } while (mag != 0u);

    if (k1 < 0)
        m1[out++] = '-';
    while (n > 0)
        m1[out++] = digits[--n];
    m1[out] = '\0';
}
int k=0;// number of temporary variables created so far
int ek=0;// number of quadruples emitted so far

/*
 * Create the name of the next temporary variable: "t1", "t2", ...
 *
 * Increments the temporary counter k and returns a freshly malloc'ed,
 * NUL-terminated string; the caller owns it and may free() it. The buffer is
 * sized for any int value of k. If memory cannot be obtained the program
 * terminates with a diagnostic instead of writing through a null pointer.
 */
char *newtemp()
{
    /* 't' + optional sign + up to 3 digits per byte of int + NUL. */
    size_t cap = 3 * sizeof(int) + 3;
    char *p = (char *)malloc(cap);

    if (p == NULL) {
        fprintf(stderr, "newtemp: out of memory\n");
        exit(EXIT_FAILURE);
    }
    k++;
    snprintf(p, cap, "t%d", k);
    return (p);
}
/*
 * Emit one quadruple: print it and, while there is room, store it in quad[].
 *
 * r:  result operand
 * a1: first argument
 * o:  operator (may be an empty string for a plain assignment)
 * a2: second argument (may be an empty string)
 *
 * The printed line is numbered with the running count ek and always shows the
 * full operand strings. Stored fields are truncated to the field size, and
 * once quad[] is full further quadruples are printed and counted but no
 * longer stored, so the array is never overrun.
 */
void emit(char *r,char *a1,char *o,char *a2)
{
    if (ek >= 0 && ek < (int)(sizeof quad / sizeof quad[0])) {
        snprintf(quad[ek].result, sizeof quad[ek].result, "%s", r);
        snprintf(quad[ek].ag1, sizeof quad[ek].ag1, "%s", a1);
        snprintf(quad[ek].op, sizeof quad[ek].op, "%s", o);
        snprintf(quad[ek].ag2, sizeof quad[ek].ag2, "%s", a2);
    }
    ek++;
    printf("(%d) %s = %s %s %s\n",ek,r,a1,o,a2);
}
//-------------------------------------------------- assignment statements
/* Forward declarations for routines that are defined later in this file. */
void retract();  /* steps the input position back by one character */
void all_s();    /* statement-list parser */
char *E();       /* expression parser, called recursively from F() */
/*
 * F -> ( E ) | id | num
 *
 * Parses one factor starting at the current token and returns the name that
 * holds its value: the identifier or number itself, or the result name of the
 * parenthesised expression.
 *
 * The returned pointer refers to a static buffer that is overwritten by the
 * next call, so callers must copy it right away (T() and F() itself do).
 * Token spellings are compared with strcmp(); comparing the pointers only
 * works when the compiler happens to merge identical string literals.
 *
 * Error handling (each case prints a message and clears flag):
 *  - undeclared identifier: reported, the identifier is still returned;
 *  - ';' or end of input where an operand is expected: returns "";
 *  - a stray operator, or any other unexpected token (which is skipped):
 *    returns the placeholder "null";
 *  - missing ')': returns "null" and leaves the offending token current so
 *    the caller can continue with it.
 */
char *F()
{
    static char r[10];
    const char *w = oneword->word;

    if (oneword->typenum==84||oneword->typenum==85) {
        snprintf(r, sizeof r, "%s", oneword->word);
        if (oneword->typenum==84 && lookup(oneword->word)==0) {
            printf("%s no defind\n",oneword->word);
            flag=0;
        }
        oneword=scaner();
        return r;
    }

    if (strcmp(w,";")==0 || oneword->typenum==1000) {
        printf("miss id\n");
        flag=0;
        r[0]='\0';
        return r;
    }

    if (strcmp(w,"*")==0 || strcmp(w,"+")==0 || strcmp(w,"-")==0 || strcmp(w,"/")==0) {
        printf("error,extra char\n");
        flag=0;
        strcpy(r,"null");
        return r;
    }

    if (strcmp(w,"(")!=0) {
        printf("error,extra char \n");
        flag=0;
        oneword=scaner();
        strcpy(r,"null");
        return r;
    }

    /* Parenthesised sub-expression. */
    oneword=scaner();
    snprintf(r, sizeof r, "%s", E());
    if (strcmp(oneword->word,")")==0) {
        oneword=scaner();
        return r;
    }

    /* No retract() here: stepping the input back without re-scanning would
     * make the next scaner() call return the current token a second time. */
    printf("error,miss ) in F()\n");
    flag=0;
    strcpy(r,"null");
    return r;
}
/*
 * T -> F { *F | /F }
 *
 * Parses a term and emits one quadruple per '*' or '/' operator, each
 * storing its result in a new temporary. Returns the name holding the value
 * of the whole term.
 *
 * The returned pointer refers to a static buffer that is overwritten by the
 * next call, so callers must copy it right away. An operand that directly
 * follows another operand, and stray operators or ')' after '*' or '/', are
 * reported (clearing flag) and skipped.
 */
char *T()
{
    static char r[10];
    char a1[10];
    char a2[10];
    char op[10];
    char *tmp;

    snprintf(a1, sizeof a1, "%s", F());
    strcpy(r,a1);
    if (oneword->typenum==84||oneword->typenum==85) {
        printf("error,extra id \n");
        flag=0;
        oneword=scaner();
    }
    while (strcmp(oneword->word,"*")==0 || strcmp(oneword->word,"/")==0) {
        snprintf(op, sizeof op, "%s", oneword->word);
        oneword = scaner();
        while (strcmp(oneword->word,"*")==0 || strcmp(oneword->word,"-")==0 ||
               strcmp(oneword->word,"+")==0 || strcmp(oneword->word,"/")==0 ||
               strcmp(oneword->word,")")==0) {
            printf("error,extra char \n");
            flag=0;
            oneword=scaner();
        }
        snprintf(a2, sizeof a2, "%s", F());
        /* newtemp() hands over a malloc'ed name; copy it and release it. */
        tmp = newtemp();
        snprintf(r, sizeof r, "%s", tmp);
        free(tmp);
        emit(r,a1,op,a2);
        strcpy(a1,r);
        if (oneword->typenum==84||oneword->typenum==85) {
            printf("error,extra id \n");
            flag=0;
            oneword=scaner();
        }
    }
    return r;
}
/*
 * E -> T { +T | -T }
 *
 * Parses an expression and emits one quadruple per '+' or '-' operator, each
 * storing its result in a new temporary. Returns the name holding the value
 * of the whole expression.
 *
 * The returned pointer refers to a static buffer that is overwritten by the
 * next call (including recursive calls made through F()), so callers must
 * copy it right away. Unexpected operands, operators and parentheses are
 * reported (clearing flag) and skipped.
 */
char *E()
{
    static char r[10];
    char a1[10];
    char a2[10];
    char op[10];
    char *tmp;

    snprintf(a1, sizeof a1, "%s", T());
    strcpy(r,a1);
    if (oneword->typenum==84||oneword->typenum==85) {
        printf("error,extra id \n");
        flag=0;
        oneword=scaner();
    }
    while (strcmp(oneword->word,"+")==0 || strcmp(oneword->word,"-")==0) {
        snprintf(op, sizeof op, "%s", oneword->word);
        oneword=scaner();
        if (strcmp(oneword->word,"*")==0 || strcmp(oneword->word,"-")==0 ||
            strcmp(oneword->word,"+")==0 || strcmp(oneword->word,"/")==0 ||
            strcmp(oneword->word,")")==0) {
            printf("error,extra char \n");
            flag=0;
            oneword=scaner();
        }
        snprintf(a2, sizeof a2, "%s", T());
        /* newtemp() hands over a malloc'ed name; copy it and release it. */
        tmp = newtemp();
        snprintf(r, sizeof r, "%s", tmp);
        free(tmp);
        emit(r,a1,op,a2);
        strcpy(a1,r);
        if (oneword->typenum==84||oneword->typenum==85) {
            printf("error,extra id \n");
            flag=0;
            oneword=scaner();
        } else if (strcmp(oneword->word,"(")==0) {
            printf("error,extra or ( in E1()\n");
            flag=0;
            oneword=scaner();
        }
    }
    return r;
}
/*
 * E_E -> id = E
 *
 * Parses an assignment statement (the trailing ';' is left to the caller)
 * and emits the quadruple that stores the expression result in the target.
 *
 * Errors are reported on stdout and clear flag:
 *  - the statement does not start with an identifier: nothing is consumed;
 *  - the target was never declared (flag is cleared here as well, matching
 *    how F() treats undeclared operands);
 *  - missing '=': the current token is kept and parsed as the start of the
 *    expression. This replaces retract()+scaner(), which only re-read the
 *    last character of the token;
 *  - a ')' directly after '=' is skipped.
 */
void E_E()
{
    char r[10];
    char a1[10];
    char none[] = "";   /* empty operator / second argument for emit() */

    if (oneword->typenum!=84) {
        printf("error,miss letter\n");
        flag=0;
        return;
    }

    if (lookup(oneword->word)==0) {
        printf("%s no defind\n",oneword->word);
        flag=0;
    }
    snprintf(r, sizeof r, "%s", oneword->word);
    oneword= scaner();

    if (strcmp(oneword->word,"=")!=0) {
        printf("error,miss =\n");
        flag=0;
    } else {
        oneword=scaner();
    }

    if (strcmp(oneword->word,")")==0) {
        printf("error,extra ) in E_E()\n");
        flag=0;
        oneword=scaner();
    }
    snprintf(a1, sizeof a1, "%s", E());
    emit(r,a1,none,none);
}
//----------------------------------------------------------------------------
/*
 * Q -> id { , id } followed by ';'
 *
 * Parses the declarator list of a declaration (the type keyword has already
 * been consumed by S()) and records every declared identifier with add().
 *
 * Errors are reported on stdout and clear flag. When an identifier is
 * missing, the unexpected token is NOT entered into the symbol table and is
 * kept as the current token, so the ',' / ';' checks below look at it.
 * A '(' after the first name is reported and parsing of this declaration
 * stops without consuming it.
 */
void S_1()
{
    if (oneword->typenum!=84) {
        printf("error,miss element in defind\n");
        flag=0;
    } else {
        add(oneword->word);
        oneword = scaner();
    }

    if (strcmp(oneword->word,"(")==0) {
        printf("error\n");
        flag=0;
        return;
    }

    while (strcmp(oneword->word,",")==0) {
        oneword = scaner();
        if (oneword->typenum!=84) {
            printf("error,miss id in defind\n");
            flag=0;
        } else {
            add(oneword->word);
            oneword=scaner();
        }
    }

    if (strcmp(oneword->word,";")!=0) {
        printf("error,miss ;\n");
        flag=0;
    }
    oneword=scaner();
}
/*
 * Declaration statement: S -> P Q ;
 *
 * When the current token is one of the accepted type keywords (category
 * codes 4, 9, 13, 18, 19, 23) it is consumed and the declarator list is
 * handed to S_1(). Any other token is reported and clears flag.
 */
void S() // declaration statement
{
    switch (oneword->typenum) {
    case 4:
    case 9:
    case 13:
    case 18:
    case 19:
    case 23:
        oneword=scaner();
        S_1();
        break;
    default:
        printf("error,in defind\n");
        flag=0;
        break;
    }
}
//----------------------------------------------------------------------------
/*
 * Parses a sequence of statements starting at the current token.
 *
 *  - A type keyword (codes 4, 9, 13, 18, 19, 23) starts a declaration,
 *    handled by S(). Code 13 is now accepted here too; S() already
 *    accepted it.
 *  - An identifier (code 84) starts an assignment handled by E_E(), which
 *    must be followed by ';'.
 *  - Codes 8, 14, 16 and 34 are recognised as statement starters but have no
 *    parser: they are reported and the token is skipped so that the loop
 *    always advances.
 *
 * The sequence ends at the first token that cannot start a statement. If not
 * even one statement was found, that is reported as an error. Every error
 * clears flag.
 */
void all_s1()
{
    int parsed_any = 0;

    for (;;) {
        switch (oneword->typenum) {
        case 4:
        case 9:
        case 13:
        case 18:
        case 19:
        case 23:
            S();
            break;
        case 84:
            E_E();
            if (strcmp(oneword->word,";")==0) {
                oneword = scaner();
            } else {
                printf("error\n");
                flag=0;
            }
            break;
        case 8:
        case 14:
        case 16:
        case 34:
            printf("error,no sentence in all_s1\n");
            flag=0;
            oneword = scaner();
            break;
        default:
            if (!parsed_any) {
                printf("error,no sentence in all_s1\n");
                flag=0;
            }
            return;
        }
        parsed_any = 1;
    }
}
/*
 * Parses the whole program: either a statement sequence enclosed in
 * '{' ... '}' or a bare statement sequence.
 *
 * Brace tokens are recognised by comparing their spelling with strcmp();
 * comparing the pointers only works when the compiler merges identical
 * string literals. A missing '}' is reported and clears flag.
 */
void all_s()
{
    if (strcmp(oneword->word,"{")!=0) {
        all_s1();
        return;
    }

    oneword = scaner();
    all_s1();
    if (strcmp(oneword->word,"}")==0) {
        oneword=scaner();
    } else {
        printf("error,miss } in all_s\n");
        flag=0;
    }
}
//----------------------------------------------------------------------------
/*
 * Program entry: reads the source text from standard input up to (but not
 * including) the first '#', parses it, and prints "success" when no routine
 * cleared flag.
 */
int main()
{
    /* The field width must stay one below the size of input[] (255) so the
     * text plus its terminating NUL always fits. If nothing could be read
     * (e.g. the input starts with '#'), analyse an empty program. */
    if (scanf("%254[^#]", input) != 1)
        input[0] = '\0';
    p_input=0;
    oneword=scaner();
    all_s();
    if(flag==1) printf("success\n");
    return 0;
}
//
/*
 * Read the next input character into ch, advance the input position by one,
 * and return the character.
 */
char m_getch()
{
    ch = input[p_input++];
    return ch;
}
//
/*
 * Skip whitespace: while ch is a blank, keep reading the next input
 * character. On return ch holds the first non-blank character.
 *
 * Besides space and newline, tab and carriage return are skipped as well, so
 * indented or CRLF-terminated source text does not produce error tokens.
 */
void getbc()
{
    while (ch==' ' || ch=='\n' || ch=='\t' || ch=='\r') {
        ch=input[p_input];
        p_input = p_input+1;
    }
}
//
/*
 * Append the current character ch to token[] and keep the buffer
 * NUL-terminated.
 */
void concat()
{
    token[p_token++] = ch;
    token[p_token] = '\0';
}
//
/* Returns 1 when the current character ch is an underscore, 0 otherwise. */
int dline()
{
    return (ch == '_') ? 1 : 0;
}
//
/* Returns 1 when the current character ch is in a-z or A-Z, 0 otherwise. */
int letter()
{
    if (ch >= 'a' && ch <= 'z')
        return 1;
    if (ch >= 'A' && ch <= 'Z')
        return 1;
    return 0;
}
//
/* Returns 1 when the current character ch is a decimal digit, 0 otherwise. */
int digit()
{
    return (ch >= '0' && ch <= '9') ? 1 : 0;
}
//
/*
 * Classify the spelling in token[]: returns the keyword's category code
 * (its position in rwtab[] plus one) when token[] is a keyword, otherwise 84,
 * the category code for identifiers. The search stops at the end sentinel.
 */
int reserve()
{
    int idx;

    for (idx = 0; strcmp(rwtab[idx], _KEY_WORD_END) != 0; idx++) {
        if (strcmp(rwtab[idx], token) == 0)
            return idx + 1;
    }
    return 84;
}
//
/* Step the input position back by one character. */
void retract()
{
    --p_input;
}
//
/* Placeholder that always returns NULL; nothing in the code shown here calls it. */
char *dtb()
{
    char *nothing = NULL;
    return nothing;
}
/* Store a category code and spelling in *w and hand w back. */
static WORD *scaner_fill(WORD *w, int typenum, const char *word)
{
    w->typenum = typenum;
    w->word = (char *)word;
    return w;
}

/*
 * Lexical analyser: scans the next token from input[] starting at p_input.
 *
 * Returns a pointer to the token record. The record lives in static storage
 * and is overwritten by the next call, so no memory is allocated per token;
 * callers must not free it. For identifiers, keywords and numbers the word
 * field points at the shared token[] buffer, otherwise at a string constant.
 *
 * Category codes:
 *   1..37   keywords (position in rwtab[] plus one)
 *   38..83  operators and delimiters, as set in the cases below
 *   84      identifier          85  number (digits, optionally '.' digits)
 *   1000    end of input ("OVER")
 *   -1      unrecognised character or unterminated block comment ("ERROR")
 *   -2      a comment that was skipped (word is "")
 *
 * Corrections relative to the earlier version: '{' and '}' get codes 42 and
 * 43 and ']' is recognised as 41; "&=" gets code 80; "%=" carries its own
 * spelling; "-=" (73) and "|=" (82) are recognised; a line comment that runs
 * to the end of the input terminates; a block comment ending in "**" + "/"
 * is recognised.
 */
WORD *scaner()
{
    static WORD current;
    WORD *myword = &current;
    char prev;

    p_token=0;
    m_getch();
    getbc();

    if (letter()||dline()) { /* identifier or keyword */
        while (letter()||digit()||dline()) {
            concat();
            m_getch();
        }
        retract();
        return scaner_fill(myword, reserve(), token);
    }

    if (digit()) { /* number */
        while (digit()) {
            concat();
            m_getch();
        }
        if (ch=='.') {
            concat();
            m_getch();
            while (digit()) {
                concat();
                m_getch();
            }
        }
        retract();
        return scaner_fill(myword, 85, token);
    }

    switch (ch) {
    case '(':
        return scaner_fill(myword, 38, "(");
    case ')':
        return scaner_fill(myword, 39, ")");
    case '[':
        return scaner_fill(myword, 40, "[");
    case ']':
        return scaner_fill(myword, 41, "]");
    case '{':
        return scaner_fill(myword, 42, "{");
    case '}':
        return scaner_fill(myword, 43, "}");
    case '-':
        m_getch();
        if (ch=='>') return scaner_fill(myword, 44, "->");
        if (ch=='-') return scaner_fill(myword, 51, "--");
        if (ch=='=') return scaner_fill(myword, 73, "-=");
        retract();
        return scaner_fill(myword, 50, "-");
    case '.':
        return scaner_fill(myword, 45, ".");
    case '!':
        m_getch();
        if (ch=='=') return scaner_fill(myword, 64, "!=");
        retract();
        return scaner_fill(myword, 46, "!");
    case '~':
        return scaner_fill(myword, 47, "~");
    case '+':
        m_getch();
        if (ch=='=') return scaner_fill(myword, 74, "+=");
        if (ch=='+') return scaner_fill(myword, 49, "++");
        retract();
        return scaner_fill(myword, 48, "+");
    case '*':
        m_getch();
        if (ch=='=') return scaner_fill(myword, 75, "*=");
        retract();
        return scaner_fill(myword, 52, "*");
    case '&':
        m_getch();
        if (ch=='&') return scaner_fill(myword, 54, "&&");
        if (ch=='=') return scaner_fill(myword, 80, "&=");
        retract();
        return scaner_fill(myword, 53, "&");
    case '/':
        m_getch();
        if (ch=='/') {
            /* Line comment: skip to the end of the line or of the input. */
            do {
                m_getch();
            } while (ch!='\n' && ch!='\0');
            if (ch=='\0')
                retract(); /* leave the terminator for the next call */
            return scaner_fill(myword, -2, "");
        }
        if (ch=='*') {
            /* Block comment: skip to the first '*' directly followed by '/'. */
            prev = '\0';
            m_getch();
            while (ch!='\0') {
                if (prev=='*' && ch=='/')
                    return scaner_fill(myword, -2, "");
                prev = ch;
                m_getch();
            }
            retract(); /* leave the terminator for the next call */
            return scaner_fill(myword, -1, "ERROR");
        }
        if (ch=='=') return scaner_fill(myword, 76, "/=");
        retract();
        return scaner_fill(myword, 55, "/");
    case '%':
        m_getch();
        if (ch=='=') return scaner_fill(myword, 77, "%=");
        retract();
        return scaner_fill(myword, 56, "%");
    case '<':
        m_getch();
        if (ch=='=') return scaner_fill(myword, 61, "<=");
        if (ch=='<') {
            m_getch();
            if (ch=='=') return scaner_fill(myword, 79, "<<=");
            retract();
            return scaner_fill(myword, 59, "<<");
        }
        retract();
        return scaner_fill(myword, 57, "<");
    case '>':
        m_getch();
        if (ch=='=') return scaner_fill(myword, 62, ">=");
        if (ch=='>') {
            m_getch();
            if (ch=='=') return scaner_fill(myword, 78, ">>=");
            retract();
            return scaner_fill(myword, 60, ">>");
        }
        retract();
        return scaner_fill(myword, 58, ">");
    case '=':
        m_getch();
        if (ch=='=') return scaner_fill(myword, 63, "==");
        retract();
        return scaner_fill(myword, 83, "=");
    case '^':
        m_getch();
        if (ch=='=') return scaner_fill(myword, 81, "^=");
        retract();
        return scaner_fill(myword, 65, "^");
    case '|':
        m_getch();
        if (ch=='|') return scaner_fill(myword, 66, "||");
        if (ch=='=') return scaner_fill(myword, 82, "|=");
        retract();
        return scaner_fill(myword, 68, "|");
    case '?':
        return scaner_fill(myword, 67, "?");
    case ',':
        return scaner_fill(myword, 69, ",");
    case ';':
        return scaner_fill(myword, 70, ";");
    case ':':
        return scaner_fill(myword, 71, ":");
    case '\\':
        return scaner_fill(myword, 72, "\\");
    case '\0':
    case '#':
        return scaner_fill(myword, 1000, "OVER");
    default:
        return scaner_fill(myword, -1, "ERROR");
    }
}
测试结果与分析:
操作说明:1键盘输入; 2可以输入多行; 3以#号结束。
测试数据 1: int a;#
输出:success
分析:输入为正确的声明语句,所以输出success表正确输入,#是结束符。
测试数据 2:
int a, ;
int ;
int #
输出:
error,miss id in defind
error,miss element in defind
error,miss element in defind
error,miss ;
分析:第一行的输入中少了一个变量名,在第二行的输入中也是少了一个变量名,在第三行的输入中少了一个变量名和一个‘;’符,程序报了4个错,并做了相应的错误恢复。
测试数据 3:a=2+-3;#
输出:
a no defind
error,extra char
(1) t1 = 2 + 3
(2) a = t1
分析:变量名a没有提前声明就使用,程序报了1个错;‘+’号和‘-’号之间少了一个变量,程序再报一个错,并做了错误恢复输出2条四元式序列。
测试数据 4:
int a;
a 2+3;#
输出:
error,miss =
(1) t1 = a + 3
(2) a = t1
分析:变量之后缺少‘=’号,程序报错,并做了错误恢复。
测试数据 5:
int a;
a= ; #
输出:
miss id
(1) a =
分析:赋值语句的‘=’后面至少应该有一个变量,输入里‘=’后面是‘;’号代表一行结束,所以至少缺少一个变量名,程序报错并做了错误恢复。
测试数据 6:
int a;
a=3+((a+4;#
输出:
error,extra char
(1) t1 = a + 4
error,miss ) in F()
error,miss ) in F()
(2) t2 = 3 + null
(3) a = t2
分析:输入的赋值语句里少了2个‘)’号,所以程序报2个错。
测试数据 7:
int a;
a=3+4*(5+6));#
输出:
(1) t1 = 5 + 6
(2) t2 = 4 * t1
(3) t3 = 3 + t2
(4) a = t3
error
分析:输入的赋值语句最后多了1个‘)’号,程序报了一个错。赋值语句a=3+4*(5+6)的计算顺序是先计算括号里的加法,再计算括号外的乘法,最后计算括号外的加法,输出结果表明计算顺序是正确的。
测试数据 8:
int a;
a=a+4 4;#
输出:
error,extra id
(1) t1 = a + 4
(2) a = t1
分析:赋值语句的最后多了1个数字,程序报了1个错。
测试数据 9:
int a;
a=3+4*(5+7);#
输出:
(1) t1 = 5 + 7
(2) t2 = 4 * t1
(3) t3 = 3 + t2
(4) a = t3
success
分析:输入的是正确的声明语句和赋值语句,程序输出的四元式序列也是正确的。
测试数据 10:
int a;
a=(3+5)-4*+)*4+99*8;
int b;
a b+3;
a=b*(b+c)+4;
a=b 3;#
输出:
(1) t1 = 3 + 5
error,extra char
error,extra char
error,extra char
(2) t2 = 4 * 4
(3) t3 = t1 - t2
(4) t4 = 99 * 8
(5) t5 = t3 + t4
(6) a = t5
error,miss =
(7) t6 = b + 3
(8) a = t6
c no defind
(9) t7 = b + c
(10) t8 = b * t7
(11) t9 = t8 + 4
(12) a = t9
error,extra id
(13) a = b
分析:这里输入了多条声明语句和多条赋值语句,声明语句和赋值语句的顺序也有交叉,其中第一条赋值语句里有3个错误,第二条赋值语句里少了一个‘=’号,第三条赋值语句里的变量c没有声明,第四条赋值语句有一个错误,程序做了相应错误恢复和四元式序列的输出。