我把整个处理过程分成了两个阶段:预处理阶段和词法分析阶段。
1.预处理阶段的代码:
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<math.h>
/*
 * Collapse every run of consecutive spaces in buf into a single space.
 *
 * buf: input text; at most len bytes are read, and reading stops early at a
 *      NUL byte.
 * len: upper bound on the number of bytes to read from buf.
 * cur: output buffer; must have room for len + 1 bytes. It is always
 *      NUL-terminated on return.
 *
 * Returns the number of bytes written to cur including the terminating NUL
 * (i.e. strlen(cur) + 1).
 */
int choose_1(char*buf,int len,char *cur)
{
    int i = 0;
    int j = 0;

    while (i < len && buf[i] != '\0') {
        /* Every character is copied once; for a space, the rest of the run
         * is then skipped so only one space survives. */
        cur[j++] = buf[i];
        if (buf[i] == ' ') {
            while (i < len && buf[i] == ' ')
                i++;
        } else {
            i++;
        }
    }
    cur[j] = '\0';
    return j + 1;
}
/*
 * Strip C-style comments from buf.
 *
 * Block comments are removed entirely. Line comments are removed up to, but
 * not including, the newline that ends them. An unterminated block comment
 * discards the rest of the input. Comment markers inside string literals are
 * not recognised as such and are treated as comments.
 *
 * buf: input text; at most len bytes are read, and reading stops early at a
 *      NUL byte.
 * len: upper bound on the number of bytes to read from buf.
 * cur: output buffer; must have room for len + 1 bytes. It is always
 *      NUL-terminated on return.
 *
 * Returns the number of bytes written to cur including the terminating NUL
 * (i.e. strlen(cur) + 1).
 */
int choose_2(char*buf,int len,char *cur)
{
    int i = 0;
    int j = 0;

    while (i < len && buf[i] != '\0') {
        if (buf[i] == '/' && i + 1 < len && buf[i + 1] == '*') {
            /* Start looking for the terminator after the opening marker so
             * that the '*' of the opener is never reused as part of a closer. */
            i += 2;
            while (i + 1 < len && buf[i] != '\0' &&
                   !(buf[i] == '*' && buf[i + 1] == '/'))
                i++;
            if (i + 1 < len && buf[i] == '*' && buf[i + 1] == '/')
                i += 2;
            else
                i = len; /* unterminated comment: drop the remainder */
            continue;
        }
        if (buf[i] == '/' && i + 1 < len && buf[i + 1] == '/') {
            i += 2;
            /* Stop on the newline itself so it is copied by the next pass. */
            while (i < len && buf[i] != '\0' && buf[i] != '\n')
                i++;
            continue;
        }
        cur[j++] = buf[i++];
    }
    cur[j] = '\0';
    return j + 1;
}
/*
 * Copy buf to cur with every '\r', '\n' and '\t' removed.
 *
 * The characters are dropped, not replaced by a space.
 *
 * buf: input text; at most len bytes are read, and reading stops early at a
 *      NUL byte.
 * len: upper bound on the number of bytes to read from buf.
 * cur: output buffer; must have room for len + 1 bytes. It is always
 *      NUL-terminated on return.
 */
void choose_3(char*buf,int len,char *cur)
{
    int j = 0;

    for (int i = 0; i < len && buf[i] != '\0'; i++) {
        char c = buf[i];

        if (c == '\r' || c == '\n' || c == '\t')
            continue;
        cur[j++] = c;
    }
    cur[j] = '\0';
}
/*
 * Preprocessing driver: reads the source text from data.txt, runs it through
 * choose_1, choose_2 and choose_3 in that order (printing each intermediate
 * result), and writes the final text to the output file.
 *
 * Input longer than the buffer is truncated to sizeof buf - 1 bytes.
 * Returns 0 on success; exits with EXIT_FAILURE if a file cannot be opened.
 */
int main()
{
    /* Path to the author's input file; change it to your own path when
     * reusing this program. The file is only read, so "r" is sufficient. */
    FILE *fp = fopen("F:\\词法分析器\\源代码\\data.txt", "r");
    /* Zero-initialised so the buffers are valid strings at every stage. */
    char buf[1024] = {0};
    char cur[1024] = {0};
    int len = 0;
    int ch;

    if (fp == NULL) {
        printf("cannot open file!");
        exit(EXIT_FAILURE);
    }

    /* Leave one byte free for the terminating NUL. */
    while (len < (int)sizeof buf - 1 && (ch = fgetc(fp)) != EOF)
        buf[len++] = (char)ch;
    buf[len] = '\0';
    fclose(fp);
    printf("%s\n", buf);

    len = choose_1(buf, len, cur);
    printf("%s", cur);
    printf("%d\n", len);

    len = choose_2(cur, len, buf);
    printf("%s", buf);
    printf("%d\n", len);

    choose_3(buf, len, cur);
    printf("%s", cur);

    /* Output path; change it to your own path as well. */
    FILE *p = fopen("F:\\词法分析器\\源代码\\处理后.txt", "w");
    if (p == NULL) {
        printf("cannot create");
        exit(EXIT_FAILURE);
    }
    fprintf(p, "%s", cur);
    fclose(p);
    return 0;
}
此时,处理后的文本已经保存到“处理后.txt”中。处理后的文件内容如下:
连续的多个空格只保留一个,分隔符则原样保留。
下面是进行词法分析:
2.词法分析程序:
(将单词划出,关键字标出,一些符号标出等)
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<math.h>
/* Return the index of token in table[0..count-1], or -1 if it is absent. */
static int find_code(const char table[][10], int count, const char *token)
{
    for (int m = 0; m < count; m++) {
        if (strcmp(table[m], token) == 0)
            return m;
    }
    return -1;
}

/* True for ASCII letters only; identifiers here never contain digits or '_'. */
static int is_letter(char c)
{
    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
}

/* True for ASCII decimal digits. */
static int is_digit_char(char c)
{
    return c >= '0' && c <= '9';
}

/* True for the characters that form relational/assignment operators. */
static int is_relop_char(char c)
{
    return c == '>' || c == '=' || c == '<';
}

/*
 * Lexical analysis driver: reads the preprocessed text and prints one line
 * per recognised token in the form "<lexeme> <class> <code>", where the code
 * is the token's index in the syn table (25 for identifiers, 26 for numbers).
 *
 * Operators and symbols that are not present in the table are consumed
 * without producing output. Input longer than the buffer is truncated, and
 * lexemes longer than TOKEN_SIZE - 1 characters are truncated when printed.
 * Returns 0 on success; exits with EXIT_FAILURE if the file cannot be opened.
 */
int main()
{
    enum {
        SYN_COUNT = 44,
        SYN_ID = 25,      /* placeholder entry "ID" */
        SYN_NUM = 26,     /* placeholder entry "NUM" */
        SYN_ASSIGN = 32,  /* ":=" */
        BUF_SIZE = 1024,
        TOKEN_SIZE = 64
    };
    static const char syn[SYN_COUNT][10]={"#","main","if","then","while","do","static","int","double","struct","break","else","long","switch","case","typedef","char","return","const","float","short","continue","for","void","sizeof","ID","NUM","+","-","*","/",":",":=","<","<>","<=",">",">=","=","default","do",";","(",")"};
    char buf[BUF_SIZE];
    char token[TOKEN_SIZE];
    const char *p;
    int len = 0;
    int ch;
    FILE *fp;

    /* The file is only read, so "r" is sufficient. */
    if ((fp = fopen("F:\\词法分析器\\源代码\\处理后.txt", "r")) == NULL) {
        printf("cannot open");
        exit(EXIT_FAILURE);
    }

    /* Leave one byte free for the terminating NUL. */
    while (len < BUF_SIZE - 1 && (ch = fgetc(fp)) != EOF)
        buf[len++] = (char)ch;
    fclose(fp);
    buf[len] = '\0';
    printf("%s\n", buf);

    printf("单词\t 关键字\t种别码\n");

    /* Each branch consumes exactly the characters of its own token, so no
     * character following a token is skipped and p never passes the NUL. */
    p = buf;
    while (*p != '\0') {
        int n = 0;
        int code;

        if (is_letter(*p)) {
            /* Word: keyword or identifier. */
            while (is_letter(*p)) {
                if (n < TOKEN_SIZE - 1)
                    token[n++] = *p;
                p++;
            }
            token[n] = '\0';
            code = find_code(syn, SYN_COUNT, token);
            /* The "ID"/"NUM" table rows are class placeholders, not
             * keywords, so a word spelled like them is an identifier. */
            if (code >= 0 && code != SYN_ID && code != SYN_NUM)
                printf("%s\t %s \t%d\n", token, syn[code], code);
            else
                printf("%s\t ID \t25\n", token);
        } else if (is_digit_char(*p)) {
            /* Unsigned integer literal. */
            while (is_digit_char(*p)) {
                if (n < TOKEN_SIZE - 1)
                    token[n++] = *p;
                p++;
            }
            token[n] = '\0';
            printf("%s\t NUM \t 26\n", token);
        } else if (is_relop_char(*p)) {
            /* A whole run of '<', '>' and '=' is taken as one operator. */
            while (is_relop_char(*p)) {
                if (n < TOKEN_SIZE - 1)
                    token[n++] = *p;
                p++;
            }
            token[n] = '\0';
            code = find_code(syn, SYN_COUNT, token);
            if (code >= 0)
                printf("%s\t %s \t%d\n", token, syn[code], code);
        } else if (*p == ':' && p[1] == '=') {
            /* ":=" is in the table but needs two-character lookahead. */
            printf("%s\t %s \t%d\n", syn[SYN_ASSIGN], syn[SYN_ASSIGN], SYN_ASSIGN);
            p += 2;
        } else if ((*p > ' ' && *p <= '/') || *p == ':' || *p == ';' ||
                   *p == '?' || *p == '@' || *p == '{' || *p == '}') {
            /* Single-character symbol. */
            token[0] = *p;
            token[1] = '\0';
            code = find_code(syn, SYN_COUNT, token);
            if (code >= 0)
                printf("%s\t %s \t%d\n", token, syn[code], code);
            p++;
        } else {
            /* Whitespace or any other unhandled byte. */
            p++;
        }
    }
    return 0;
}
最终的运行结果如下:
完成。本代码可以直接复制粘贴使用,但需要提前在指定路径下创建一个待处理的文本文件(.txt)。另外,我在编写时为了方便观察中间结果加了很多 printf(),实际使用时可以删除这些 printf(),只保留最终结果的输出。