/* Straight to the code. */
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Size in bytes of each input buffer and of the scan buffer. */
#define BUFSIZE 256
/* Reserved-word table consulted by isKeyWord(); each row holds at most 12 characters plus the terminator. */
static char keyWords[][13]={"abstract", "boolean", "break", "byte", "case", "catch", "char",
"class", "const", "continue", "default", "do", "double", "else", "extends", "false",
"final", "finally", "float", "for", "goto", "if", "implements", "import", "instanceof",
"int", "interface", "long", "native", "new", "null", "package", "private", "protected",
"public", "return", "short", "static", "super", "switch", "synchronized", "this",
"throw", "throws", "transient", "true", "try", "void", "volatile", "while"}; /* keywords, 50 entries */
FILE *fin; /* the Java file being processed */
FILE *fout; /* the file that receives the analysis result */
char filePath[100]; /* path of the Java file to process */
char savePath[100] = "javascan_result.txt"; /* path of the result file */
char inBuf[2][BUFSIZE]; /* pair of input buffers; bufNum selects the active one */
char scanBuf[BUFSIZE]; /* scan buffer that getWords() tokenises */
int bufNum = 0; /* index of the active input buffer */
long totalWords = 0; /* total number of words */
long lineWords = 0; /* number of words on the current line */
long lineCount = 0; /* number of lines read so far */
void dealWord(); /* lexical analysis driver */
void clearNote(); /* strips comments and redundant whitespace */
void getWords(int state); /* splits the scan buffer into words */
void getWordKind(char str[]); /* classifies a word and writes it to the result file */
int getCharKind(char c); /* classifies a single character */
int isOper(char c); /* tests whether c is a character next to which spaces may be removed */
int isWord(char c); /* tests whether c is a letter */
int isKeyWord(char str[]); /* tests whether str is a keyword */
int isSignWord(char str[]); /* tests whether str is an identifier */
int isNumber(char c); /* tests whether c is a digit */
int isInteger(char str[]); /* tests whether str is an integer literal */
int isFloat(char str[]); /* tests whether str is a floating-point literal */
/*
 * Program entry point.
 *
 * Prompts for the path of a Java source file, opens it for reading, opens
 * the result file named by savePath for writing, runs dealWord() between a
 * header and a footer line, then closes both files.
 *
 * Returns 0 in every case, including when a file cannot be opened (the
 * original exit status is kept so existing callers see no change).
 */
int main()
{
printf("请设置要分析的Java程序文件的路径:");
/* filePath holds 100 bytes, so accept at most 99 characters plus the terminator. */
if(scanf("%99s", filePath) != 1)
{
printf("Can't open this file!\n");
return 0;
}
if((fin = fopen(filePath, "r")) == NULL)
{
printf("Can't open this file!\n");
return 0;
}
if((fout = fopen(savePath, "w")) == NULL)
{
printf("Can't write to this file!\n");
/* Do not leak the already opened input stream. */
fclose(fin);
return 0;
}
fprintf(fout, "Java词法分析器 Powered By Hamvorinf \n------------------------------------------------------\n");
dealWord();
fprintf(fout, "------------------------------------------------------\n分析结束!\n");
fclose(fin);
fclose(fout);
/* Keeps the console window open; "pause" is a Windows shell command. */
system("pause");
return 0;
}
/*******************Lexical analysis driver*******************/
/*
 * Reads fin one character at a time and collects the characters in the
 * active input buffer inBuf[bufNum]. A chunk is handed to clearNote() and,
 * if anything is left, copied to scanBuf and tokenised by getWords(0) when
 *   - a newline is read,
 *   - the end of the file is reached while characters are pending, or
 *   - the buffer already holds BUFSIZE - 2 characters in the middle of a line.
 *
 * In the last case the chunk is cut after the right-most character for which
 * isOper() returns 1; the tail behind the cut is moved to the other input
 * buffer, which then becomes the active one, and reading continues behind
 * that tail. lineCount only advances on a newline (or at the end of the
 * file), so words of an early chunk are written with the line number that
 * was current when the chunk was processed.
 *
 * Prints the per-line word count and a final summary to stdout.
 */
void dealWord()
{
int c; /* int rather than char so EOF cannot be confused with a data byte */
int i, j, k;
int count = 0; /* characters stored in the active input buffer */
int carry; /* characters moved to the other buffer by a split */
int split; /* 1 when the buffer filled up in the middle of a line */
int chgLine = 0; /* 1 when the chunk ends a line */
int atEof = 0; /* 1 once the final, unterminated line is being flushed */
while(atEof == 0)
{
c = fgetc(fin);
if(c == EOF)
{
if(count == 0)
{
break;
}
/* The file does not end with a newline: analyse what is pending. */
atEof = 1;
}
if(atEof == 1 || c == '\n' || count == BUFSIZE - 2)
{
carry = 0;
split = 0;
if(atEof == 0 && c != '\n')
{
/* Buffer full: keep the character and cut the chunk at a separator. */
split = 1;
inBuf[bufNum][count] = (char)c;
inBuf[bufNum][count+1] = '\0';
for(i = count; i > 0; i--)
{
if(isOper(inBuf[bufNum][i]) == 1)
{
for(j = 0, k = i+1; k <= count; k++, j++)
{
inBuf[1-bufNum][j] = inBuf[bufNum][k];
}
inBuf[1-bufNum][j] = '\0';
inBuf[bufNum][i+1] = '\0';
carry = j;
/* Only the right-most separator is a valid cut position. */
break;
}
}
}
else
{
inBuf[bufNum][count] = '\0';
}
/* A newline or the end of the file completes a line. */
if(atEof == 1 || c == '\n')
{
chgLine = 1;
lineCount++;
}
clearNote(); /* strip comments, redundant spaces and tabs */
if(inBuf[bufNum][0] != '\0')
{
/* Copy the chunk to the scan buffer and tokenise it. */
strcpy(scanBuf, inBuf[bufNum]);
getWords(0);
}
/* After a split the other buffer, which holds the tail, becomes active. */
if(split == 1)
{
bufNum = 1 - bufNum;
}
/* Report the word count once the line is complete. */
if(chgLine == 1)
{
printf("第%-4ld行共有 %-4ld个单词.\n", lineCount, lineWords);
chgLine = 0;
lineWords = 0;
}
/* Continue behind the carried tail (0 when nothing was carried). */
count = carry;
}
else
{
inBuf[bufNum][count++] = (char)c;
}
}
printf("共处理了 %-4ld行的 %-4ld个单词\n", lineCount, totalWords);
printf("处理结果已经保存到 %s 下!\n", savePath);
}
/*******************过滤注释及多余空格*******************/
void clearNote()
{
int i, j, k;
int noteCount = 0;
int flag = 0; /*是否是字符串*/
char note[100];
/*去除注释*/
for(i = 0; inBuf[bufNum][i] != '\0'; i++)
{
if(inBuf[bufNum][i] == '"')
{
flag = 1 - flag;
continue;
}
if(inBuf[bufNum][i] == '/' && flag == 0)
{
/*如果是“//”注释*/
if(inBuf[bufNum][i+1] == '/')
{
for(j = i; inBuf[bufNum][j] != '\0'; j++)
{
note[noteCount++] = inBuf[bufNum][j];
}
note[noteCount] = '\0';
noteCount = 0;
lineWords++;
fprintf(fout, "%4ld.%-4ld 【%s】 【注释】---【0x101】\n",lineCount, lineWords, note);
inBuf[bufNum][i] = '\0';
break;
}
if(inBuf[bufNum][i+1] == '*')
{
note[noteCount++] = '/';
note[noteCount++] = '*';
for(j = i+2; inBuf[bufNum][j] != '\0'; j++)
{
note[noteCount++] = inBuf[bufNum][j];
if(inBuf[bufNum][j] == '*' && inBuf[bufNum][j+1] == '/')
{
j += 2;
note[noteCount++] = inBuf[bufNum][j];
note[noteCount] = '\0';
noteCount = 0;
lineWords++;
fprintf(fout, "%4ld.%-4ld 【%s】 【注释】---【0x101】\n",lineCount, lineWords, note);
break;
}
}
for(; inBuf[bufNum][j] != '\0'; j++, i++)
{
inBuf[bufNum][i] = inBuf[bufNum][j];
}
if(inBuf[bufNum][j] == '\0')
{
inBuf[bufNum][i] = '\0';
}
}
}
}
/*去除多余空格*/
for(i = 0, flag = 0; inBuf[bufNum][i] != '\0'; i++)
{
if(inBuf[bufNum][i] == '"')
{
flag = 1 - flag;
continue;
}
if(inBuf[bufNum][i] == ' ' && flag == 0)
{
for(j = i+1; inBuf[bufNum][j] != '\0' && inBuf[bufNum][j] == ' '; j++)
{
}
if(inBuf[bufNum][j] == '\0')
{
inBuf[bufNum][i] = '\0';
break;
}
if(inBuf[bufNum][j] != '\0' && ((isOper(inBuf[bufNum][j]) == 1) || (i > 0 && isOper(inBuf[bufNum][i-1]) == 1)))
{
for(k = i; inBuf[bufNum][j] != '\0'; j++, k++)
{
inBuf[bufNum][k] = inBuf[bufNum][j];
}
inBuf[bufNum][k] = '\0';
i--;
}
}
}
/*去除多余制表符*/
for(i = 0, flag = 0; inBuf[bufNum][i] != '\0'; i++)
{
if(inBuf[bufNum][i] == '\t')
{
for( j = i; inBuf[bufNum][j] != '\0'; j++)
{
inBuf[bufNum][j] = inBuf[bufNum][j+1];
}
i = -1;
}
}
}
/*******************Separator / operator character check*******************/
/*
 * Returns 1 when c is neither an ASCII letter, an ASCII digit, '_' nor '$';
 * returns 0 otherwise.
 */
int isOper(char c)
{
int lower = (c >= 'a' && c <= 'z');
int upper = (c >= 'A' && c <= 'Z');
int digit = (c >= '0' && c <= '9');
if(lower || upper || digit)
{
return 0;
}
if(c == '_' || c == '$')
{
return 0;
}
return 1;
}
/*******************Letter check*******************/
/* Returns 1 when c is an ASCII letter (a-z or A-Z), 0 otherwise. */
int isWord(char c)
{
if(c >= 'a' && c <= 'z')
{
return 1;
}
return (c >= 'A' && c <= 'Z') ? 1 : 0;
}
/*******************Digit check*******************/
/* Returns 1 when c is an ASCII decimal digit (0-9), 0 otherwise. */
int isNumber(char c)
{
return (c >= '0' && c <= '9') ? 1 : 0;
}
/*******************Integer literal check*******************/
/*
 * Returns 1 when str is an integer literal, 0 otherwise.
 *
 * Accepted form: an optional leading '-', then either
 *   - "0x" / "0X" followed by at least one hexadecimal digit (either case), or
 *   - at least one decimal digit,
 * optionally followed by a single 'L' or 'l' suffix, and nothing else.
 */
int isInteger(char str[])
{
const char *p = str;
int digits = 0;
if(*p == '-')
{
p++;
}
if(p[0] == '0' && (p[1] == 'x' || p[1] == 'X'))
{
/* The cast keeps the <ctype.h> argument in the range of unsigned char. */
for(p += 2; isxdigit((unsigned char)*p); p++)
{
digits++;
}
}
else
{
for(; isdigit((unsigned char)*p); p++)
{
digits++;
}
}
/* A sign or a bare "0x" prefix alone is not a number. */
if(digits == 0)
{
return 0;
}
if(*p == 'L' || *p == 'l')
{
p++;
}
return *p == '\0';
}
/*******************Floating-point literal check*******************/
/*
 * Returns 1 when str is a decimal floating-point literal, 0 otherwise.
 *
 * Accepted form: an optional leading '-', at least one decimal digit, then
 * any of the following parts in this order:
 *   - '.' followed by zero or more digits,
 *   - 'e' / 'E' with an optional sign and at least one digit,
 *   - a single suffix 'f', 'F', 'd' or 'D'.
 * At least one of these parts must be present, so a plain run of digits is
 * not reported as a floating-point literal.
 */
int isFloat(char str[])
{
const char *p = str;
int intDigits = 0;
int isReal = 0; /* 1 once a fraction, exponent or suffix has been seen */
if(*p == '-')
{
p++;
}
for(; isdigit((unsigned char)*p); p++)
{
intDigits++;
}
if(intDigits == 0)
{
return 0;
}
if(*p == '.')
{
isReal = 1;
for(p++; isdigit((unsigned char)*p); p++)
{
}
}
if(*p == 'e' || *p == 'E')
{
p++;
if(*p == '+' || *p == '-')
{
p++;
}
/* An exponent marker must be followed by digits. */
if(!isdigit((unsigned char)*p))
{
return 0;
}
for(; isdigit((unsigned char)*p); p++)
{
}
isReal = 1;
}
if(*p == 'f' || *p == 'F' || *p == 'd' || *p == 'D')
{
isReal = 1;
p++;
}
return isReal && *p == '\0';
}
/*******************Character classification*******************/
/*
 * Maps a character to its class:
 *   1 - letter (isWord), 2 - digit (isNumber), 3 - '$' or '_',
 *   4 - backslash, 0 - anything else.
 */
int getCharKind(char c)
{
int kind = 0;
if(isWord(c) == 1)
{
kind = 1;
}
else if(isNumber(c) == 1)
{
kind = 2;
}
else if(c == '$' || c == '_')
{
kind = 3;
}
else if(c == '\\')
{
kind = 4;
}
return kind;
}
/*******************Keyword check*******************/
/*
 * Returns 1 when str equals one of the entries of keyWords (exact,
 * case-sensitive comparison), 0 otherwise.
 */
int isKeyWord(char str[])
{
/* Derive the entry count from the table so the loop cannot drift from it. */
size_t total = sizeof(keyWords) / sizeof(keyWords[0]);
size_t idx;
for(idx = 0; idx < total; idx++)
{
if(strcmp(str, keyWords[idx]) == 0)
{
return 1;
}
}
return 0;
}
/*******************Identifier check*******************/
/*
 * Returns 1 when str starts with '$', '_' or a letter and contains no
 * character for which isOper() returns 1; returns 0 otherwise.
 */
int isSignWord(char str[])
{
const char *cursor;
if(str[0] != '$' && str[0] != '_' && isWord(str[0]) != 1)
{
return 0;
}
for(cursor = str; *cursor != '\0'; cursor++)
{
if(isOper(*cursor) == 1)
{
return 0;
}
}
return 1;
}
/*******************Word classification*******************/
/* Writes one result line: line number, word index, the word, its category label and its code. */
static void reportWord(const char *word, const char *kind, const char *code)
{
fprintf(fout, "%4ld.%-4ld 【%s】 【%s】---【%s】\n", lineCount, lineWords, word, kind, code);
}
/*
 * Classifies str and writes the matching entry to the result file.
 *
 * The checks run in this order: keyword (with "true"/"false" reported as
 * boolean), identifier, integer, floating-point number, character literal,
 * string literal, operator or punctuation. Anything else is reported as an
 * erroneous word.
 *
 * A literal needs both its opening and its closing quote, so a lone quote
 * character is reported as an erroneous word. A run of symbol characters
 * that matches no known operator is reported as "other symbol" instead of
 * being skipped.
 */
void getWordKind(char str[])
{
size_t len = strlen(str);
if(isKeyWord(str) == 1)
{
if(strcmp(str,"true") == 0 || strcmp(str,"false") == 0)
{
reportWord(str, "布尔型", "0x105");
}
else
{
reportWord(str, "关键字", "0x103");
}
}
else if(isSignWord(str) == 1)
{
reportWord(str, "标识符", "0x104");
}
else if(isInteger(str) == 1)
{
reportWord(str, "整型", "0x107");
}
else if(isFloat(str) == 1)
{
reportWord(str, "浮点型", "0x108");
}
else if(len >= 2 && str[0] == '\'' && str[len-1] == '\'')
{
reportWord(str, "字符型", "0x106");
}
else if(len >= 2 && str[0] == '"' && str[len-1] == '"')
{
reportWord(str, "字符串", "0x109");
}
else if(isOper(str[0]) == 1 && str[0] != '"' && str[0] != '\'')
{
if(strcmp(str, "<") == 0 || strcmp(str, ">") == 0 || strcmp(str, "<=") == 0 || strcmp(str, ">=") == 0)
{
reportWord(str, "< > <= >=", "0x118");
}
else if(strcmp(str, "<<") == 0 || strcmp(str, ">>") == 0 || strcmp(str, ">>>") == 0 || strcmp(str, "<<<") == 0)
{
reportWord(str, "<< >> <<< >>>", "0x119");
}
else if(strchr(str, '=') != NULL)
{
if(strcmp(str, "==") == 0 || strcmp(str, "!=") == 0)
{
reportWord(str, "== !=", "0x117");
}
else
{
/* Assignment forms such as = += -= *= /= share one category. */
reportWord(str, "特殊符号", "0x110");
}
}
else if(strcmp(str, "||") == 0)
{
reportWord(str, "||", "0x112");
}
else if(strcmp(str, "&&") == 0)
{
reportWord(str, "&&", "0x113");
}
else if(strcmp(str, "++") == 0 || strcmp(str, "--") == 0 || strcmp(str, "!") == 0 || strcmp(str, "~") == 0)
{
reportWord(str, "++ -- +(正) -(负) ! ~", "0x11c");
}
else if(len == 1)
{
switch(str[0])
{
case '?': case ':': reportWord(str, "? :", "0x111"); break;
case ' ': reportWord(str, "空格", "0x102"); break;
case '{': case '}': reportWord(str, "{}", "0x121"); break;
case '[': case ']': case '(': case ')': case '.': reportWord(str, "[]().", "0x11d"); break;
case ',': reportWord(str, ",", "0x120"); break;
case ';': reportWord(str, ";", "0x122"); break;
case '+': case '-': reportWord(str, "+ -", "0x11a"); break;
case '*': case '/': case '%': reportWord(str, "* / %", "0x11b"); break;
case '|': reportWord(str, "|", "0x114"); break;
case '^': reportWord(str, "^", "0x115"); break;
case '&': reportWord(str, "&", "0x116"); break;
default: reportWord(str, "其他符号", "0x999"); break;
}
}
else
{
/* Unknown symbol run: the word was counted, so it must also be reported. */
reportWord(str, "其他符号", "0x999");
}
}
else
{
reportWord(str, "错误的单词", "0x100");
}
}
/*******************从扫描缓冲区解析获取单词********************
state说明:
0表示初始,10表示只含有字母,20表示含有数字,30表示含有$或_,40表示含有其他, 50表示读取完一个单词
41表示字符串,42表示字符
****************************************************************/
void getWords(int state)
{
char word[100];
int charCount = 0;
int finish = 0;
int i, j, k;
for(i = 0; scanBuf[i] != '\0'; i++)
{
switch(state/10)
{
case 0:
switch(getCharKind(scanBuf[i]))
{
case 1:
word[charCount++] = scanBuf[i];
state = 10;
break;
case 2:
word[charCount++] = scanBuf[i];
state = 20;
break;
case 3:
word[charCount++] = scanBuf[i];
state = 30;
break;
case 0:
word[charCount++] = scanBuf[i];
switch(scanBuf[i])
{
case '"':
state = 41;
break;
case '\'':
state = 42;
break;
case '(': case ')': case '{': case '}': case '[': case ']': case ';': case ',': case '.':
state = 50;
word[charCount] = '\0';
finish = 1;
break;
case '=':
state = 43;
break;
default:
state = 40;
break;
}
break;
default: word[charCount++] = scanBuf[i]; break;
}
break;
case 1:
switch(getCharKind(scanBuf[i]))
{
case 1:
word[charCount++] = scanBuf[i];
state = 10;
break;
case 2:
word[charCount++] = scanBuf[i];
state = 20;
break;
case 3:
word[charCount++] = scanBuf[i];
state = 30;
break;
case 0:
word[charCount] = '\0';
i--;
finish = 1;
state = 50;
break;
default: word[charCount++] = scanBuf[i];break;
}
break;
case 2:
switch(getCharKind(scanBuf[i]))
{
case 1:
word[charCount++] = scanBuf[i];
state = 20;
break;
case 2:
word[charCount++] = scanBuf[i];
state = 20;
break;
case 3:
word[charCount++] = scanBuf[i];
state = 30;
break;
case 0:
if(scanBuf[i] == '.')
{
word[charCount++] = scanBuf[i];
state = 20;
break;
}
word[charCount] = '\0';
i--;
finish = 1;
state = 50;
break;
default: word[charCount++] = scanBuf[i];break;
}
break;
case 3:
switch(getCharKind(scanBuf[i]))
{
case 1:
word[charCount++] = scanBuf[i];
state = 30;
break;
case 2:
word[charCount++] = scanBuf[i];
state = 30;
break;
case 3:
word[charCount++] = scanBuf[i];
state = 30;
break;
case 0:
word[charCount] = '\0';
i--;
finish = 1;
state = 50;
break;
default: word[charCount++] = scanBuf[i];break;
}
break;
case 4:
switch(state)
{
case 40:
switch(getCharKind(scanBuf[i]))
{
case 1:
word[charCount] = '\0';
i--;
finish = 1;
state = 50;
break;
case 2:
word[charCount] = '\0';
i--;
finish = 1;
state = 50;
break;
case 3:
word[charCount] = '\0';
i--;
finish = 1;
state = 50;
break;
case 0:
word[charCount++] = scanBuf[i];
state = 40;
break;
default: word[charCount++] = scanBuf[i];break;
}
break;
case 41:
word[charCount++] = scanBuf[i];
if(scanBuf[i] == '"')
{
if(getCharKind(scanBuf[i-1]) == 4)
{
}
else
{
word[charCount] = '\0';
finish = 1;
state = 50;
}
}
break;
case 42:
word[charCount++] = scanBuf[i];
if(scanBuf[i] == '\'')
{
word[charCount] = '\0';
finish = 1;
state = 50;
}
break;
case 43:
if(scanBuf[i] == '=')
{
word[charCount++] = scanBuf[i];
state = 43;
}
else
{
word[charCount] = '\0';
finish = 1;
i--;
state = 50;
}
break;
default: word[charCount++] = scanBuf[i];break;
}
break;
case 5:
finish = 0;
state = 0;
charCount = 0;
i--;
lineWords++;
totalWords++;
getWordKind(word);
break;
default:break;
}
if(scanBuf[i+1] == '\0')
{
word[charCount] = '\0';
lineWords++;
totalWords++;
getWordKind(word);
}
}
}