用C语言编写的Java词法分析器

[代码] [C/C++]代码
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <stdlib.h>
#define FILENAME "e:/a.java"

/* Token categories assigned by the lexer; stored in output.type. */
enum type
{
    blz   = 0, /* reserved word */
    bzf   = 1, /* identifier */
    cs    = 2, /* numeric constant */
    zfcl  = 3, /* character constant */
    zfccl = 4, /* string constant */
    dzf   = 5, /* single-character token */
    szf   = 6  /* two-character token */
};

/*
 * One lexed token, laid out as a list node with next/previous links.
 * `type` holds one of the `enum type` values; the code that prints and
 * copies tokens uses it to decide which member of `value` to read.
 */
struct output
{
    int type;

    union
    {
        char *_strvalue;      /* text of a word, string or two-character token */
        char _charvalue;      /* a single character */
        int _intvalue;        /* an integer constant */
        float _floatvalue;
        double _doublevalue;
    } value;

    struct output *pnext;     /* following node, NULL at the tail */
    struct output *ppre;      /* preceding node */
};

typedef struct output output;

/* Newline counter: getAChar() increments it for every '\n' it skips, and
   process() prints it in its error messages. Starts at 0. */
int linenumber=0;

/* Returns the next character of the stream that is not whitespace. */
char getAChar(FILE *pfile);
/* Reads the body of a string literal up to the closing double quote and
   returns it as a heap-allocated string (may return NULL). */
char *getString(FILE *pfile);
/* Returns c when c is one of the recognised single-character tokens, else 0. */
char isDzf(char c);
/* Appends a copy of the given token to the token list and returns the list head. */
output *createOuput(output *srcolutput);
/* Collects c and the following characters of the same class (digits, or
   letters/digits) into a heap-allocated string. */
char *getTypeChars(char c,FILE *pfile);
/* Lexes the whole stream and returns the head of the resulting token list. */
output *process(FILE *pfile);

/*
 * Table of Java reserved words used to tell keywords from identifiers.
 *
 * The final entry "keyword" is NOT a Java keyword: it is the sentinel that
 * marks the end of the table, and lookups stop when they reach it. New
 * entries must therefore be added before it.
 */
char *constans_pool[]={
    "package",
    "class",
    "int",
    "double",
    "float",
    "byte",
    "boolean",
    "char",
    "short",
    "public",
    "private",
    "protected",
    "synchronized",
    "instanceof",
    "extends",
    "implements",
    "throw",
    "throws",
    "if",
    "else",
    "for",
    "while",
    "return",
    "continue",
    "break",
    "switch",
    "case",
    "void",
    /* Remaining Java reserved words that the table previously lacked. */
    "abstract",
    "assert",
    "catch",
    "const",
    "default",
    "do",
    "enum",
    "final",
    "finally",
    "goto",
    "import",
    "interface",
    "long",
    "native",
    "new",
    "static",
    "strictfp",
    "super",
    "this",
    "transient",
    "try",
    "volatile",
    /* End-of-table sentinel; must stay last. */
    "keyword"};

/*
 * Entry point: lexes the file named by FILENAME and prints every token,
 * each followed by a tab.
 *
 * Reserved words are printed wrapped in <b>...<b>; identifiers, string
 * constants and two-character tokens as text; character constants and
 * single-character tokens as one character; numeric constants as integers.
 *
 * Returns 0 on success, EXIT_FAILURE when the input file cannot be opened.
 */
int main()
{
    output *phead = NULL;
    output *pcurt = NULL;
    FILE *pfile = fopen(FILENAME, "r");

    if (!pfile)
    {
        /* Report the failure instead of exiting silently with no output. */
        perror(FILENAME);
        return EXIT_FAILURE;
    }

    phead = process(pfile);

    /* The token list holds its own copies of all text, so the stream can be
       closed before printing. */
    fclose(pfile);

    for (pcurt = phead; pcurt; pcurt = pcurt->pnext)
    {
        switch (pcurt->type)
        {
        case blz:
            printf("<b>%s<b>\t", pcurt->value._strvalue);
            break;
        case bzf:
        case zfccl:
        case szf:
            printf("%s\t", pcurt->value._strvalue);
            break;
        case zfcl:
        case dzf:
            printf("%c\t", pcurt->value._charvalue);
            break;
        case cs:
            printf("%d\t", pcurt->value._intvalue);
            break;
        default:
            /* Unknown token type: nothing sensible to print. */
            break;
        }
    }

    return 0;
}

output *process(FILE *pfile)
{
char nowchar=0,prechar=0;
output *phead=NULL,*pcurt=NULL,*pnext=NULL,*ppre=NULL;
output tmp;
char *p=NULL;
char *pnowglz=NULL;
int i=0;


while(!feof(pfile))
{
prechar=nowchar;
nowchar=getAChar(pfile);



//如果是字幕
if(isalpha(nowchar))
{

p=getTypeChars(nowchar,pfile);

tmp.type=bzf;
while(strcmp(constans_pool[i],"keyword"))
{

if(strcmp(constans_pool[i],p))
{
tmp.type=blz;
break;
}
}
tmp.value._strvalue=p;

}
//如果是数字
else if(isdigit(nowchar))
{
p=getTypeChars(nowchar,pfile);
tmp.type=cs;
tmp.value._intvalue=atoi(p);

if(phead==NULL)
phead=pcurt;


}
else if(nowchar=='/')
{
nowchar=fgetc(pfile);
if(nowchar=='*')//表明是注释;
{

do
{
while(nowchar=getAChar(pfile)!='*'&&nowchar>0);
nowchar=fgetc(pfile);
}
while(nowchar!='/');

continue;
}
else if(nowchar=='/')//单行注释
{
while((nowchar=fgetc(pfile))!='\n'&&nowchar>0);
continue;
}
else
{
tmp.type=dzf;
tmp.value._charvalue='/';
ungetc(nowchar,pfile);
}
}
else if(nowchar=='=')
{
nowchar=getAChar(pfile);

if(nowchar=='=')
{

tmp.type=bzf;
*(tmp.value._strvalue)='=';
*(tmp.value._strvalue+1)='=';

}
else
{
ungetc(nowchar,pfile);
continue;
}

}
else if(nowchar=='\'')
{

nowchar=fgetc(pfile);
if(fgetc(pfile)!='\'')
printf("此处应该是单引号");
else
{
tmp.type=zfcl;
tmp.value._charvalue=nowchar;
}


}
else if(nowchar=='\"')
{
p=getString(pfile);
tmp.type=zfccl;
tmp.value._strvalue=p;

}


else if(isDzf(nowchar))
{
tmp.type=dzf;
tmp.value._charvalue=nowchar;
}
else if(nowchar<0)

// printf("end");
continue;



else
{
printf("\nerror%c\t%d\n",nowchar,linenumber);
continue;
}

phead=createOuput(&tmp);
}
return phead;

}
/*
 * Tests whether c is a single-character token.
 *
 * c       the character to classify.
 * returns c itself when it is in the table below, otherwise 0.
 *
 * '/' and '=' are deliberately absent from the table.
 */
char isDzf(char c)
{
    /* Built once instead of on every call. */
    static const char dzfarr[] = {
        '+', '-', '*', '(', ')', '[', ']', '{', '}', ';', '?', '.', '!', '%',
        /* separators and operators that ordinary Java source also uses */
        ',', ':', '<', '>', '&', '|', '^', '~', '@'
    };
    size_t i;

    for (i = 0; i < sizeof dzfarr / sizeof dzfarr[0]; i++)
    {
        if (dzfarr[i] == c)
            return c;
    }
    return 0;
}
/*
 * Reads the body of a string constant whose opening double quote has
 * already been consumed.
 *
 * pfile   input stream positioned just after the opening quote.
 * returns a heap-allocated, NUL-terminated copy of the text up to (not
 *         including) the closing quote; the closing quote is consumed. An
 *         empty literal yields an empty string. Returns NULL only when
 *         pfile is NULL or memory cannot be allocated. The caller owns the
 *         result and must free() it.
 *
 * A backslash and the character after it are copied verbatim, so an escaped
 * quote does not end the string. Reading also stops at end of file or at a
 * NUL byte.
 */
char *getString(FILE *pfile)
{
    size_t cap = 16;
    size_t len = 0;
    char *buf = NULL;
    char *grown = NULL;
    int c = 0; /* int, so EOF cannot be confused with a data byte */

    if (!pfile)
        return NULL;

    buf = malloc(cap);
    if (!buf)
        return NULL;

    for (;;)
    {
        c = fgetc(pfile);
        if (c == EOF || c == '\0' || c == '\"')
            break;

        /* Grow BEFORE writing: keep room for this character, a possible
           escaped character after it, and the terminator. */
        if (len + 3 > cap)
        {
            grown = realloc(buf, cap * 2);
            if (!grown)
            {
                free(buf);
                return NULL;
            }
            buf = grown;
            cap *= 2;
        }

        buf[len++] = (char)c;

        if (c == '\\')
        {
            c = fgetc(pfile);
            if (c == EOF)
                break;
            buf[len++] = (char)c;
        }
    }

    buf[len] = '\0';
    return buf;
}

/*
 * Collects a run of characters of the same class as c.
 *
 * c       first character of the run, already consumed from the stream.
 *         A digit selects "digits only"; any other alphanumeric character
 *         selects "letters and digits".
 * pfile   input stream positioned just after c.
 * returns a heap-allocated, NUL-terminated string that starts with c and
 *         continues with the following characters of the selected class.
 *         The first character outside the class is pushed back onto the
 *         stream. When c is not alphanumeric, or pfile is NULL, the result
 *         holds c alone. Returns NULL when memory cannot be allocated. The
 *         caller owns the result and must free() it.
 */
char *getTypeChars(char c,FILE *pfile)
{
    int (*same_class)(int) = NULL;
    size_t cap = 16;
    size_t len = 0;
    char *buf = NULL;
    char *grown = NULL;
    int next = EOF; /* int, so EOF cannot be confused with a data byte */

    /* ctype functions require an unsigned char value. */
    if (isdigit((unsigned char)c))
        same_class = isdigit;
    else if (isalnum((unsigned char)c))
        same_class = isalnum;

    buf = malloc(cap);
    if (!buf)
        return NULL;
    buf[len++] = c;

    if (same_class && pfile)
    {
        while ((next = fgetc(pfile)) != EOF && same_class(next))
        {
            /* Grow BEFORE writing: keep room for this character and the
               terminator. */
            if (len + 2 > cap)
            {
                grown = realloc(buf, cap * 2);
                if (!grown)
                {
                    free(buf);
                    return NULL;
                }
                buf = grown;
                cap *= 2;
            }
            buf[len++] = (char)next;
        }

        /* Hand the first foreign character back to the caller's stream. */
        if (next != EOF)
            ungetc(next, pfile);
    }

    buf[len] = '\0';
    return buf;
}
/*
 * Appends a token to the token list and returns the list head.
 *
 * The list is kept in function-local static variables, so every call adds
 * to the same single list.
 *
 * srcolutput  token to append. For text tokens (blz, bzf, zfccl, szf) its
 *             _strvalue must be heap-allocated: the new node takes ownership
 *             of that buffer and the source pointer is reset to NULL, so the
 *             caller must not use or free it afterwards. A NULL text pointer
 *             is stored as an empty string.
 * returns     the head of the list. When srcolutput is NULL or memory cannot
 *             be allocated, nothing is appended and the current head (NULL
 *             for an empty list) is returned.
 */
output *createOuput(output *srcolutput)
{
    static output *ppre = NULL;  /* current tail of the list */
    static output *phead = NULL; /* first node of the list */
    output *pout = NULL;

    if (!srcolutput)
        return phead;

    pout = malloc(sizeof *pout);
    if (!pout)
        return phead;

    memset(&pout->value, 0, sizeof pout->value);
    pout->type = srcolutput->type;
    pout->pnext = NULL;
    pout->ppre = ppre; /* back link; NULL for the first node */

    switch (srcolutput->type)
    {
    case blz:
    case bzf:
    case zfccl:
    case szf:
        if (srcolutput->value._strvalue)
        {
            /* Adopt the caller's buffer rather than copying and freeing it;
               this also removes any need to size a copy correctly. */
            pout->value._strvalue = srcolutput->value._strvalue;
            srcolutput->value._strvalue = NULL;
        }
        else
        {
            pout->value._strvalue = malloc(1);
            if (!pout->value._strvalue)
            {
                free(pout);
                return phead;
            }
            pout->value._strvalue[0] = '\0';
        }
        break;
    case zfcl:
    case dzf:
        pout->value._charvalue = srcolutput->value._charvalue;
        break;
    case cs:
        pout->value._intvalue = srcolutput->value._intvalue;
        break;
    default:
        /* Unknown type: value stays zeroed. */
        break;
    }

    if (!phead)
        phead = pout;
    if (ppre)
        ppre->pnext = pout;
    ppre = pout;

    return phead;
}
// Java语言: http://www.huiyi8.com/java/
/*
 * Returns the next character of the stream that is not whitespace.
 *
 * Skipped characters are space, tab, newline, carriage return, vertical
 * tab, form feed and NUL. linenumber is incremented for every newline
 * skipped.
 *
 * pfile   input stream; NULL is treated like end of file.
 * returns the character found, or EOF converted to char at end of file.
 *         Because the result is a char, a caller that must tell end of file
 *         from a 0xFF data byte should also check feof() on the stream.
 */
char getAChar(FILE *pfile)
{
    int c = EOF; /* int, so the comparisons see fgetc's real result */

    if (!pfile)
        return EOF;

    for (;;)
    {
        c = fgetc(pfile);

        if (c == '\n')
        {
            linenumber++;
            continue;
        }
        /* '\r' matters for CRLF files read without newline translation. */
        if (c == ' ' || c == '\t' || c == '\r' || c == '\v' || c == '\f' || c == '\0')
            continue;

        break;
    }

    return (char)c;
}
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
实现一个C语言词法分析器的Java代码,可以参考以下步骤: 1. 定义一个Token类,用于表示词法单元的类型和对应的值。 ``` public class Token { private TokenType type; private String value; public Token(TokenType type, String value) { this.type = type; this.value = value; } public TokenType getType() { return type; } public String getValue() { return value; } @Override public String toString() { return String.format("(%s, %s)", type, value); } } ``` 2. 定义一个TokenType枚举类,用于表示所有可能的词法单元类型。 ``` public enum TokenType { KEYWORD, IDENTIFIER, CONSTANT, STRING_LITERAL, OPERATOR, PUNCTUATOR } ``` 3. 实现词法分析器的核心逻辑,包括读取源代码、识别词法单元并返回Token对象等。 ``` public class Lexer { private String input; private int position; public Lexer(String input) { this.input = input; this.position = 0; } public Token getNextToken() { if (position >= input.length()) { return null; } char currentChar = input.charAt(position); if (isLetter(currentChar)) { return scanIdentifier(); } if (isDigit(currentChar)) { return scanNumber(); } if (currentChar == '\"') { return scanStringLiteral(); } if (isOperator(currentChar)) { return scanOperator(); } if (isPunctuator(currentChar)) { return scanPunctuator(); } throw new IllegalArgumentException("Invalid character: " + currentChar); } private boolean isLetter(char c) { return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); } private boolean isDigit(char c) { return c >= '0' && c <= '9'; } private boolean isOperator(char c) { return "+-*/%=&|<>!".indexOf(c) != -1; } private boolean isPunctuator(char c) { return "()[]{};,".indexOf(c) != -1; } private Token scanIdentifier() { StringBuilder sb = new StringBuilder(); while (position < input.length() && (isLetter(input.charAt(position)) || isDigit(input.charAt(position)))) { sb.append(input.charAt(position)); position++; } String value = sb.toString(); TokenType type = TokenType.IDENTIFIER; if (isKeyword(value)) { type = TokenType.KEYWORD; } return new Token(type, value); } private boolean isKeyword(String value) { String[] keywords = {"auto", "break", "case", "char", "const", 
"continue", "default", "do", "double", "else", "enum", "extern", "float", "for", "goto", "if", "int", "long", "register", "return", "short", "signed", "sizeof", "static", "struct", "switch", "typedef", "union", "unsigned", "void", "volatile", "while"}; return Arrays.asList(keywords).contains(value); } private Token scanNumber() { StringBuilder sb = new StringBuilder(); while (position < input.length() && isDigit(input.charAt(position))) { sb.append(input.charAt(position)); position++; } return new Token(TokenType.CONSTANT, sb.toString()); } private Token scanStringLiteral() { StringBuilder sb = new StringBuilder(); position++; // skip the opening quotation mark while (position < input.length() && input.charAt(position) != '\"') { sb.append(input.charAt(position)); position++; } position++; // skip the closing quotation mark return new Token(TokenType.STRING_LITERAL, sb.toString()); } private Token scanOperator() { StringBuilder sb = new StringBuilder(); while (position < input.length() && isOperator(input.charAt(position))) { sb.append(input.charAt(position)); position++; } return new Token(TokenType.OPERATOR, sb.toString()); } private Token scanPunctuator() { Token token = new Token(TokenType.PUNCTUATOR, String.valueOf(input.charAt(position))); position++; return token; } } ``` 4. 编一个测试程序,用于读取输入的源代码,调用词法分析器并输出词法单元。 ``` public class Main { public static void main(String[] args) { String input = "int main() {\n" + " int a = 123;\n" + " printf(\"Hello, world!\\n\");\n" + " return 0;\n" + "}"; Lexer lexer = new Lexer(input); Token token; while ((token = lexer.getNextToken()) != null) { System.out.println(token); } } } ``` 输出结果如下: ``` (KEYWORD, int) (IDENTIFIER, main) (PUNCTUATOR, () (PUNCTUATOR, )) (PUNCTUATOR, {) (KEYWORD, int) (IDENTIFIER, a) (PUNCTUATOR, =) (CONSTANT, 123) (PUNCTUATOR, ;) (IDENTIFIER, printf) (PUNCTUATOR, () (STRING_LITERAL, Hello, world!\n) (PUNCTUATOR, )) (PUNCTUATOR, ;) (KEYWORD, return) (CONSTANT, 0) (PUNCTUATOR, ;) (PUNCTUATOR, }) ```

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值