自制词法分析
#ifndef TOKEN_H_INCLUDED
#define TOKEN_H_INCLUDED
typedef enum
{
BAD_TOKEN,
NUMBER_TOKEN,
ADD_OPERATOR_TOKEN,
SUB_OPERATOR_TOKEN,
MUL_OPERATOR_TOKEN,
DIV_OPERATOR_TOKEN,
END_OF_LINE_TOKEN
}TokenKind;
#define MAX_TOKEN_SIZE (100)
typedef struct {
TokenKind kind;
double value;
char str[MAX_TOKEN_SIZE];
} Token;
void set_line(char *line);
void get_token(Token *token);
#endif
#include "token.h"
#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>
static char *st_line;
static int st_line_pos;
typedef enum {
INITIAL_STATUS,
IN_INT_PART_STATUS,
DOT_STATUS,
IN_FRAC_PART_STATUS
} LexerStatus;
void get_token(Token *token)
{
int out_pos = 0;
LexerStatus status = INITIAL_STATUS;
char current_char;
token->kind = BAD_TOKEN;
while (st_line[st_line_pos] != '\0')
{
current_char = st_line[st_line_pos];
if ((status == IN_INT_PART_STATUS || status == IN_FRAC_PART_STATUS)
&& !isdigit(current_char) && current_char != '.')
{ //当前字符不是数字也不是小数点,但当前状态是整数或者小数,标记为数字并返回
token->kind = NUMBER_TOKEN;
sscanf(token->str, "%lf", &token->value);
return;
}
if (isspace(current_char))
{
//当前是否是空格字符
if (current_char == '\n') {
token->kind = END_OF_LINE_TOKEN;
return;
}
st_line_pos++;
continue;
}
if (out_pos >= MAX_TOKEN_SIZE-1)
{ //当前字符长度超过预定义最大值
fprintf(stderr, "token too long.\n");
exit(1);
}
token->str[out_pos] = st_line[st_line_pos];
st_line_pos++;
out_pos++;
token->str[out_pos] = '\0';
if (current_char == '+') {
token->kind = ADD_OPERATOR_TOKEN;
return;
} else if (current_char == '-') {
token->kind = SUB_OPERATOR_TOKEN;
return;
} else if (current_char == '*') {
token->kind = MUL_OPERATOR_TOKEN;
return;
} else if (current_char == '/') {
token->kind = DIV_OPERATOR_TOKEN;
return;
}
else if (isdigit(current_char))
{ //如果当前字符是数字状态变为整数,或者小数
if (status == INITIAL_STATUS) {
status = IN_INT_PART_STATUS;
} else if (status == DOT_STATUS) {
status = IN_FRAC_PART_STATUS;
}
} else if (current_char == '.') {
if (status == IN_INT_PART_STATUS) {
status = DOT_STATUS;
} else {
fprintf(stderr, "syntax error.\n");
exit(1);
}
} else {
fprintf(stderr, "bad character(%c)\n", current_char);
exit(1);
}
}
}
void set_line(char *line)
{
st_line = line;
st_line_pos = 0;
}
void parse_line(char *buf)
{
Token token;
set_line(buf);
for (;;) {
get_token(&token);
if (token.kind == END_OF_LINE_TOKEN) {
break;
} else {
printf("kind..%d, str..%s\n", token.kind, token.str);
}
}
}
int main(int argc, char **argv)
{
char buf[1024];
while (fgets(buf, 1024, stdin) != NULL) {
parse_line(buf);
}
return 0;
}
实际上是对每个字符进行转换,要注意的是数字。用到了状态机来读取小数。