词法分析练习一:简单提取token
思路:
- 第一次碰到字母或下划线: 开启新token, 设置type=identify; 直到遇见非(字母,数字,下划线)的字符截止, 则以上为一个标识符
- 第一次碰到数字: 开启新token, 设置type=number; 直到遇见非(数字,字母U,L,u,l)的字符截止, 则以上为一个数字 (注意: 下面的实现比这更宽松, 数字token开始之后, 任意字母和下划线也会被并入该token)
#include <stdio.h>

typedef char bool;
/* Literal values used with the file-local `bool` typedef. */
#define false 0
#define true 1

/*
 * Case-label lists for use inside a `switch` on a character.
 * Each macro expands to a run of `case X:` labels (including the final
 * colon), so it must be followed directly by the statement(s) to execute.
 */

/* 'a' .. 'z' */
#define CASE_ALPHA_LOWER \
    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': \
    case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': \
    case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': \
    case 'v': case 'w': case 'x': case 'y': case 'z':

/* 'A' .. 'Z' */
#define CASE_ALPHA_UPPER \
    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': \
    case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': \
    case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': \
    case 'V': case 'W': case 'X': case 'Y': case 'Z':

/* '0' .. '9' */
#define CASE_NUM \
    case '0': case '1': case '2': case '3': case '4': \
    case '5': case '6': case '7': case '8': case '9':

/* The underscore character. */
#define CASE_UNDERLINE case '_':

/* The NUL terminator that ends the input string. */
#define CASE_LINEEND case '\0':
/*
 * Kind of a recognised token.  The numeric value is what parse_token()
 * prints with "%d".
 */
enum token_type
{
    TOKEN_IDENTIFY = 0, /* token that starts with a letter or an underscore */
    TOKEN_NUMBER   = 1, /* token that starts with a digit */
    TOKEN_LINEFEED = 2  /* not assigned anywhere in the visible code */
};
/*
 * A single token while it is being collected.
 * `str` holds the characters gathered so far and `len` counts them; the
 * text is NUL-terminated only at the moment the token is printed.
 */
struct token
{
    enum token_type type; /* set when the first character of the token is seen */
    char str[256];        /* token text */
    unsigned char len;    /* number of characters currently stored in str */
};

typedef struct token token_t;
void parse_token(char* str)
{
bool in_token = false;
token_t cur_token;
cur_token.len = 0;
while(1)
{
char c = *str++;
switch(c)
{
CASE_ALPHA_LOWER
CASE_ALPHA_UPPER
CASE_UNDERLINE
{
if (in_token == false) {
//first token set type
cur_token.type = TOKEN_IDENTIFY;
in_token = true;
}
//save char
cur_token.str[cur_token.len++] = c;
break;
}
CASE_NUM
{
if (in_token == false) {
//first token set type
cur_token.type = TOKEN_NUMBER;
in_token = true;
}
//save char
cur_token.str[cur_token.len++] = c;
break;
}
default:
{
if (in_token == true) {
//token end
cur_token.str[cur_token.len] = '\0';
if (cur_token.len) {
printf("token type %d: str [%s] \n", cur_token.type, cur_token.str);
//get back for next time parse from current char
str--;
}
else {
printf("warning unexpect here");
}
cur_token.len = 0;
in_token = false;
}
else {
//do nothing
printf("[%c]\n", c);
}
break;
}
}
if (c == '\0') {
printf("line end\n");
return;
}
}
}
/*
 * Read lines from standard input and tokenize each one with parse_token()
 * until end of input or a read error.
 *
 * Lines are read with fgets(), which never writes past the buffer.  A line
 * longer than the buffer is delivered in several pieces, each of which is
 * parsed as if it were a separate line.
 *
 * Returns 0 when input is exhausted.
 */
int main(int argc, char* argv[])
{
    char string[1024];

    (void)argc;
    (void)argv;

    printf("please input string to parse\n");

    while (fgets(string, sizeof string, stdin) != NULL) {
        /* fgets keeps the newline; drop it so it is not reported as a delimiter. */
        for (char *p = string; *p != '\0'; p++) {
            if (*p == '\n') {
                *p = '\0';
                break;
            }
        }
        parse_token(string);
    }

    return 0;
}
运行结果