C语言实现的词法分析器

 
#ifndef MYLEX_H
#define MYLEX_H

#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#include <time.h>

/* Maximum number of characters stored for an identifier. */
#define MAX_ID 20

/*
 * Token kinds produced by the lexer.
 *
 * The numeric order is significant: w2f() uses a symbol value as an index
 * into the symtype[] name table, and the keyword symbols
 * (beginsym .. writesym) are listed in the same order as the keywords[]
 * table searched by find_tab().
 */
typedef enum SYMBOL {
    nul, eof, plus, minus, times, slash, lparen, rparen, comma, semicolon, period, becomes,
    eql, neq, lss, gtr, leq, geq, number, ident,
    beginsym, callsym, constsym, dosym, endsym, ifsym, oddsym, proceduresym, readsym, thensym, varsym, whilesym, writesym
} SYMBOL;

/* Read one character from the source stream, tracking line and column. */
int read_ch(void);
/* Push one character back onto the source stream. */
int put_back(int ch);
/* Scan and classify the next token of the source stream. */
SYMBOL getsym(void);
/* Write one table row describing the current token to the output stream. */
int w2f(int flag);

/* Return the keyword-table index of id, or -1 when id is not a keyword. */
int find_tab(char *id);
/* Map a keyword-table index to its keyword symbol. */
SYMBOL get_data(int No);
/*
 * Write one table row reporting an invalid token.
 * NOTE(review): deliberately left without a prototype; tighten it to
 * (void) once every call site is confirmed to pass no arguments.
 */
int error();

#endif

[文件] symbol.c ~ 4KB    下载(21)

001#include "mylex.h"
002  
003extern FILE *source, *out;
004  
/*
 * Printable name of every SYMBOL value, indexed by the symbol's numeric
 * value, so the entries must stay in the same order as the enum.
 */
const char *symtype[33] = {
    "nul", "eof", "plus", "minus", "times", "slash", "lparen", "rparen", "comma", "semicolon", "period", "becomes",
    "eql", "neq", "lss", "gtr", "leq", "geq", "number", "ident",
    "beginsym", "callsym", "constsym", "dosym", "endsym", "ifsym", "oddsym", "proceduresym", "readsym", "thensym", "varsym", "whilesym", "writesym"
};

/* Position of the read cursor, and the position recorded when the latest token began. */
int cur_line, cur_col, err_line, err_col;
/* Value of the most recently scanned number token. */
int num;
/* Text of the most recently scanned token (at most 20 characters plus the terminator). */
char id[21] = "";
/* Number of characters stored in id by the identifier scanner. */
int id_len;
/* Running count of tokens requested so far; printed as the row number. */
int token_num;

017int read_ch() {
018    int ch = fgetc(source);
019    cur_col++;
020    if (ch == '\n') {
021        cur_line++;
022        cur_col = 0;
023    }
024    return ch;
025}
026  
027int put_back(int ch) {
028    ungets(ch, source);
029    cur_col--;
030    if (ch == '\n')
031        cur_line--;
032    return 0;
033}
034  
035SYMBOL getsym() {
036    int ch;
037    int iskeywords;
038    char invalid[2] = "\0";
039    while ((ch = read_ch()) != EOF && ch <= ' ')
040        ;
041    err_line = cur_line;
042    err_col = cur_col;
043    switch (ch) {
044        case EOF :
045            strcpy(id, "EOF");
046            return eof;
047        case '+' :
048            strcpy(id, "+");
049            return plus;
050        case '-' :
051            strcpy(id, "-");
052            return minus;
053        case '*' :
054            strcpy(id, "*");
055            return times;
056        case '/' :
057            strcpy(id, "/");
058            return slash;
059        case '(' :
060            strcpy(id, "(");
061            return lparen;
062        case ')' :
063            strcpy(id, ")");
064            return rparen;
065        case ',' :
066            strcpy(id, ",");
067            return comma;
068        case ';' :
069            strcpy(id, ";");
070            return semicolon;
071        case '.' :
072            strcpy(id, ".");
073            return period;
074        case ':' :
075            ch = read_ch();
076            if (ch == '=') {
077                strcpy(id, ":=");
078                return becomes;
079            } else
080                return nul;
081        case '=' :
082            strcpy(id, "=");
083            return eql;
084        case '#' :
085            strcpy(id, "#");
086            return neq;
087        case '<' :
088            ch = read_ch();
089            if (ch == '=') {
090                strcpy(id, "<=");
091                return leq;
092            }
093            put_back(ch);
094            strcpy(id, "<");
095            return lss;
096        case '>' :
097            ch = read_ch();
098            if (ch == '=') {
099                strcpy(id, ">=");
100                return geq;
101            }
102            put_back(ch);
103            strcpy(id, ">");
104            return gtr;
105        default :
106            if (isdigit(ch)) {
107                num = 0;
108                do {
109                    num = 10 * num + ch - '0';
110                    ch = read_ch();
111                } while (ch != EOF && isdigit(ch));
112                _itoa(num, id, 10);
113                if (isslpha(ch)) {
114                    invalid[0] = (char)ch;
115                    strcat(id, invalid);
116                    ch = read_ch();
117                    while (isalnum(ch)) {
118                        invalid[0] = (char) ch;
119                        strcat(id, invalid);
120                        ch = read_ch();
121                    }
122                    error();
123                    return nul;
124                }
125                put_back(ch);
126                return number;
127            }
128            if (isalpha(ch)) {
129                id_len = 0;
130                do {
131                    if (id_len < MAX_ID) {
132                        id[id_len] = (char)ch;
133                        id_len++;
134                    }
135                    ch = read_ch();
136                } while (ch != EOF && isalnum(ch));
137                id[id_len] = '\0';
138                put_back(ch);
139                iskeywords = find_tab(id);
140                return (iskeywords == -1) ? ident : get_data(iskeywords);
141            }
142            error();
143            return nul;
144    }
145}
146  
147int w2f(int flag) {
148    fprintf(out, "|%4d |%19s |%19s |%19s |\n",token_num, id, symtype[flag], "");
149    return 0;
150}

[文件] keyword.c ~ 1KB    下载(22)

01#include "mylex.h"
02  
03extern FILE *out;
04extern int cur_line, cur_col, err_line, err_col;
05extern int num;
06extern char id[21];
07extern int token_num;
08  
/* Reserved words of the language; find_tab() returns indexes into this table. */
char *keywords[13] = {
    "begin", "call", "const", "do", "end", "if", "odd", "procedure", "read", "then", "var", "while", "write"
};

/*
 * Look up a word in the keyword table (case-sensitive, exact match).
 *
 * id: NUL-terminated word to look up; NULL is treated as "not a keyword".
 *
 * Returns the index of the matching entry in keywords[], or -1 when the
 * word is not a keyword.
 */
int find_tab(char *id) {
    size_t i;

    if (id == NULL)
        return -1;
    for (i = 0; i < sizeof keywords / sizeof keywords[0]; ++i) {
        if (strcmp(keywords[i], id) == 0)
            return (int)i;
    }
    return -1;
}

24SYMBOL get_data(int No) {
25    switch (No) {
26        case 0 :
27            return beginsym;
28        case 1 :
29            return callsym;
30        case 2 :
31            return constsym;
32        case 3 :
33            return dosym;
34        case 4 :
35            return endsym;
36        case 5 :
37            return ifsym;
38        case 6 :
39            return oddsym;
40        case 7 :
41            return proceduresym;
42        case 8 :
43            return readsym;
44        case 9 :
45            return thensym;
46        case 10 :
47            return varsym;
48        case 11 :
49            return whilesym;
50        case 12 :
51            return writesym;
52        default :
53            error(">>> Error : ");
54            return nul;
55    }
56}
57  
58int error() {
59    fprintf(out, "|%4d |%19s |%19s %19s |\n", token_num, id, "nul", "invalid character");
60    return 0;
61}

[文件] main.c ~ 2KB    下载(22)

01#include "mylex.h"
02  
03extern int cur_line, cur_col, err_line, err_col;
04extern char id[21];
05extern int token_num;
06  
07FILE *source, *out;
08  
09int main() {
10    clock_t start, finish;
11    double duration;
12    char filename[21] = "\0";
13    char outname[21] = "\0";
14    int flag;
15    FILE *stream;
16    char line[100];
17    printf("PL/0 Lexical Analyzer\n\n");
18    printf("Please enter the source file: ");
19    gets_s(filename, 20);
20    if ((source = fopen(filename, "r")) == NULL) {
21        printf("Error: the file \"%s\" can not be opened, press any key to exit\n", filename);
22        _getch();
23        return 1;
24    } else {
25        int i;
26        start = clock();
27        for (i = 0; i < (int)strlen(filename) - 4; ++i)
28            outname[i] = filename[i];
29        strcat(outname, ".out");
30        if ((out = fopen(outname, "w+")) == NULL) {
31            printf("Error: can not create \"%s\" in current path, press any key to exit\n", outname);
32            _getch();
33            return 1;
34        }
35        cur_line = 1;
36        cur_col = 0;
37        token_num = 0;
38        fprintf(out, "+-----+--------------------+--------------------+--------------------+\n");
39        fprintf(out, "|%4s |%19s |%19s |%19s |\n", "No", "Symbol", "Symbol Type", "Notes");
40        fprintf(out, "+-----+--------------------+--------------------+--------------------+\n");
41        while (!feof(source)) {
42            token_num++;
43            flag = getsym();
44            if (flag == 0)
45                continue;
46            w2f(flag);
47        }
48        fprintf(out, "+-----+--------------------+--------------------+--------------------+\n");
49        fcloseall();
50        stream = fopen(outname, "r");
51        while (1) {
52            fgets(line, 100, stream);
53            if (feof(stream))
54                break;
55            else
56                printf("%s", line);
57        }
58        fclose(stream);
59        finish = clock();
60        duration = (double)(finish - start) / CLOCKS_PER_SEC;
61        printf("completed successfully. (%2.3f secs)\n", duration);
62    }
63    return 0;
}
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值