关闭

C语言实现的词法分析器

标签: 语言 c token stream file character
382人阅读 评论(0) 收藏 举报
 
#ifndef MYLEX_H
02 #define MYLEX_H
03   
#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
09   
10 #define MAX_ID 20
11   
/*
 * Token codes produced by getsym().  The numeric values double as indices
 * into the symtype[] name table in symbol.c, so the order must not change.
 */
typedef enum SYMBOL {
    /* scanner status */
    nul,            /* no valid token (returned after an error) */
    eof,            /* end of the source file */

    /* arithmetic operators */
    plus,
    minus,
    times,
    slash,

    /* punctuation */
    lparen,
    rparen,
    comma,
    semicolon,
    period,
    becomes,        /* ":=" */

    /* relational operators */
    eql,            /* "="  */
    neq,            /* "#"  */
    lss,            /* "<"  */
    gtr,            /* ">"  */
    leq,            /* "<=" */
    geq,            /* ">=" */

    /* literals and names */
    number,
    ident,

    /* reserved words, in the same order as the keywords[] table */
    beginsym,
    callsym,
    constsym,
    dosym,
    endsym,
    ifsym,
    oddsym,
    proceduresym,
    readsym,
    thensym,
    varsym,
    whilesym,
    writesym
} SYMBOL;
17   
18 int read_ch();
19 int put_back(int);
20 SYMBOL getsym();
21 int w2f(int);
22   
23 int find_tab(char *);
24 SYMBOL get_data(int);
25 int error();
26   
27 #endif

[文件] symbol.c ~ 4KB    下载(21)

001 #include "mylex.h"
002   
003 extern FILE *source, *out;
004   
/*
 * Printable name of every SYMBOL value, indexed by the enum value itself
 * (w2f() prints symtype[flag]).  Entries must stay in the same order as the
 * SYMBOL enum declared in mylex.h.
 */
const char *symtype[33] = {
    "nul", "eof", "plus", "minus", "times", "slash", "lparen", "rparen", "comma", "semicolon", "period", "becomes",
    "eql", "neq", "lss", "gtr", "leq", "geq", "number", "ident",
    "beginsym", "callsym", "constsym", "dosym", "endsym", "ifsym", "oddsym", "proceduresym", "readsym", "thensym", "varsym", "whilesym", "writesym"
};
010   
/* Current read position in the source file; maintained by read_ch()/put_back(). */
int cur_line;
int cur_col;

/* Position recorded by getsym() just before it classifies a token. */
int err_line;
int err_col;

/* Value of the numeric literal most recently scanned by getsym(). */
int num;

/* Text of the token most recently scanned (at most 20 characters plus NUL). */
char id[21] = "";

/* Number of characters getsym() stored in `id` for the last identifier. */
int id_len;

/* Running token counter, printed in the first column of the result table. */
int token_num;
016   
017 int read_ch() {
018     int ch = fgetc(source);
019     cur_col++;
020     if (ch == '\n') {
021         cur_line++;
022         cur_col = 0;
023     }
024     return ch;
025 }
026   
027 int put_back(int ch) {
028     ungets(ch, source);
029     cur_col--;
030     if (ch == '\n')
031         cur_line--;
032     return 0;
033 }
034   
035 SYMBOL getsym() {
036     int ch;
037     int iskeywords;
038     char invalid[2] = "\0";
039     while ((ch = read_ch()) != EOF && ch <= ' ')
040         ;
041     err_line = cur_line;
042     err_col = cur_col;
043     switch (ch) {
044         case EOF :
045             strcpy(id, "EOF");
046             return eof;
047         case '+' :
048             strcpy(id, "+");
049             return plus;
050         case '-' :
051             strcpy(id, "-");
052             return minus;
053         case '*' :
054             strcpy(id, "*");
055             return times;
056         case '/' :
057             strcpy(id, "/");
058             return slash;
059         case '(' :
060             strcpy(id, "(");
061             return lparen;
062         case ')' :
063             strcpy(id, ")");
064             return rparen;
065         case ',' :
066             strcpy(id, ",");
067             return comma;
068         case ';' :
069             strcpy(id, ";");
070             return semicolon;
071         case '.' :
072             strcpy(id, ".");
073             return period;
074         case ':' :
075             ch = read_ch();
076             if (ch == '=') {
077                 strcpy(id, ":=");
078                 return becomes;
079             } else
080                 return nul;
081         case '=' :
082             strcpy(id, "=");
083             return eql;
084         case '#' :
085             strcpy(id, "#");
086             return neq;
087         case '<' :
088             ch = read_ch();
089             if (ch == '=') {
090                 strcpy(id, "<=");
091                 return leq;
092             }
093             put_back(ch);
094             strcpy(id, "<");
095             return lss;
096         case '>' :
097             ch = read_ch();
098             if (ch == '=') {
099                 strcpy(id, ">=");
100                 return geq;
101             }
102             put_back(ch);
103             strcpy(id, ">");
104             return gtr;
105         default :
106             if (isdigit(ch)) {
107                 num = 0;
108                 do {
109                     num = 10 * num + ch - '0';
110                     ch = read_ch();
111                 } while (ch != EOF && isdigit(ch));
112                 _itoa(num, id, 10);
113                 if (isslpha(ch)) {
114                     invalid[0] = (char)ch;
115                     strcat(id, invalid);
116                     ch = read_ch();
117                     while (isalnum(ch)) {
118                         invalid[0] = (char) ch;
119                         strcat(id, invalid);
120                         ch = read_ch();
121                     }
122                     error();
123                     return nul;
124                 }
125                 put_back(ch);
126                 return number;
127             }
128             if (isalpha(ch)) {
129                 id_len = 0;
130                 do {
131                     if (id_len < MAX_ID) {
132                         id[id_len] = (char)ch;
133                         id_len++;
134                     }
135                     ch = read_ch();
136                 } while (ch != EOF && isalnum(ch));
137                 id[id_len] = '\0';
138                 put_back(ch);
139                 iskeywords = find_tab(id);
140                 return (iskeywords == -1) ? ident : get_data(iskeywords);
141             }
142             error();
143             return nul;
144     }
145 }
146   
147 int w2f(int flag) {
148     fprintf(out, "|%4d |%19s |%19s |%19s |\n",token_num, id, symtype[flag], "");
149     return 0;
150 }

[文件] keyword.c ~ 1KB    下载(22)

01 #include "mylex.h"
02   
03 extern FILE *out;
04 extern int cur_line, cur_col, err_line, err_col;
05 extern int num;
06 extern char id[21];
07 extern int token_num;
08   
/*
 * Reserved words of the language.  The position of each word is the index
 * returned by find_tab() and translated to a SYMBOL by get_data().
 */
char *keywords[13] = {
    "begin",
    "call",
    "const",
    "do",
    "end",
    "if",
    "odd",
    "procedure",
    "read",
    "then",
    "var",
    "while",
    "write"
};
12   
13 int find_tab(char id[21]) {
14     int i;
15     for (i = 0; i !=13; ++i) {
16         if (!strcmp(keywords[i], id))
17             return i;
18         else
19             continue;
20     }
21     return -1;
22 }
23   
24 SYMBOL get_data(int No) {
25     switch (No) {
26         case 0 :
27             return beginsym;
28         case 1 :
29             return callsym;
30         case 2 :
31             return constsym;
32         case 3 :
33             return dosym;
34         case 4 :
35             return endsym;
36         case 5 :
37             return ifsym;
38         case 6 :
39             return oddsym;
40         case 7 :
41             return proceduresym;
42         case 8 :
43             return readsym;
44         case 9 :
45             return thensym;
46         case 10 :
47             return varsym;
48         case 11 :
49             return whilesym;
50         case 12 :
51             return writesym;
52         default :
53             error(">>> Error : ");
54             return nul;
55     }
56 }
57   
58 int error() {
59     fprintf(out, "|%4d |%19s |%19s %19s |\n", token_num, id, "nul", "invalid character");
60     return 0;
61 }

[文件] main.c ~ 2KB    下载(22)

01 #include "mylex.h"
02   
03 extern int cur_line, cur_col, err_line, err_col;
04 extern char id[21];
05 extern int token_num;
06   
/* Input stream holding the program text being scanned. */
FILE *source;
/* Output stream receiving the token table. */
FILE *out;
08   
09 int main() {
10     clock_t start, finish;
11     double duration;
12     char filename[21] = "\0";
13     char outname[21] = "\0";
14     int flag;
15     FILE *stream;
16     char line[100];
17     printf("PL/0 Lexical Analyzer\n\n");
18     printf("Please enter the source file: ");
19     gets_s(filename, 20);
20     if ((source = fopen(filename, "r")) == NULL) {
21         printf("Error: the file \"%s\" can not be opened, press any key to exit\n", filename);
22         _getch();
23         return 1;
24     } else {
25         int i;
26         start = clock();
27         for (i = 0; i < (int)strlen(filename) - 4; ++i)
28             outname[i] = filename[i];
29         strcat(outname, ".out");
30         if ((out = fopen(outname, "w+")) == NULL) {
31             printf("Error: can not create \"%s\" in current path, press any key to exit\n", outname);
32             _getch();
33             return 1;
34         }
35         cur_line = 1;
36         cur_col = 0;
37         token_num = 0;
38         fprintf(out, "+-----+--------------------+--------------------+--------------------+\n");
39         fprintf(out, "|%4s |%19s |%19s |%19s |\n", "No", "Symbol", "Symbol Type", "Notes");
40         fprintf(out, "+-----+--------------------+--------------------+--------------------+\n");
41         while (!feof(source)) {
42             token_num++;
43             flag = getsym();
44             if (flag == 0)
45                 continue;
46             w2f(flag);
47         }
48         fprintf(out, "+-----+--------------------+--------------------+--------------------+\n");
49         fcloseall();
50         stream = fopen(outname, "r");
51         while (1) {
52             fgets(line, 100, stream);
53             if (feof(stream))
54                 break;
55             else
56                 printf("%s", line);
57         }
58         fclose(stream);
59         finish = clock();
60         duration = (double)(finish - start) / CLOCKS_PER_SEC;
61         printf("completed successfully. (%2.3f secs)\n", duration);
62     }
63     return 0;
}
0
0

查看评论
* 以上用户言论只代表其个人观点,不代表CSDN网站的观点或立场
    个人资料
    • 访问:121247次
    • 积分:1294
    • 等级:
    • 排名:千里之外
    • 原创:51篇
    • 转载:92篇
    • 译文:0篇
    • 评论:2条
    最新评论