C语言实现的词法分析器

转载 2012年03月21日 20:23:03
 
#ifndef MYLEX_H
02 #define MYLEX_H
03   
04 #include <stdio.h>
05 #include <stdlib.h>
06 #include <ctype.h>
07 #include <string.h>
08 #include <time.h>
09   
10 #define MAX_ID 20
11   
/*
 * Token categories produced by the scanner.
 * The numeric value of each enumerator is also used as an index into the
 * symtype[] name table, so the order here must match that table.
 */
typedef enum SYMBOL {
    /* special results */
    nul,
    eof,

    /* arithmetic operators */
    plus,
    minus,
    times,
    slash,

    /* punctuation and assignment */
    lparen,
    rparen,
    comma,
    semicolon,
    period,
    becomes,

    /* relational operators */
    eql,
    neq,
    lss,
    gtr,
    leq,
    geq,

    /* literals and names */
    number,
    ident,

    /* reserved words, in the same order as the keywords[] table */
    beginsym,
    callsym,
    constsym,
    dosym,
    endsym,
    ifsym,
    oddsym,
    proceduresym,
    readsym,
    thensym,
    varsym,
    whilesym,
    writesym
} SYMBOL;
17   
18 int read_ch();
19 int put_back(int);
20 SYMBOL getsym();
21 int w2f(int);
22   
23 int find_tab(char *);
24 SYMBOL get_data(int);
25 int error();
26   
27 #endif

[文件] symbol.c ~ 4KB    下载(21)

001 #include "mylex.h"
002   
003 extern FILE *source, *out;
004   
/*
 * Printable name of every SYMBOL value, indexed by the enumerator itself.
 * The entries must stay in the same order as the SYMBOL enum; w2f() prints
 * symtype[flag] in the "Symbol Type" column of the report.
 */
const char *symtype[33] = {
    "nul", "eof", "plus", "minus", "times", "slash", "lparen", "rparen", "comma", "semicolon", "period", "becomes",
    "eql", "neq", "lss", "gtr", "leq", "geq", "number", "ident",
    "beginsym", "callsym", "constsym", "dosym", "endsym", "ifsym", "oddsym", "proceduresym", "readsym", "thensym", "varsym", "whilesym", "writesym"
};
010   
/* Current read position; maintained by read_ch() and put_back(). */
int cur_line;
int cur_col;

/* Position recorded by getsym() right after it has skipped leading blanks. */
int err_line;
int err_col;

/* Value of the most recently scanned number. */
int num;

/* Text of the most recently scanned token (up to 20 characters plus NUL). */
char id[21] = "\0";

/* Number of characters stored in id while an identifier is being scanned. */
int id_len;

/* Running token counter printed in the first report column. */
int token_num;
016   
017 int read_ch() {
018     int ch = fgetc(source);
019     cur_col++;
020     if (ch == '\n') {
021         cur_line++;
022         cur_col = 0;
023     }
024     return ch;
025 }
026   
027 int put_back(int ch) {
028     ungets(ch, source);
029     cur_col--;
030     if (ch == '\n')
031         cur_line--;
032     return 0;
033 }
034   
035 SYMBOL getsym() {
036     int ch;
037     int iskeywords;
038     char invalid[2] = "\0";
039     while ((ch = read_ch()) != EOF && ch <= ' ')
040         ;
041     err_line = cur_line;
042     err_col = cur_col;
043     switch (ch) {
044         case EOF :
045             strcpy(id, "EOF");
046             return eof;
047         case '+' :
048             strcpy(id, "+");
049             return plus;
050         case '-' :
051             strcpy(id, "-");
052             return minus;
053         case '*' :
054             strcpy(id, "*");
055             return times;
056         case '/' :
057             strcpy(id, "/");
058             return slash;
059         case '(' :
060             strcpy(id, "(");
061             return lparen;
062         case ')' :
063             strcpy(id, ")");
064             return rparen;
065         case ',' :
066             strcpy(id, ",");
067             return comma;
068         case ';' :
069             strcpy(id, ";");
070             return semicolon;
071         case '.' :
072             strcpy(id, ".");
073             return period;
074         case ':' :
075             ch = read_ch();
076             if (ch == '=') {
077                 strcpy(id, ":=");
078                 return becomes;
079             } else
080                 return nul;
081         case '=' :
082             strcpy(id, "=");
083             return eql;
084         case '#' :
085             strcpy(id, "#");
086             return neq;
087         case '<' :
088             ch = read_ch();
089             if (ch == '=') {
090                 strcpy(id, "<=");
091                 return leq;
092             }
093             put_back(ch);
094             strcpy(id, "<");
095             return lss;
096         case '>' :
097             ch = read_ch();
098             if (ch == '=') {
099                 strcpy(id, ">=");
100                 return geq;
101             }
102             put_back(ch);
103             strcpy(id, ">");
104             return gtr;
105         default :
106             if (isdigit(ch)) {
107                 num = 0;
108                 do {
109                     num = 10 * num + ch - '0';
110                     ch = read_ch();
111                 } while (ch != EOF && isdigit(ch));
112                 _itoa(num, id, 10);
113                 if (isslpha(ch)) {
114                     invalid[0] = (char)ch;
115                     strcat(id, invalid);
116                     ch = read_ch();
117                     while (isalnum(ch)) {
118                         invalid[0] = (char) ch;
119                         strcat(id, invalid);
120                         ch = read_ch();
121                     }
122                     error();
123                     return nul;
124                 }
125                 put_back(ch);
126                 return number;
127             }
128             if (isalpha(ch)) {
129                 id_len = 0;
130                 do {
131                     if (id_len < MAX_ID) {
132                         id[id_len] = (char)ch;
133                         id_len++;
134                     }
135                     ch = read_ch();
136                 } while (ch != EOF && isalnum(ch));
137                 id[id_len] = '\0';
138                 put_back(ch);
139                 iskeywords = find_tab(id);
140                 return (iskeywords == -1) ? ident : get_data(iskeywords);
141             }
142             error();
143             return nul;
144     }
145 }
146   
147 int w2f(int flag) {
148     fprintf(out, "|%4d |%19s |%19s |%19s |\n",token_num, id, symtype[flag], "");
149     return 0;
150 }

[文件] keyword.c ~ 1KB    下载(22)

01 #include "mylex.h"
02   
03 extern FILE *out;
04 extern int cur_line, cur_col, err_line, err_col;
05 extern int num;
06 extern char id[21];
07 extern int token_num;
08   
/* Reserved words; the position of each word is the index find_tab() returns. */
char *keywords[13] = {
    "begin",
    "call",
    "const",
    "do",
    "end",
    "if",
    "odd",
    "procedure",
    "read",
    "then",
    "var",
    "while",
    "write"
};

/*
 * Look `id` up in the keywords table (exact, case-sensitive match).
 *
 * Returns the index of the matching entry, or -1 when `id` is not a
 * reserved word.
 */
int find_tab(char id[21]) {
    int idx = 0;

    while (idx < 13) {
        if (strcmp(keywords[idx], id) == 0)
            return idx;
        idx++;
    }
    return -1;
}
23   
24 SYMBOL get_data(int No) {
25     switch (No) {
26         case 0 :
27             return beginsym;
28         case 1 :
29             return callsym;
30         case 2 :
31             return constsym;
32         case 3 :
33             return dosym;
34         case 4 :
35             return endsym;
36         case 5 :
37             return ifsym;
38         case 6 :
39             return oddsym;
40         case 7 :
41             return proceduresym;
42         case 8 :
43             return readsym;
44         case 9 :
45             return thensym;
46         case 10 :
47             return varsym;
48         case 11 :
49             return whilesym;
50         case 12 :
51             return writesym;
52         default :
53             error(">>> Error : ");
54             return nul;
55     }
56 }
57   
58 int error() {
59     fprintf(out, "|%4d |%19s |%19s %19s |\n", token_num, id, "nul", "invalid character");
60     return 0;
61 }

[文件] main.c ~ 2KB    下载(22)

01 #include "mylex.h"
02   
03 extern int cur_line, cur_col, err_line, err_col;
04 extern char id[21];
05 extern int token_num;
06   
/* Input stream being scanned; opened in main(), read through read_ch(). */
FILE *source;
/* Report stream; opened in main(), written by w2f() and error(). */
FILE *out;
08   
09 int main() {
10     clock_t start, finish;
11     double duration;
12     char filename[21] = "\0";
13     char outname[21] = "\0";
14     int flag;
15     FILE *stream;
16     char line[100];
17     printf("PL/0 Lexical Analyzer\n\n");
18     printf("Please enter the source file: ");
19     gets_s(filename, 20);
20     if ((source = fopen(filename, "r")) == NULL) {
21         printf("Error: the file \"%s\" can not be opened, press any key to exit\n", filename);
22         _getch();
23         return 1;
24     } else {
25         int i;
26         start = clock();
27         for (i = 0; i < (int)strlen(filename) - 4; ++i)
28             outname[i] = filename[i];
29         strcat(outname, ".out");
30         if ((out = fopen(outname, "w+")) == NULL) {
31             printf("Error: can not create \"%s\" in current path, press any key to exit\n", outname);
32             _getch();
33             return 1;
34         }
35         cur_line = 1;
36         cur_col = 0;
37         token_num = 0;
38         fprintf(out, "+-----+--------------------+--------------------+--------------------+\n");
39         fprintf(out, "|%4s |%19s |%19s |%19s |\n", "No", "Symbol", "Symbol Type", "Notes");
40         fprintf(out, "+-----+--------------------+--------------------+--------------------+\n");
41         while (!feof(source)) {
42             token_num++;
43             flag = getsym();
44             if (flag == 0)
45                 continue;
46             w2f(flag);
47         }
48         fprintf(out, "+-----+--------------------+--------------------+--------------------+\n");
49         fcloseall();
50         stream = fopen(outname, "r");
51         while (1) {
52             fgets(line, 100, stream);
53             if (feof(stream))
54                 break;
55             else
56                 printf("%s", line);
57         }
58         fclose(stream);
59         finish = clock();
60         duration = (double)(finish - start) / CLOCKS_PER_SEC;
61         printf("completed successfully. (%2.3f secs)\n", duration);
62     }
63     return 0;
}

一个简单词法分析器的C语言实现

一个简单词法分析器的C语言实现 1.1实验描述 例如:对源程序: begin x:=9: if x>9 then x:=2*x+1/3; end # 的源文件,...
  • chenglinhust
  • chenglinhust
  • 2013年06月13日 14:07
  • 7413

c语言实现编译原理词法分析器

词法分析器 :#include #include #include #include #includeint i, row = 0, line = 0; char a[1000]; //程序 int...
  • vivi_and_qiao
  • vivi_and_qiao
  • 2017年04月18日 20:00
  • 1440

PL/0 词法分析器设计 c语言

【实验内容】 用C语言编写一个PL/0词法分析器,为语法语义分析提供单词,使之能把输入的字符串形式的源程序分割成一个个单词符号传递给语法语义分析,并把分析结果(基本字,运算符,标识符,常数以及界符)输...
  • Double2hao
  • Double2hao
  • 2016年04月20日 15:46
  • 2975

编译原理--C-Minus词法分析器C++实现

词法分析器的主要功能是把源代码整理成一个个记号(token),记号的类型主要有系统保留字(if,return等)、特殊字符(+,*,/等)、字符串记号(数字和标识符)。 如:str[i] = 45 ...
  • hiluo302
  • hiluo302
  • 2016年04月25日 14:57
  • 2138

基于C++的C语言词法分析器

#include #include #include using namespace std; char ch =' '; string key[12]={"int","continue","i...
  • Tomi_En
  • Tomi_En
  • 2015年07月10日 10:11
  • 968

Java编写的C语言词法分析器

Java编写的C语言词法分析器                      这是java编写的C语言词法分析器,我也是参考很多代码,然后将核心代码整理起来,准备放在QQ空间和博客上,目的是互相...
  • u014427391
  • u014427391
  • 2015年05月24日 16:07
  • 1793

简单词法分析器的实现

这是我们的一次编程作业,要求用C编写一个简单的词法分析器。要求如下: 编制一个单词获取程序,从文件输入的源程序中,识别出各个具有独立意义的单词,即关键字、标识符、整数、小数、字符串、分隔符、运算符等...
  • abc12580
  • abc12580
  • 2016年04月05日 18:59
  • 353

c语言词法分析器的简单实现

前不久编译原理学习了词法分析,自己实现了一个简单的c语言词法分析器,来加深对词法分析器原理,状态转换图,有限自动机的理解。当我们想在电脑上运行一个c语言程序时,都要将源程序进行编译。编译简单来说就是将...
  • hjh_walker
  • hjh_walker
  • 2016年11月04日 21:24
  • 3184

java实现C语言子集的语法分析器

如题,只是做一个算法的演示,所以并不能识别C语言全部的关键字,像int 等关键字会被识别为标识符。可以按照自己需求自行扩充保留字表。 程序功能: 词法分析器从input文件中读入一小段C语言源程序,以...
  • sinat_32092165
  • sinat_32092165
  • 2016年11月25日 17:57
  • 787

最简单的词法分析器

1、 实验目的: 设计、编制、调试一个词法分析程序,对单词进行识别和编码,加深对词法分析原理的理解。   2、 实验要求 (1) 允许用户自己输入源程序并保存为文件 (2) 系统能够输出经过...
  • weailily
  • weailily
  • 2016年05月09日 20:29
  • 3792
内容举报
返回顶部
收藏助手
不良信息举报
您举报文章:C语言实现的词法分析器
举报原因:
原因补充:

(最多只允许输入30个字)