C语言简易词法分析器,简单的C语言编译器--词法分析器

1. 定义词法单元Tag

首先要将可能出现的词进行分类,可以有不同的分类方式。如多符一类:将所有逗号、分号、括号等都归为一类;或者一符一类:将每个符号单独归为一类。我这里采用的是一符一类的方式。C++代码如下:

#ifndef TAG_H

#define TAG_H

// Numeric category codes for every kind of token the lexer can produce.
// Each symbol gets its own code ("one symbol, one class").
namespace Tag {

// Reserved words
constexpr int INT    = 1;
constexpr int BOOL   = 2;
constexpr int MAIN   = 3;
constexpr int IF     = 4;
constexpr int ELSE   = 5;
constexpr int FOR    = 6;
constexpr int WHILE  = 7;
constexpr int FALSE  = 8;
constexpr int BREAK  = 9;
constexpr int RETURN = 10;
constexpr int TRUE   = 11;

// Operators
constexpr int NOT       = 20;
constexpr int NE        = 21;
constexpr int AUTOMINUS = 22;
constexpr int MINUS     = 23;
constexpr int AUTOADD   = 24;
constexpr int ADD       = 25;
constexpr int OR        = 26;
constexpr int AND       = 27;
constexpr int MUTIPLY   = 28;
constexpr int DIVIDE    = 29;
constexpr int MOD       = 30;
constexpr int EQ        = 31;
constexpr int ASSIN     = 32;
constexpr int GE        = 33;
constexpr int GT        = 34;
constexpr int LE        = 35;
constexpr int LS        = 36;

// Delimiters
constexpr int COMMA     = 40;
constexpr int SEMICOLON = 41;
constexpr int LLBRACKET = 42;
constexpr int RLBRACKET = 43;
constexpr int LMBRACKET = 44;
constexpr int RMBRACKET = 45;
constexpr int LGBRACKET = 46;
constexpr int RGBRACKET = 47;

// Integer constant
constexpr int NUM = 50;

// Identifier
constexpr int ID = 60;

// Empty (no token)
constexpr int EMPTY = 70;

// Error (illegal token)
constexpr int ERROR = 404;

}

#endif

2. 具体步骤

一个一个字符地扫描测试代码,忽略空白字符,遇到回车时,记录行数加1

要进行区分标识符(即普通变量名字)和保留字

因为将标识符和常数都各自归为一类,所以要有算法能够识别出一整个常数和完整的标识符

加入适当的非法词检测

3. 设计词法分析类

设计一个词法分析器,当然要包括如何存储一个词法单元,如何扫描(scan)测试代码等,直接上代码:

myLexer.h

#ifndef MYLEXER_H

#define MYLEXER_H

#include <fstream>
#include <string>
#include <unordered_map>
#include <utility>

#include "tag.h"

/*

* 主要是定义基本的词法单元类,

* 声明了词法分析类

*/

// A lexical unit: the token text (lexeme) paired with its integer category tag.
class Word {
public:
    // Creates an empty word: empty lexeme, tag 0.
    Word() = default;

    // Creates a word with lexeme `s` and category tag `t`.
    // `s` is taken by value and moved into place to avoid a second copy.
    Word(std::string s, int t) : lexeme(std::move(s)), tag(t) {}

    // Returns a copy of the token text.
    std::string getLexeme() const { return lexeme; }

    // Returns the category tag.
    int getTag() const { return tag; }

    // Replaces the category tag.
    void setTag(int t) { tag = t; }

    // Replaces the token text.
    void setLexeme(std::string s) { lexeme = std::move(s); }

private:
    std::string lexeme;

    // Initialised in-class so that `Word w;` never leaves the tag indeterminate.
    int tag = 0;
};

//词法分析器类

class Lexer {

public:

Lexer();

void reserve(Word w);

bool readnext(char c, std::ifstream &in);

Word scan(std::ifstream &in);

int getLine() { return line; }

private:

char peek;

std::unordered_map<:string word> words;

int line;

};

#endif

myLexer.cpp

#include <cctype>

#include <sstream>

#include <string>

#include "myLexer.h"

// Registers `w` in the reserved-word table under its own lexeme.
// A lexeme that is already present keeps its existing entry.
void Lexer::reserve(Word w) {
    const std::string key = w.getLexeme();
    words.emplace(key, w);
}

// Starts with a blank lookahead on line 1 and preloads the reserved words,
// so that scan() can tell keywords apart from ordinary identifiers.
Lexer::Lexer() : peek(' '), line(1) {
    const Word keywords[] = {
        Word("int", Tag::INT),
        Word("bool", Tag::BOOL),
        Word("main", Tag::MAIN),
        Word("if", Tag::IF),
        Word("else", Tag::ELSE),
        Word("for", Tag::FOR),
        Word("while", Tag::WHILE),
        Word("break", Tag::BREAK),
        Word("return", Tag::RETURN),
        Word("true", Tag::TRUE),
        Word("false", Tag::FALSE),
    };

    for (const Word &keyword : keywords) {
        reserve(keyword);
    }
}

// Helper for two-character operators such as ">=" and "++".
//
// Reads the character that immediately follows the current one into `peek`.
// Returns true when it equals `c`; the pair is then consumed and `peek` is
// reset to a blank. Otherwise returns false and leaves the character just
// read in `peek` as the lookahead for the next token.
bool Lexer::readnext(char c, std::ifstream &in) {
    // get() instead of operator>>: formatted extraction skips whitespace, which
    // would glue "+ +" into "++" and hide line breaks from the line counter.
    if (!in.get(peek)) {
        // End of input: a failed read leaves `peek` untouched, so without this
        // reset a file ending in "=" would be reported as "==".
        peek = ' ';
        return false;
    }

    if (peek != c)
        return false;

    peek = ' ';
    return true;
}

namespace {

// Reads exactly one raw character (whitespace included) from `in` into `peek`.
// At end of input `peek` is set to a blank and false is returned, so callers
// never keep looping on a stale lookahead character.
bool fetchChar(std::ifstream &in, char &peek) {
    if (in.get(peek))
        return true;
    peek = ' ';
    return false;
}

// The <cctype> classifiers have undefined behaviour for negative char values,
// so every call goes through an unsigned char conversion.
bool isSpaceChar(char ch) { return isspace(static_cast<unsigned char>(ch)) != 0; }
bool isDigitChar(char ch) { return isdigit(static_cast<unsigned char>(ch)) != 0; }
bool isAlphaChar(char ch) { return isalpha(static_cast<unsigned char>(ch)) != 0; }
bool isAlnumChar(char ch) { return isalnum(static_cast<unsigned char>(ch)) != 0; }

}  // namespace

// Reads the next token from `in`.
//
// Returns:
//   - an operator / delimiter Word for the symbols handled in the switch below;
//   - Word(<digits>, Tag::NUM) for an integer constant (leading zeros removed);
//   - the reserved-word entry, or Word(<name>, Tag::ID), for a letter-led name;
//   - Word("error", Tag::ERROR) for anything illegal (a lone '|' or '&', a
//     number containing '.', or an unknown character);
//   - Word("empty", Tag::EMPTY) when only whitespace remains before end of input.
//
// On return `peek` holds the first character after the token, or a blank when
// that character has already been consumed or the input is exhausted.
Word Lexer::scan(std::ifstream &in) {
    // Skip whitespace. Characters are read unformatted so that '\n' is actually
    // seen here and the line counter advances.
    while (isSpaceChar(peek)) {
        if (peek == '\n')
            ++line;
        if (!fetchChar(in, peek))
            return Word("empty", Tag::EMPTY);
    }

    // Operators and delimiters.
    switch (peek) {
    case '!':
        if (readnext('=', in))
            return Word("!=", Tag::NE);
        return Word("!", Tag::NOT);

    case '-':
        if (readnext('-', in))
            return Word("--", Tag::AUTOMINUS);
        return Word("-", Tag::MINUS);

    case '+':
        if (readnext('+', in))
            return Word("++", Tag::AUTOADD);
        return Word("+", Tag::ADD);

    case '|':
        if (readnext('|', in))
            return Word("||", Tag::OR);
        return Word("error", Tag::ERROR);

    case '&':
        if (readnext('&', in))
            return Word("&&", Tag::AND);
        return Word("error", Tag::ERROR);

    case '*':
        fetchChar(in, peek);
        return Word("*", Tag::MUTIPLY);

    case '/':
        fetchChar(in, peek);
        return Word("/", Tag::DIVIDE);

    case '%':
        fetchChar(in, peek);
        return Word("%", Tag::MOD);

    case '=':
        if (readnext('=', in))
            return Word("==", Tag::EQ);
        return Word("=", Tag::ASSIN);

    case '>':
        if (readnext('=', in))
            return Word(">=", Tag::GE);
        return Word(">", Tag::GT);

    case '<':
        if (readnext('=', in))
            return Word("<=", Tag::LE);
        return Word("<", Tag::LS);

    case ',':
        fetchChar(in, peek);
        return Word(",", Tag::COMMA);

    case ';':
        fetchChar(in, peek);
        return Word(";", Tag::SEMICOLON);

    case '(':
        fetchChar(in, peek);
        return Word("(", Tag::LLBRACKET);

    case ')':
        fetchChar(in, peek);
        return Word(")", Tag::RLBRACKET);

    case '[':
        fetchChar(in, peek);
        return Word("[", Tag::LMBRACKET);

    case ']':
        fetchChar(in, peek);
        return Word("]", Tag::RMBRACKET);

    case '{':
        fetchChar(in, peek);
        return Word("{", Tag::LGBRACKET);

    case '}':
        fetchChar(in, peek);
        return Word("}", Tag::RGBRACKET);
    }

    // Integer constants.
    if (isDigitChar(peek)) {
        // Collect the digits as text instead of folding them into an int:
        // an arbitrarily long literal then cannot trigger signed overflow.
        std::string digits;
        do {
            digits += peek;
            fetchChar(in, peek);
        } while (isDigitChar(peek));

        if (peek != '.') {
            // Drop leading zeros so the lexeme matches the decimal rendering
            // of the value ("007" -> "7", "000" -> "0").
            const std::string::size_type firstSignificant = digits.find_first_not_of('0');
            if (firstSignificant == std::string::npos)
                return Word("0", Tag::NUM);
            return Word(digits.substr(firstSignificant), Tag::NUM);
        }

        // Fractional numbers are not part of the language. Swallow the '.' and
        // any digits after it so the whole literal yields one error token and
        // the next call makes progress.
        do {
            fetchChar(in, peek);
        } while (isDigitChar(peek));
        return Word("error", Tag::ERROR);
    }

    // Identifiers and reserved words: a letter followed by letters, digits or '_'.
    if (isAlphaChar(peek)) {
        std::string name;
        do {
            name += peek;
            fetchChar(in, peek);
        } while (isAlnumChar(peek) || peek == '_');

        // Reserved words were registered up front; anything else is an identifier.
        const auto reserved = words.find(name);
        if (reserved != words.end())
            return reserved->second;
        return Word(name, Tag::ID);
    }

    // Any other character is illegal. Consume it, otherwise every later call
    // would report the same error without ever advancing.
    fetchChar(in, peek);
    return Word("error", Tag::ERROR);
}

设计完成后,自己写一个main函数,在while循环中调用scan函数,每次打印出Word的内容,就能够得到词法分析的结果。

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值