编译原理之标识符拆分

14 篇文章 0 订阅
7 篇文章 0 订阅


#pragma once
#include<string>
#include<cctype>
#include <unordered_set>

using namespace std;

// Reserved words of the C language. A word spelled exactly like one of these
// is treated as a keyword instead of an ordinary identifier.
// The table is kept in alphabetical order so a missing entry is easy to spot.
const std::unordered_set<std::string> keywords({
	"auto", "break", "case", "char", "const", "continue",
	"default", "do", "double", "else", "enum", "extern",
	"float", "for", "goto", "if", "int", "long",
	"register", "return", "short", "signed", "sizeof", "static",
	"struct", "switch", "typedef", "union", "unsigned", "void",
	"volatile", "while"
});
// Evaluates to 1 when the string `word` is a reserved word, 0 otherwise.
#define iskey(word) (keywords.count(word))

// Characters that can start an operator or punctuation token. The tokenizer
// also uses membership in this set to decide where an identifier, a number or
// an unrecognised run of characters ends.
const std::unordered_set<char> operators{
	'(', ')', ';',
	'=', '!', '~',
	'+', '-', '*', '/', '%',
	'&', '|', '<', '>'
};
// Evaluates to 1 when the character `c` is in `operators`, 0 otherwise.
#define isoper(c) (operators.count(c))

// Category reported by Tokenizer::next() for the token it just extracted.
enum TokenType {
	KEY,	// reserved word (present in `keywords`)
	ID,	// identifier
	NUM,	// numeric literal
	OP,	// operator or punctuation
	OTH,	// any other run of characters
	NONE	// nothing extracted: the returned token is empty
};

// Splits one statement of text into tokens, handing out one token per call
// to next().
class Tokenizer {
	std::string stmt;			// text being scanned
	std::string::size_type idx;	// offset in `stmt` of the next unread character

public:
	// Creates a tokenizer with no text; next() reports NONE until consume()
	// supplies a statement.
	Tokenizer() : idx(0) {}

	// Creates a tokenizer that scans a private copy of `_stmt`.
	// Taking a const reference lets callers pass const strings and temporaries.
	Tokenizer(const std::string& _stmt) : stmt(_stmt), idx(0) {}

	// Exchanges the tokenizer's text with `_stmt` without copying: the
	// tokenizer takes over the caller's string and the caller receives the
	// text that was held before. Scanning restarts at the beginning, because
	// an offset into the old text is meaningless for the new one.
	void consume(std::string & _stmt) {
		stmt.swap(_stmt);
		idx = 0;
	}

	// Extracts the next token into `token` and returns its category.
	TokenType next(std::string& token);

	// Rewinds to the start of the current text so it can be scanned again.
	void reset() {
		idx = 0;
	}
};





#include "Tokenizer.h"

TokenType Tokenizer::next(string& token)
{
	token.clear();
	int cnt = 0;

	// skip spaces
	while (idx < stmt.length() && isspace(stmt[idx]))
		++idx;

	// parse token
	while (idx < stmt.length()) {
		// id
		if (isalpha(stmt[idx]) || stmt[idx] == '_') {
			size_t idx2 = idx;
			while (isalnum(stmt[idx]) || stmt[idx] == '_')
				if (++idx >= stmt.length())
					break;
			token.append(stmt.substr(idx2, idx - idx2));

			// check boundary
			if (!isspace(stmt[idx]) && !isoper(stmt[idx]))
				break;

			// check keywords
			if (iskey(token))
				return KEY;

			return ID;
		}

		// num
		else if (isalnum(stmt[idx]) && !isalpha(stmt[idx]) || stmt[idx] == '.') {
			// read hex preffix
			if (stmt[idx] == '0' && idx + 1 < stmt.length() && stmt[idx + 1] == 'x') {
				token.append(stmt.substr(idx, 2));
				idx += 2;
			}

			// read num and fp
			size_t idx2 = idx;
			while (isalnum(stmt[idx]) && !isalpha(stmt[idx]) || stmt[idx] == '.') {
				if (++idx >= stmt.length())
					break;
			}
			token.append(stmt.substr(idx2, idx - idx2));

			idx2 = idx;
			if (idx < stmt.length()) {
				// read suffix
			}

			// check boundary
			if (!isspace(stmt[idx]) && !isoper(stmt[idx]))
				break;

			return NUM;
		}

		// op 
		else if (isoper(stmt[idx])) {
			if (stmt[idx] == '+') {
				if (idx + 1 < stmt.length() && (stmt[idx + 1] == '+' || stmt[idx + 1] == '=')) {
					token.append(stmt.substr(idx, 2));
					idx += 2;
				}
				else
					token.append(1, stmt[idx++]);
			}
			else  if (stmt[idx] == '-') {
				if (idx + 1 < stmt.length() && (stmt[idx + 1] == '-' || stmt[idx + 1] == '=')) {
					token.append(stmt.substr(idx, 2));
					idx += 2;
				}
				else
					token.append(1, stmt[idx++]);
			}
			else if (stmt[idx] == '*' || stmt[idx] == '/' || stmt[idx] == '%'
				|| stmt[idx] == '&' || stmt[idx] == '|'
				|| stmt[idx] == '~' || stmt[idx] == '!' || stmt[idx] == '=') {
				if (idx + 1 < stmt.length() && stmt[idx + 1] == '=') {
					token.append(stmt.substr(idx, 2));
					idx += 2;
				}
				else
					token.append(1, stmt[idx++]);
			}
			else if (stmt[idx] == '>') {
				if (idx + 1 < stmt.length() && (stmt[idx + 1] == '>' || stmt[idx] == '=')) {
					token.append(stmt.substr(idx, 2));
					idx += 2;
				}
				else
					token.append(1, stmt[idx++]);
			}
			else if (stmt[idx] == '<') {
				if (idx + 1 < stmt.length() && (stmt[idx + 1] == '<' || stmt[idx] == '=')) {
					token.append(stmt.substr(idx, 2));
					idx += 2;
				}
				else
					token.append(1, stmt[idx++]);
			}
			else 	// ;, (, )
				token.append(1, stmt[idx++]);

			return OP;
		}

		// others
		else
			break;
	}

	// handle others
	int idx2 = idx;
	while (idx < stmt.length() && !isoper(stmt[idx]) && !isspace(stmt[idx]))
		++idx;
	token.append(stmt.substr(idx2, idx - idx2));

	if (token.empty())
		return NONE;
	else
		return OTH;
}




  • 1
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值