// Copyright 2023, liserver. All rights reserved.
#include "TokenStream.h"
// Table of the language's reserved words.
// NOTE: the order of this table must stay in sync with the TK_KEYWORD_*
// enumerators in TokenCode — KeywordCode() returns (index + 1), and that
// value is used directly as the token's TokenCode.
const char Keyword[][20] = {
"if",
"else",
"while",
"for",
"return",
"break",
"continue",
"int",
"float",
"double",
"string",
"function",
"switch",
"case",
"default",
"class",
"public",
"private",
"protected"};
// Classifies `token`. NOTE(review): currently a stub — it always returns
// TK_NOT_DEFINED regardless of the input. TODO: implement or remove.
TokenCode TokenType(Token token)
{
return TokenCode::TK_NOT_DEFINED;
}
bool IsKeyword(std::string str)
{
for (int i = 0; i < sizeof(Keyword) / sizeof(Keyword[0]); i++)
{
if (str == Keyword[i])
return true;
}
return false;
}
// Returns the 1-based index of `str` in the Keyword[] table, which equals
// its TokenCode value (TK_KEYWORD_IF == 1, ...), or -1 when `str` is not
// a keyword.
int KeywordCode(std::string str)
{
    // Explicit cast avoids the signed/unsigned comparison of the original loop.
    const int keywordCount = static_cast<int>(sizeof(Keyword) / sizeof(Keyword[0]));
    for (int i = 0; i < keywordCount; i++)
    {
        if (str == Keyword[i])
            return i + 1;
    }
    return -1;
}
// Returns true when `c` may start or continue an identifier:
// an ASCII letter or an underscore.
bool IsChar(char c)
{
    if (c == '_')
        return true;
    const bool isLower = ('a' <= c) && (c <= 'z');
    const bool isUpper = ('A' <= c) && (c <= 'Z');
    return isLower || isUpper;
}
// Returns true when `c` is an ASCII decimal digit ('0'..'9').
bool IsNumber(char c)
{
    return ('0' <= c) && (c <= '9');
}
// Tokenizes `str` and returns the resulting TokenStream.
//
// Recognized forms:
//   - identifiers / keywords : letters and '_' (a digit ends an identifier)
//   - numeric literals       : 123 (int) and 123.456 (double)
//   - operators              : + += - -= -> * *= / /= % %= = == ! != > >= < <= && ||
//   - delimiters             : ( ) { } [ ] , ; . :
//   - string literals        : "..." confined to a single line
//   - comments               : '$' and "//", both running to end of line
// Token::line is advanced on every '\n' so each token records its source line.
//
// Fixes over the original: the comment-skip loops and the string-literal scan
// are bounds-checked (they could previously index past the end of `str` on an
// unterminated comment/string at EOF), and a '$' comment no longer swallows
// its terminating '\n' (which used to lose one line in the line counter).
TokenStream ReadString(std::string str)
{
    std::vector<Token> tokens;
    Token token;
    std::string::size_type i = 0;
    while (i < str.length())
    {
        // Whitespace: skip, counting newlines for line numbers.
        if (str[i] == '\t' || str[i] == '\n' || str[i] == ' ')
        {
            if (str[i] == '\n')
                token.line++;
            i++;
            continue;
        }
        // Identifier or keyword (letters and '_' only; a digit ends it,
        // so "abc1" lexes as identifier "abc" followed by int "1").
        else if (IsChar(str[i]))
        {
            while (i < str.length() && IsChar(str[i]))
            {
                token.value += str[i];
                i++;
            }
            if (IsKeyword(token.value))
            {
                // KeywordCode returns index + 1, which equals the TK_KEYWORD_* value.
                token.code = TokenCode(KeywordCode(token.value));
            }
            else
            {
                token.code = TokenCode::TK_IDENTIFIER;
            }
        }
        // Numeric literal: integer, or double when a '.' follows the digits.
        else if (IsNumber(str[i]))
        {
            while (i < str.length() && IsNumber(str[i]))
            {
                token.value += str[i];
                i++;
            }
            if (i < str.length() && str[i] == '.')
            {
                token.value += str[i];
                i++;
                while (i < str.length() && IsNumber(str[i]))
                {
                    token.value += str[i];
                    i++;
                }
                token.code = TokenCode::TK_LITERAL_DOUBLE;
            }
            else
            {
                token.code = TokenCode::TK_LITERAL_INT;
            }
        }
        // Operator, delimiter, comment, or string literal.
        // Lookahead note: `str[i + 1]` with i + 1 == size() reads the
        // null terminator, which std::string::operator[] defines since C++11.
        else
            switch (str[i])
            {
            case '$':
                // '$' line comment: skip to end of line. The '\n' itself is
                // left for the whitespace branch so the line counter stays
                // correct, and the loop is bounds-checked so a comment on the
                // last line cannot run past the buffer.
                while (i < str.length() && str[i] != '\n')
                    i++;
                break;
            case '+':
                if (str[i + 1] == '=')
                {
                    token.code = TokenCode::TK_OPERATOR_ADD_ASSIGN;
                    token.value = "+=";
                    i += 2;
                }
                else
                {
                    token.code = TokenCode::TK_OPERATOR_ADD;
                    token.value = "+";
                    i++;
                }
                break;
            case '-':
                if (str[i + 1] == '=')
                {
                    token.code = TokenCode::TK_OPERATOR_SUB_ASSIGN;
                    token.value = "-=";
                    i += 2;
                }
                else if (str[i + 1] == '>')
                {
                    token.code = TokenCode::TK_OPERATOR_ARROW;
                    token.value = "->";
                    i += 2;
                }
                else
                {
                    token.code = TokenCode::TK_OPERATOR_SUB;
                    token.value = "-";
                    i++;
                }
                break;
            case '*':
                if (str[i + 1] == '=')
                {
                    token.code = TokenCode::TK_OPERATOR_MUL_ASSIGN;
                    token.value = "*=";
                    i += 2;
                }
                else
                {
                    token.code = TokenCode::TK_OPERATOR_MUL;
                    token.value = "*";
                    i++;
                }
                break;
            case '/':
                if (str[i + 1] == '=')
                {
                    token.code = TokenCode::TK_OPERATOR_DIV_ASSIGN;
                    token.value = "/=";
                    i += 2;
                }
                else if (str[i + 1] == '/')
                {
                    i += 2;
                    // "//" line comment: bounds-checked skip to end of line.
                    while (i < str.length() && str[i] != '\n')
                        i++;
                }
                else
                {
                    token.code = TokenCode::TK_OPERATOR_DIV;
                    token.value = "/";
                    i++;
                }
                break;
            case '%':
                if (str[i + 1] == '=')
                {
                    token.code = TokenCode::TK_OPERATOR_MOD_ASSIGN;
                    token.value = "%=";
                    i += 2;
                }
                else
                {
                    token.code = TokenCode::TK_OPERATOR_MOD;
                    token.value = "%";
                    i++;
                }
                break;
            case '=':
                if (str[i + 1] == '=')
                {
                    token.code = TokenCode::TK_OPERATOR_EQUAL;
                    token.value = "==";
                    i += 2;
                }
                else
                {
                    token.code = TokenCode::TK_OPERATOR_ASSIGN;
                    token.value = "=";
                    i++;
                }
                break;
            case '!':
                if (str[i + 1] == '=')
                {
                    token.code = TokenCode::TK_OPERATOR_NOT_EQUAL;
                    token.value = "!=";
                    i += 2;
                }
                else
                {
                    token.code = TokenCode::TK_OPERATOR_NOT;
                    token.value = "!";
                    i++;
                }
                break;
            case '>':
                if (str[i + 1] == '=')
                {
                    token.code = TokenCode::TK_OPERATOR_GREATER_EQUAL;
                    token.value = ">=";
                    i += 2;
                }
                else
                {
                    token.code = TokenCode::TK_OPERATOR_GREATER;
                    token.value = ">";
                    i++;
                }
                break;
            case '<':
                if (str[i + 1] == '=')
                {
                    token.code = TokenCode::TK_OPERATOR_LESS_EQUAL;
                    token.value = "<=";
                    i += 2;
                }
                else
                {
                    token.code = TokenCode::TK_OPERATOR_LESS;
                    token.value = "<";
                    i++;
                }
                break;
            case '&':
                if (str[i + 1] == '&')
                {
                    token.code = TokenCode::TK_OPERATOR_AND;
                    token.value = "&&";
                    i += 2;
                }
                else
                {
                    // Bare '&' is not a defined operator in this language.
                    token.code = TokenCode::TK_NOT_DEFINED;
                    token.value = "&";
                    i++;
                }
                break;
            case '|':
                if (str[i + 1] == '|')
                {
                    token.code = TokenCode::TK_OPERATOR_OR;
                    token.value = "||";
                    i += 2;
                }
                else
                {
                    // Bare '|' is not a defined operator in this language.
                    token.code = TokenCode::TK_NOT_DEFINED;
                    token.value = "|";
                    i++;
                }
                break;
            // Delimiters
            case '(':
                token.code = TokenCode::TK_DELIMITER_LEFT_PARENTHESIS;
                token.value = "(";
                i++;
                break;
            case ')':
                token.code = TokenCode::TK_DELIMITER_RIGHT_PARENTHESIS;
                token.value = ")";
                i++;
                break;
            case '{':
                token.code = TokenCode::TK_DELIMITER_LEFT_BRACE;
                token.value = "{";
                i++;
                break;
            case '}':
                token.code = TokenCode::TK_DELIMITER_RIGHT_BRACE;
                token.value = "}";
                i++;
                break;
            case '[':
                token.code = TokenCode::TK_DELIMITER_LEFT_BRACKET;
                token.value = "[";
                i++;
                break;
            case ']':
                token.code = TokenCode::TK_DELIMITER_RIGHT_BRACKET;
                token.value = "]";
                i++;
                break;
            case ',':
                token.code = TokenCode::TK_DELIMITER_COMMA;
                token.value = ",";
                i++;
                break;
            case ';':
                token.code = TokenCode::TK_DELIMITER_SEMICOLON;
                token.value = ";";
                i++;
                break;
            case '.':
                token.code = TokenCode::TK_DELIMITER_DOT;
                token.value = ".";
                i++;
                break;
            case ':':
                token.code = TokenCode::TK_DELIMITER_COLON;
                token.value = ":";
                i++;
                break;
            case '"':
            {
                std::string literal = "";
                std::string::size_type consumed = 0;
                // Collect the characters between the quotes; stop at the
                // closing quote, end of line, or end of input (strings may
                // not span lines). The bounds check runs BEFORE the lookahead
                // — the original checked after it and could walk past the end.
                while (i + 1 < str.size() && str[i + 1] != '"' && str[i + 1] != '\n')
                {
                    literal += str[i + 1];
                    consumed++;
                    i++;
                }
                if (i + 1 >= str.size() || str[i + 1] != '"')
                {
                    // Unterminated string: emit the lone '"' as an undefined
                    // token and resume scanning right after the opening quote.
                    token.code = TokenCode::TK_NOT_DEFINED;
                    token.value = "\"";
                    i = i - consumed + 1;
                }
                else
                {
                    token.code = TokenCode::TK_LITERAL_STRING;
                    token.value = literal;
                    i += 2; // past the closing quote
                }
                break;
            }
            default:
                // Any other character becomes a single undefined token.
                token.code = TokenCode::TK_NOT_DEFINED;
                token.value = str[i];
                i++;
                break;
            }
        // Comment skipping builds no token: push only when one was assembled.
        // String literals may legitimately be empty, so they are always pushed.
        if (token.code != TokenCode::TK_LITERAL_STRING && token.value == "")
            continue;
        tokens.push_back(token);
        token.value = "";
        token.code = TokenCode::TK_NOT_DEFINED;
    }
    // Save and return
    return TokenStream(tokens);
}
// ---------------------------- TokenStream.h ----------------------------
// Copyright 2023, liserver. All rights reserved.
#pragma once
#include <iostream>
#include <string>
#include <vector>
#include "../Compiler.h"
// All token categories produced by the lexer.
// NOTE: TK_NOT_DEFINED is 0 and the TK_KEYWORD_* enumerators start at 1,
// in the same order as the Keyword[] table — KeywordCode() maps a keyword
// to (table index + 1), which is used directly as its TokenCode. Do not
// reorder these enumerators without updating that table.
enum TokenCode
{
// About Token
TK_NOT_DEFINED = 0, // Not defined
// Keywords
TK_KEYWORD_IF, // if
TK_KEYWORD_ELSE, // else
TK_KEYWORD_WHILE, // while
TK_KEYWORD_FOR, // for
TK_KEYWORD_RETURN, // return
TK_KEYWORD_BREAK, // break
TK_KEYWORD_CONTINUE, // continue
TK_KEYWORD_INT, // int
TK_KEYWORD_FLOAT, // float
TK_KEYWORD_DOUBLE, // double
TK_KEYWORD_STRING, // string
TK_KEYWORD_FUNCTION, // function
TK_KEYWORD_SWITCH, // switch
TK_KEYWORD_CASE, // case
TK_KEYWORD_DEFAULT, // default
TK_KEYWORD_CLASS, // class
TK_KEYWORD_PUBLIC, // public
TK_KEYWORD_PRIVATE, // private
TK_KEYWORD_PROTECTED, // protected
// Operators
TK_OPERATOR_ADD, // +
TK_OPERATOR_SUB, // -
TK_OPERATOR_MUL, // *
TK_OPERATOR_DIV, // /
TK_OPERATOR_MOD, // %
TK_OPERATOR_ASSIGN, // =
TK_OPERATOR_ADD_ASSIGN, // +=
TK_OPERATOR_SUB_ASSIGN, // -=
TK_OPERATOR_MUL_ASSIGN, // *=
TK_OPERATOR_DIV_ASSIGN, // /=
TK_OPERATOR_MOD_ASSIGN, // %=
TK_OPERATOR_EQUAL, // ==
TK_OPERATOR_NOT_EQUAL, // !=
TK_OPERATOR_LESS, // <
TK_OPERATOR_LESS_EQUAL, // <=
TK_OPERATOR_GREATER, // >
TK_OPERATOR_GREATER_EQUAL, // >=
TK_OPERATOR_ARROW, // ->
TK_OPERATOR_AND, // &&
TK_OPERATOR_OR, // ||
TK_OPERATOR_NOT, // !
// Delimiters
TK_DELIMITER_LEFT_BRACE, // {
TK_DELIMITER_RIGHT_BRACE, // }
TK_DELIMITER_LEFT_BRACKET, // [
TK_DELIMITER_RIGHT_BRACKET, // ]
TK_DELIMITER_LEFT_PARENTHESIS, // (
TK_DELIMITER_RIGHT_PARENTHESIS, // )
TK_DELIMITER_COMMA, // ,
TK_DELIMITER_SEMICOLON, // ;
TK_DELIMITER_DOT, // .
TK_DELIMITER_COLON, // :
// Literals
TK_LITERAL_INT, // 123
TK_LITERAL_FLOAT, // 123.456 (not currently emitted by ReadString)
TK_LITERAL_DOUBLE, // 123.456
TK_LITERAL_STRING, // "Hello World"
// Token IDENTIFIER
TK_IDENTIFIER // identifier
};
// One lexical token: its category, its raw text, and the source line it
// was read from.
class Token
{
public:
TokenCode code = TokenCode::TK_NOT_DEFINED; // token category (see TokenCode)
std::string value = ""; // raw text of the token as written in the source
int line = 1; // 1-based source line the token was read from
};
// A Token is one maximal run of characters:
// e.g. 123.456 is one token, "Hello World" is one token.
// Returns true when str is one of the reserved keywords.
bool IsKeyword(std::string str);
// Returns the keyword's id (table index + 1, matching TK_KEYWORD_*), or -1.
int KeywordCode(std::string str);
// Returns true when c is an identifier character (ASCII letter or '_').
bool IsChar(char c);
// Returns true when c is a decimal digit.
bool IsNumber(char c);
// Classifies a token. NOTE(review): the current definition is a stub that
// always returns TK_NOT_DEFINED.
TokenCode TokenType(Token token);
class TokenStream
{
public:
int number = 0; // 当前Token的序号
std::vector<Token> tokens;
inline int Size()
{
return tokens.size();
}
TokenStream(){};
TokenStream(std::vector<Token> tokens)
{
this->tokens = tokens;
}
Token Up()
{
if (this->number > 0)
{
this->number -= 1;
return this->tokens[this->number];
}
else
{
this->number = this->tokens.size() - 1;
return this->tokens[this->number];
}
}
Token Next()
{
if (number < Size())
{
Token token = this->tokens[this->number];
this->number += 1;
return token;
}
else
{
this->number = 0;
return this->tokens[this->number];
}
}
inline std::vector<Token> GetVector()
{
return this->tokens;
}
};
// Scans the whole source string and returns its tokens as a TokenStream.
TokenStream ReadString(std::string str);