// 经降重。
// 三代编译器将一种语法类似 C 语言的语句序列翻译为等价的汇编程序，所输出的汇编程序符合 x86 汇编语言格式要求，可以在 Linux 环境下正常运行。词法分析和语法分析部分，可以使用类似 Flex 和 Bison 的工具实现，也可以手工实现。
#include <bits/stdc++.h>
using namespace std;
// Return-type marker of the function currently being translated. main() sets it
// to "void", "int" or "main" when a definition starts and compares it against
// "void" when a closing brace is reached.
string curr_namespace_return = "";
// Local variable name -> slot number; getMemoryLoc() addresses a slot as
// [ebp - slot*4].
map<string, int> vTable;
// Parameter name -> 1-based position; getMemoryLoc() addresses a parameter as
// [ebp + (position+1)*4].
map<string, int> aTable;
// Function name -> {parameter count, flag}. ExpressionProc() emits "push eax"
// after a call when the flag equals 1.
map<string, pair<int,int>> fTable;
// Number of local slots handed out so far in the current function (the
// spelling is kept because other code refers to this name).
int varibles = 0;
// Static helpers describing the operator tokens understood by the expression
// translator. "---" is the token the tokenizer produces for unary minus.
class Judge {
public:
    // Returns the binding strength of `op`; a larger value binds tighter.
    // Any token that is not listed (including "=" and "(") yields 1.
    static int getPriority(const string& op) {
        static const pair<const char*, int> kLevels[] = {
            {"!", 11},  {"~", 11},  {"---", 11},
            {"*", 10},  {"/", 10},  {"%", 10},
            {"+", 9},   {"-", 9},
            {"<", 8},   {">", 8},   {"<=", 8},  {">=", 8},
            {"==", 7},  {"!=", 7},
            {"&", 6},
            {"^", 5},
            {"|", 4},
            {"&&", 3},
            {"||", 2},
        };
        for (const auto& level : kLevels) {
            if (op == level.first) {
                return level.second;
            }
        }
        return 1;
    }

    // Returns true when `op` is one of the unary, binary or assignment
    // operator tokens handled by the expression translator.
    static bool isOp(const string& op) {
        static const char* const kOperators[] = {
            "+",  "-",  "%",  ">",  ">=", "<",  "<=", "*",  "/",  "==",
            "!=", "&",  "^",  "|",  "=",  "&&", "||", "!",  "~",  "---",
        };
        for (const char* candidate : kOperators) {
            if (op == candidate) {
                return true;
            }
        }
        return false;
    }
};
// Maps a candidate token string to a numeric category code.
//   1 "main", 2 "int", 3 "return", 11 identifier, 21 integer literal,
//   31 operator, 41 punctuation, 51 single-character bitwise operator,
//   61 "||" / "&&", -1 anything else.
class TokenClassifier {
public:
    std::regex identifier_regex;
    std::regex number_regex;
    std::regex operators_regex;
    std::smatch match;

    // Note: inside the first character class of the operator pattern, "+->"
    // is parsed as a range from '+' to '>', so single characters such as
    // ',', '.', ':' and '=' are classified as operators as well.
    TokenClassifier()
        : identifier_regex("[a-zA-Z_][0-9a-zA-Z_]*"),
          number_regex("[-+]?[0-9]+"),
          operators_regex("[*/%+-><]|[><!=]=") {}

    // Returns the category code of `str`. Exact spellings are tested first,
    // then the identifier, number and operator patterns in that order; each
    // pattern must match the whole string.
    int classify(const std::string& str) {
        static const std::pair<const char*, int> kExact[] = {
            {"main", 1}, {"int", 2},  {"return", 3},
            {";", 41},   {"{", 41},   {"}", 41},  {"(", 41}, {")", 41},
            {"^", 51},   {"|", 51},   {"&", 51},
            {"||", 61},  {"&&", 61},
        };
        for (const auto& entry : kExact) {
            if (str == entry.first) {
                return entry.second;
            }
        }

        const std::pair<const std::regex*, int> patterns[] = {
            {&identifier_regex, 11},
            {&number_regex, 21},
            {&operators_regex, 31},
        };
        for (const auto& pattern : patterns) {
            if (std::regex_match(str, match, *pattern.first)) {
                return pattern.second;
            }
        }
        return -1;
    }
};
// Shared classifier instance used by process() and ExpressionProc().
TokenClassifier classifier;
// Splits the source stream into tokens.
//
// The stream is read one whitespace-delimited word at a time. Within a word,
// characters are appended to a running candidate; as soon as the candidate
// stops being classifiable, everything before the newest character is emitted
// as a token and the newest character starts the next candidate. Two special
// cases handle '+' / '-':
//   * a lone "-" directly followed by an identifier character is emitted as
//     "---" (unary minus);
//   * a '+' or '-' that directly follows an identifier or number token is
//     emitted as its own token immediately, so it cannot become the sign of
//     a following literal.
// Candidates that are unclassifiable even as a single character (for example
// "!" or "~") cause an empty token to be emitted first; this is kept as is.
//
// @param file  input stream positioned at the start of the source text
// @return      the tokens in source order
deque<string> process(fstream& file){
    deque<string> code;
    string sent;
    while (file >> sent) {
        string curr;
        for (size_t i = 0; i < sent.size(); ++i) {
            const char ch = sent[i];
            curr += ch;
            if (classifier.classify(curr) == -1) {
                // Emit everything up to (not including) the newest character.
                curr.pop_back();
                if (curr == "-" && classifier.classify(string(1, ch)) == 11) curr = "---";
                code.push_back(curr);
                curr.clear();
                // isspace() requires a value representable as unsigned char.
                if (!isspace(static_cast<unsigned char>(ch))) curr += ch;
            }
            // Only look at the previous token when there is one; back() on an
            // empty deque is undefined behaviour.
            if ((ch == '+' || ch == '-') && !code.empty()) {
                const int flag = classifier.classify(code.back());
                if (flag == 11 || flag == 21) {
                    if (!curr.empty()) curr.pop_back();
                    code.push_back(string(1, ch));
                }
            }
        }
        if (classifier.classify(curr) != -1) {
            code.push_back(curr);
        }
    }
    return code;
}
// Returns the signed ebp-relative displacement (as text, e.g. "+8" or "-4")
// of the parameter or local variable called `name`.
//
// Parameters are looked up first and live at ebp + (position + 1) * 4; locals
// live at ebp - slot * 4. A name found in neither table yields "-0".
//
// The lookup uses find() rather than operator[] so that an unknown name is
// not silently inserted into vTable with slot 0: declarations register
// variables with insert(), which would keep such a stale entry and never
// give the variable a real slot.
string getMemoryLoc(string& name){
    const auto param = aTable.find(name);
    if (param != aTable.end()) {
        return "+" + to_string((param->second + 1) * 4);
    }
    const auto local = vTable.find(name);
    const int slot = (local != vTable.end()) ? local->second : 0;
    return "-" + to_string(slot * 4);
}
// Updates the running parenthesis depth `bracket` for one token: "(" raises
// it by one, ")" lowers it by one, every other token leaves it unchanged.
void calBracket(string symbol, int& bracket){
    const bool opens = (symbol == "(");
    const bool closes = (symbol == ")");
    bracket += static_cast<int>(opens) - static_cast<int>(closes);
}
// Instruction text for every binary operator. It is emitted after the right
// operand has been popped into ebx and the left operand into eax.
static const map<string, string>& binaryOperatorCode() {
    static const map<string, string> table = {
        {"+",  "add eax, ebx\npush eax\n\n"},
        {"-",  "sub eax, ebx\npush eax\n\n"},
        {"*",  "imul eax, ebx\npush eax\n\n"},
        {"/",  "cdq\nidiv eax, ebx\npush eax\n\n"},
        {"%",  "cdq\nidiv eax, ebx\npush edx\nmov eax, edx\n\n"},
        {"<",  "cmp eax, ebx\nmov eax, 0\nsetl al\npush eax\n\n"},
        {"<=", "cmp eax, ebx\nmov eax, 0\nsetle al\npush eax\n\n"},
        {"==", "cmp eax, ebx\nmov eax, 0\nsete al\npush eax\n\n"},
        {">",  "cmp eax, ebx\nmov eax, 0\nsetg al\npush eax\n\n"},
        {">=", "cmp eax, ebx\nmov eax, 0\nsetge al\npush eax\n\n"},
        {"!=", "cmp eax, ebx\nmov eax, 0\nsetne al\npush eax\n\n"},
        {"|",  "or eax, ebx\npush eax\n\n"},
        {"^",  "xor eax, ebx\npush eax\n\n"},
        {"&",  "and eax, ebx\npush eax\n\n"},
        {"&&", "test eax, eax\nsetnz al\ncbw\ncwde\ntest ebx, ebx\nsetnz bl\nand al, bl\npush eax\n\n"},
        {"||", "test eax, eax\nsetnz al\ncbw\ncwde\ntest ebx, ebx\nsetnz bl\nor al, bl\npush eax\n\n"},
    };
    return table;
}

// Instruction text for every unary operator. It is emitted after the operand
// has been popped into eax.
static const map<string, string>& unaryOperatorCode() {
    static const map<string, string> table = {
        {"!",   "test eax, eax\nsetz al\ncbw\ncwde\npush eax\n\n"},
        {"---", "neg eax\npush eax\n\n"},
        {"~",   "not eax\npush eax\n\n"},
    };
    return table;
}

// Emits the code for operator `op` and updates `results`, the compile-time
// mirror of the operands currently on the run-time stack. Operands missing
// from `results` (malformed input) are tolerated instead of popping an empty
// container.
static void emitOperator(const string& op, deque<deque<string>>& results) {
    const auto dropOperand = [&results]() {
        if (!results.empty()) results.pop_back();
    };

    // The topmost entry is the right operand, the unary operand, or the value
    // being assigned.
    dropOperand();

    if (op == "=") {
        // What remains on top is the assignment target; it was never pushed
        // at run time, only its name is needed. Assignment leaves no value.
        if (results.empty()) return;
        cout << "pop eax\n";
        cout << "mov DWORD PTR [ebp" + getMemoryLoc(results.back()[0]) + "], eax\n\n";
        results.pop_back();
        return;
    }

    const auto binary = binaryOperatorCode().find(op);
    if (binary != binaryOperatorCode().end()) {
        cout << "pop ebx\npop eax\n" << binary->second;
        dropOperand();  // left operand
    } else {
        const auto unary = unaryOperatorCode().find(op);
        if (unary != unaryOperatorCode().end()) {
            cout << "pop eax\n" << unary->second;
        }
    }
    // Placeholder standing for the operator's result.
    results.push_back({"pad"});
}

// Returns the index of the token at which the parenthesis depth first returns
// to zero on a ")" when scanning from token 0, or -1 when that never happens.
static int findCallEnd(const deque<string>& tokens) {
    int bracket = 0;
    for (int k = 0; k < static_cast<int>(tokens.size()); ++k) {
        calBracket(tokens[k], bracket);
        if (tokens[k] == ")" && bracket == 0) return k;
    }
    return -1;
}

// Translates one expression (or expression statement) given as a token
// sequence into stack-machine style x86 code written to stdout.
//
// Handled forms:
//   * a single number or identifier: loaded into eax and pushed;
//   * a call `f(args...)` of a function registered in fTable, when the call
//     spans the whole token sequence: arguments are evaluated right to left,
//     the function is called, the arguments are removed, and eax is pushed
//     when the function's fTable flag is 1;
//   * `println_int(expr)`: expr is evaluated and printed through printf;
//   * anything else: converted to postfix order with an operator stack and
//     evaluated operand by operand.
//
// Differences from a naive token-by-token reading that callers rely on:
//   * In an assignment only the FIRST postfix element (the target) is not
//     evaluated. The target is identified by position, so `a = a + 1` loads
//     the right-hand `a` as expected.
//   * An expression that merely starts with a call (`f(x) + g(y)`) is treated
//     as a general expression rather than as the call alone.
//   * Unary operators are right-associative, so `!!x` applies both operators
//     after the operand.
//   * Empty input and unbalanced or truncated token sequences are tolerated
//     without indexing past the end of a container.
//
// @param code_processed  tokens of the expression; not modified
void ExpressionProc(deque<string>& code_processed){
    if (code_processed.empty()) return;
    const int n = static_cast<int>(code_processed.size());

    // --- single operand -----------------------------------------------------
    if (n == 1) {
        const int kind = classifier.classify(code_processed[0]);
        if (kind == 21) {
            cout << "mov eax, " + code_processed[0] + "\n";
            cout << "push eax\n";
        } else if (kind == 11) {
            cout << "mov eax, DWORD PTR [ebp" + getMemoryLoc(code_processed[0]) + "]\n";
            cout << "push eax\n\n";
        }
        return;
    }

    // --- call spanning the whole expression ---------------------------------
    const auto callee = fTable.find(code_processed[0]);
    if (callee != fTable.end()) {
        const int callEnd = findCallEnd(code_processed);
        if (callEnd == -1 || callEnd == n - 1) {
            vector<deque<string>> argList;
            int idx = 1;  // index of "("; scanning starts right after it
            int remaining = callee->second.first;
            while (remaining > 0) {
                deque<string> sub;
                int bracket = 0;
                bool terminated = false;
                while (++idx < n) {
                    const string& tok = code_processed[idx];
                    // A top-level "," ends an argument; the top-level ")"
                    // ends the last one.
                    if (bracket == 0 && (tok == "," || (tok == ")" && remaining == 1))) {
                        terminated = true;
                        break;
                    }
                    calBracket(tok, bracket);
                    sub.push_back(tok);
                }
                if (!terminated) break;  // fewer arguments than declared
                --remaining;
                argList.push_back(sub);
            }
            // Push arguments right to left so the first one ends up nearest
            // to the return address.
            for (auto arg = argList.rbegin(); arg != argList.rend(); ++arg) {
                ExpressionProc(*arg);
            }
            cout << "call " + code_processed[0] + "\n";
            cout << "add esp, " + to_string(callee->second.first * 4) + "\n";
            if (callee->second.second == 1) {
                cout << "push eax\n";
            }
            return;
        }
        // Otherwise fall through: the call is one operand of a larger
        // expression and is handled by the postfix conversion below.
    } else if (code_processed[0] == "println_int") {
        deque<string> expSub;
        int bracket = 1;  // token 1 is the opening parenthesis
        for (int idx = 2; idx < n; ++idx) {
            calBracket(code_processed[idx], bracket);
            if (code_processed[idx] == ")" && bracket == 0) break;
            expSub.push_back(code_processed[idx]);
        }
        ExpressionProc(expSub);
        cout << "push offset format_str\n";
        cout << "call printf\n";
        cout << "add esp, 8\n";
        return;
    }

    // --- general expression: infix -> postfix -------------------------------
    deque<deque<string>> RPN;
    deque<string> ops;
    bool isEquation = false;
    for (int idx = 0; idx < n; ++idx) {
        const string& tok = code_processed[idx];
        if (Judge::isOp(tok)) {
            const int prio = Judge::getPriority(tok);
            const bool unary = (tok == "!" || tok == "~" || tok == "---");
            // Pop operators that bind at least as tightly; a unary operator
            // never pops an equally tight one (right associativity), and an
            // open parenthesis is never popped here.
            while (!ops.empty() && ops.back() != "(") {
                const int top = Judge::getPriority(ops.back());
                if (top < prio || (top == prio && unary)) break;
                const string op = ops.back();
                ops.pop_back();
                RPN.push_back({op});
            }
            if (tok == "=") isEquation = true;
            ops.push_back(tok);
        } else if (tok == ")") {
            while (!ops.empty() && ops.back() != "(") {
                const string op = ops.back();
                ops.pop_back();
                RPN.push_back({op});
            }
            if (!ops.empty()) ops.pop_back();  // discard the matching "("
        } else if (tok == "(") {
            ops.push_back(tok);
        } else if (fTable.find(tok) != fTable.end()) {
            // A call is kept together as one postfix element and translated
            // recursively when it is evaluated.
            deque<string> sub;
            int bracket = 0;
            while (idx < n) {
                sub.push_back(code_processed[idx]);
                calBracket(code_processed[idx], bracket);
                if (code_processed[idx] == ")" && bracket == 0) break;
                ++idx;
            }
            RPN.push_back(sub);
        } else {
            RPN.push_back({tok});
        }
    }
    while (!ops.empty()) {
        const string op = ops.back();
        ops.pop_back();
        RPN.push_back({op});
    }
    if (RPN.empty()) return;

    // --- evaluate the postfix sequence ---------------------------------------
    deque<deque<string>> results;
    for (size_t k = 0; k < RPN.size(); ++k) {
        deque<string>& rpn = RPN[k];
        const string operatorName = rpn[0];
        if (rpn.size() == 1 && Judge::isOp(operatorName)) {
            emitOperator(operatorName, results);
        } else {
            if (operatorName != "") results.push_back(rpn);
            // In an assignment the first element is the target: it is only
            // named, never loaded.
            if (!isEquation || k != 0) ExpressionProc(rpn);
        }
    }
}
// Starts the translation of a user-defined function whose name is at
// tokens[nameIdx]: resets the per-function symbol tables, records each
// `int <name>` parameter in aTable with its 1-based position, registers the
// function in fTable and prints the label and prologue.
static void beginFunction(const deque<string>& tokens, int nameIdx) {
    const int n = static_cast<int>(tokens.size());
    aTable.clear();
    vTable.clear();
    varibles = 0;

    int argCount = 0;
    for (int k = nameIdx + 1; k < n && tokens[k] != ")"; ++k) {
        if (tokens[k] == "int" && k + 1 < n) {
            ++argCount;
            ++k;  // the parameter name follows its type
            aTable.insert({tokens[k], argCount});
        }
    }
    fTable.insert({tokens[nameIdx], {argCount, 1}});
    cout << tokens[nameIdx] + ":\n";
    cout << "push ebp\n";
    cout << "mov ebp, esp\n";
    cout << "sub esp, 0x100\n";
}

// Entry point: tokenizes the file named by argv[1] and writes the translated
// assembly program to stdout.
//
// The token stream is walked statement by statement (";" separates
// statements). Each statement start is dispatched on its first token:
//   "void" / "int name (" : function definition (label + prologue)
//   "int main"            : main definition (label + prologue)
//   "int ..."             : local declaration list, with optional initializers
//   "return"              : evaluate the expression, then leave/ret
//   "}"                   : emits leave/ret when closing a void function
//   anything else         : expression statement
//
// Returns 1 when no input file is given or it cannot be opened, 0 otherwise.
int main(int argc, char* argv[]) {
    if (argc < 2) {
        cerr << "usage: " << (argc > 0 ? argv[0] : "compiler") << " <source-file>\n";
        return 1;
    }
    fstream file(argv[1], fstream::in);
    if (!file) {
        cerr << "cannot open " << argv[1] << "\n";
        return 1;
    }
    deque<string> code_processed = process(file);
    const int n = static_cast<int>(code_processed.size());

    cout << ".intel_syntax noprefix\n";
    cout << ".global main\n";
    cout << ".extern printf\n";
    cout << ".data\n";
    cout << "format_str:\n";
    cout << "\t.asciz \"%d\\n\"\n";
    cout << ".text\n";
    cout << "\n";

    // Bounds-checked token access for look-ahead; out-of-range yields "".
    const auto tokenAt = [&](int k) -> const string& {
        static const string kNone;
        return (k >= 0 && k < n) ? code_processed[k] : kNone;
    };
    // Index of the first "{" at or after `from`, or n when there is none.
    const auto skipToBodyStart = [&](int from) {
        while (from < n && code_processed[from] != "{") ++from;
        return from;
    };

    int i = -1;
    while (i < n) {
        i++;
        while (i < n && code_processed[i] != ";") {
            const string& token = code_processed[i];
            if (token == "void") {
                curr_namespace_return = token;
                i++;
                // Locals are reset here exactly as for int functions, so slot
                // numbers do not carry over from the previous function.
                if (i < n) beginFunction(code_processed, i);
                i = skipToBodyStart(i);
            }
            else if (token == "int") {
                if (tokenAt(i + 1) == "main") {
                    curr_namespace_return = "main";
                    aTable.clear();
                    vTable.clear();
                    varibles = 0;
                    cout << "main:\n";
                    cout << "push ebp\n";
                    cout << "mov ebp, esp\n";
                    cout << "sub esp, 0x200\n";
                    i = skipToBodyStart(i);
                }
                else if (tokenAt(i + 2) == "(") {
                    curr_namespace_return = token;
                    i++;
                    beginFunction(code_processed, i);
                    i = skipToBodyStart(i);
                }
                else {
                    // Declaration list: split at top-level commas; every
                    // declarator gets a slot, and one with an initializer is
                    // translated as an assignment.
                    int idx = i + 1, bracket = 0;
                    deque<deque<string>> claimList;
                    deque<string> vl;
                    for (; idx < n && code_processed[idx] != ";"; ++idx) {
                        calBracket(code_processed[idx], bracket);
                        if (code_processed[idx] == "," && bracket == 0) {
                            claimList.push_back(vl);
                            vl.clear();
                        }
                        else vl.push_back(code_processed[idx]);
                    }
                    if (!vl.empty()) claimList.push_back(vl);
                    for (auto& claim : claimList) {
                        if (claim.empty()) continue;  // e.g. "int a,, b"
                        vTable.insert({claim[0], ++varibles});
                        if (claim.size() > 1) {
                            ExpressionProc(claim);
                        }
                    }
                    i = idx - 1;
                }
            }
            else if (token == "return") {
                deque<string> sub;
                while (i + 1 < n && code_processed[i + 1] != ";") {
                    i++;
                    sub.push_back(code_processed[i]);
                }
                if (!sub.empty()) ExpressionProc(sub);
                cout << "leave\nret\n\n";
            }
            else if (token == "}" && curr_namespace_return == "void") {
                // A void function may end without an explicit return.
                cout << "leave\nret\n\n";
                curr_namespace_return = "";
            }
            else if (token == "}") {
                // Nothing to emit for the end of a non-void function.
            }
            else {
                deque<string> sub;
                while (i < n && code_processed[i] != ";") {
                    sub.push_back(code_processed[i++]);
                }
                ExpressionProc(sub);
                i = i - 1;
            }
            i++;
        }
    }
    file.close();
    return 0;
}