皇家药学院的信管专业毕竟不是专门的计算机专业。WEB的课程也是基本等于没有,没有开设编译原理课程。为了巩固基础知识,自学编译原理
第一课:sub虚拟机和sub简单编译器。
为了简单,虚拟机只支持两个命令:
push和add
push要带一个数字参数,效果就是把跟的参数压栈
add不用带参数,效果就是从栈顶取两个元素相加,把结果压入栈顶
这里借助二叉树实现语法分析:把加法代码解析为二叉树,再对二叉树做后序遍历(先遍历左节点,再遍历右节点,最后访问父节点)。对节点值不是加号的节点,生成 push "节点值" 命令;对节点值为加号的节点,生成 add 命令。这样整棵树遍历完,就生成了sub虚拟机的机器码。
sub虚拟机实现
#pragma once
#include <iostream>
#include <string>
#include <fstream>
#include <sstream>
#include <vector>
#include <stack>
using namespace std;
/*
This study example is a compiler-principles exercise that implements a simple "sub" virtual machine.
The virtual machine supports only two instructions:
push <number>
add
The machine is stack based: push pushes one value onto the stack;
add takes the two topmost elements off the stack, adds them, and pushes the result back on top.
For example, the machine code for 1+2+3 is listed below; a custom high-level language can be compiled into this form:
push 1
push 2
add
push 3
add
The virtual machine loads the instruction file emitted by the compiler, executes it on the stack, and finally prints the result.
*/
/// <summary>
/// Reads a whole file into a string
/// </summary>
/// <param name="filename">Path of the file to read</param>
/// <returns>The file contents</returns>
string ReadFileIntoString(char* filename);
/// <summary>
/// Splits a string into tokens
/// </summary>
/// <param name="str">String to split</param>
/// <param name="delim">Delimiter characters; each character acts as a separator</param>
/// <returns>Vector of the tokens found</returns>
vector<string> SplitString(const string& str, const string& delim);
/// <summary>
/// Converts a floating-point value to a string
/// </summary>
/// <param name="d">Value to convert</param>
/// <returns>Text form of the value</returns>
string DoubleToString(double d);
/// <summary>
/// Converts a string to a floating-point value
/// </summary>
/// <param name="str">Text to parse</param>
/// <returns>The parsed value</returns>
double StringToDouble(string str);
/// <summary>
/// Entry point: executes every instruction file named on the command line
/// and prints the value left on top of the stack for each of them
/// </summary>
/// <param name="argc">Number of command-line arguments</param>
/// <param name="argv">argv[1] onwards are the paths of the instruction files to run</param>
/// <returns>0 when every file ran cleanly, 1 when a file contained a malformed instruction</returns>
int main(int argc, char* argv[])
{
    if (argc <= 1)
    {
        std::cout << "未传入要执行的文件名!" << std::endl;
        return 0;
    }
    int exitCode = 0;
    for (int fileIndex = 1; fileIndex < argc; fileIndex++)
    {
        // Load the machine code
        std::string code = ReadFileIntoString(argv[fileIndex]);
        // One instruction per line; CR is a separator too so CRLF files do not
        // leave a stray '\r' glued to the opcode
        std::vector<std::string> lines = SplitString(code, "\r\n");
        // Operand stack of the virtual machine
        std::stack<std::string> machineStack;
        bool failed = false;
        for (std::vector<std::string>::size_type n = 0; n < lines.size() && !failed; n++)
        {
            // Split one instruction into opcode and operand
            std::vector<std::string> parts = SplitString(lines[n], " \t");
            if (parts.empty())
            {
                // Whitespace-only line: nothing to execute
                continue;
            }
            if (parts[0] == "push")
            {
                if (parts.size() < 2)
                {
                    std::cerr << argv[fileIndex] << ": 第" << (n + 1) << "条指令错误: push缺少操作数" << std::endl;
                    failed = true;
                }
                else
                {
                    machineStack.push(parts[1]);
                }
            }
            else if (parts[0] == "add")
            {
                // add consumes two operands; popping an empty stack is undefined behaviour
                if (machineStack.size() < 2)
                {
                    std::cerr << argv[fileIndex] << ": 第" << (n + 1) << "条指令错误: add需要栈中至少有两个元素" << std::endl;
                    failed = true;
                }
                else
                {
                    std::string first = machineStack.top();
                    machineStack.pop();
                    std::string second = machineStack.top();
                    machineStack.pop();
                    double sum = StringToDouble(first) + StringToDouble(second);
                    machineStack.push(DoubleToString(sum));
                }
            }
            // Any other opcode is ignored, exactly as before
        }
        if (failed)
        {
            exitCode = 1;
            continue;
        }
        // After the program has run, the top of the stack is the result
        if (!machineStack.empty())
        {
            std::cout << "执行结果:" << machineStack.top() << std::endl;
        }
    }
    return exitCode;
}
/// <summary>
/// Converts a floating-point value to its decimal text form
/// </summary>
/// <param name="d">Value to format</param>
/// <returns>Text with up to 15 significant digits, or "invalid conversion" if the stream write fails</returns>
std::string DoubleToString(double d) {
    // The stream default of 6 significant digits turned 1234567 into
    // "1.23457e+06", silently corrupting every intermediate sum that is
    // stored back on the VM stack as text. 15 digits is the most a double
    // is guaranteed to hold, and still prints 0.1 + 0.2 as "0.3".
    const int kSignificantDigits = 15;
    std::ostringstream os;
    os.precision(kSignificantDigits);
    if (os << d) return os.str();
    return "invalid conversion";
}
/// <summary>
/// Parses the leading number of a string
/// </summary>
/// <param name="str">Text to parse; leading whitespace is skipped by the stream extraction</param>
/// <returns>The parsed value, or 0.0 when no number could be extracted</returns>
double StringToDouble(std::string str) {
    double parsed = 0.0;
    std::istringstream reader(str);
    reader >> parsed;
    // A failed extraction always maps to 0.0
    return reader.fail() ? 0.0 : parsed;
}
/// <summary>
/// Loads the full contents of a file
/// </summary>
/// <param name="filename">Path of the file to read</param>
/// <returns>Every character read from the file; empty when nothing could be read</returns>
std::string ReadFileIntoString(char* filename)
{
    std::ifstream input(filename);
    std::ostringstream contents;
    // Copy one character at a time until either stream stops being usable
    for (;;)
    {
        if (!contents) break;
        char next;
        if (!input.get(next)) break;
        contents.put(next);
    }
    return contents.str();
}
/// <summary>
/// Splits a string into tokens
/// </summary>
/// <param name="str">String to split</param>
/// <param name="delim">Set of delimiter characters; every character in it acts as a separator</param>
/// <returns>
/// The non-empty tokens in order of appearance. Consecutive delimiters yield
/// no empty tokens, and an empty input yields an empty vector.
/// </returns>
std::vector<std::string> SplitString(const std::string& str, const std::string& delim) {
    // Same tokenising rules as the previous strtok-based version, but without
    // the two leaked new[] buffers, without strtok's hidden global state and
    // without needing <cstring>.
    std::vector<std::string> res;
    std::string::size_type begin = str.find_first_not_of(delim);
    while (begin != std::string::npos) {
        const std::string::size_type end = str.find_first_of(delim, begin);
        // When end is npos, substr clamps the count and returns the rest of the string
        res.push_back(str.substr(begin, end - begin));
        begin = str.find_first_not_of(delim, end);
    }
    return res;
}
sub编译器实现
头文件
#pragma once
#include <iostream>
#include <string>
using namespace std;
// Element type stored in each binary-tree node
typedef std::string TreeElemType;
/// <summary>
/// One node of the binary syntax tree.
/// A freshly constructed node has empty data and no children.
/// </summary>
struct BitTreeNode {
    /// <summary>
    /// Payload of the node
    /// </summary>
    TreeElemType data;
    /// <summary>
    /// Left child, NULL when absent
    /// </summary>
    struct BitTreeNode* leftChild;
    /// <summary>
    /// Right child, NULL when absent
    /// </summary>
    struct BitTreeNode* rightChild;
    /// <summary>
    /// Starts both child links as NULL so a node is never observed with
    /// indeterminate pointers
    /// </summary>
    BitTreeNode() : data(), leftChild(NULL), rightChild(NULL) {}
};
实现文件
#include "CompileStudy.h"
#include <fstream>
#include <sstream>
#include <vector>
#include <stack>
/*
This study example is a compiler-principles exercise that implements a simple "sub" compiler targeting a virtual machine that only understands sub code.
The virtual machine supports only two instructions:
push <number>
add
The machine is stack based: push pushes one value onto the stack;
add takes the two topmost elements off the stack, adds them, and pushes the result back on top.
For example, 1+2+3 is valid input for this compiler.
It compiles to:
push 1
push 2
add
push 3
add
The virtual machine loads the instruction file emitted by this compiler, executes it on the stack, and finally prints the result.
*/
/// <summary>
/// Reads a whole file into a string
/// </summary>
/// <param name="filename">Path of the file to read</param>
/// <returns>The file contents</returns>
string ReadFileIntoString(char* filename);
/// <summary>
/// Writes a vector of strings to a file, one entry per line
/// </summary>
/// <param name="filename">Path of the file to write</param>
/// <param name="codeList">Lines to write</param>
void WriteStrToFile(string filename, vector<string>* codeList);
/// <summary>
/// Splits a string into tokens
/// </summary>
/// <param name="str">String to split</param>
/// <param name="delim">Delimiter characters; each character acts as a separator</param>
/// <returns>Vector of the tokens found</returns>
vector<string> SplitString(const string& str, const string& delim);
/// <summary>
/// Walks the binary tree and generates stack-machine code
/// </summary>
/// <param name="node">Root of the (sub)tree to walk</param>
/// <param name="codeList">Receives the generated instructions, appended in order</param>
void TraverseMakeCode(const BitTreeNode* node, vector<string>* codeList);
/// <summary>
/// Replaces occurrences of one substring with another
/// </summary>
/// <param name="str">String to modify in place</param>
/// <param name="old_value">Substring to look for</param>
/// <param name="new_value">Replacement text</param>
/// <returns>Reference to str</returns>
string& ReplaceAllStr(string& str, const string& old_value, const string& new_value);
/// <summary>
/// 入口函数
/// </summary>
/// <returns></returns>
int main(int argc, char* argv[])
{
//有参数就按参数传
if (argc > 1)
{
for (int i = 1; i < argc; i++)
{
string code = ReadFileIntoString(argv[i]);
cout << "读取:" << argv[i] << "代码为:" + code << endl;
vector<string> vecStr = SplitString(code,"+");
//1+2+3生成一颗二叉树
if (vecStr.size() > 0)
{
BitTreeNode* mianNode= new BitTreeNode;
mianNode->data = "+";
mianNode->leftChild = NULL;
mianNode->rightChild = NULL;
for (int i = 0; i < vecStr.size(); i++)
{
//树的总节点
BitTreeNode* oneNode = new BitTreeNode;
oneNode->data = vecStr[i];
oneNode->leftChild = NULL;
oneNode->rightChild = NULL;
if (mianNode->leftChild == NULL)
{
mianNode->leftChild = oneNode;
}
else if (mianNode->rightChild == NULL)
{
mianNode->rightChild = oneNode;
}
else
{
BitTreeNode* mianNodeNew = new BitTreeNode;
mianNodeNew->data = "+";
mianNodeNew->leftChild = mianNode;
mianNodeNew->rightChild = oneNode;
mianNode = mianNodeNew;
}
}
//按二叉树生成代码
cout << "构造语法二叉树完成"<< endl;
vector<string> codeList;
//按二叉树生成栈执行代码
TraverseMakeCode(mianNode, &codeList);
string outName = argv[i];
//输出名字
outName = ReplaceAllStr(outName,".code",".sub");
//输出文件
WriteStrToFile(outName, &codeList);
}
}
}
else
{
cout << "未传入要编译的文件名!" << endl;
}
return 0;
}
/// <summary>
/// Replaces every occurrence of old_value in str with new_value, in place
/// </summary>
/// <param name="str">String to modify</param>
/// <param name="old_value">Substring to look for; when empty, str is left untouched</param>
/// <param name="new_value">Replacement text</param>
/// <returns>Reference to str</returns>
std::string& ReplaceAllStr(std::string& str, const std::string& old_value, const std::string& new_value)
{
    // An empty pattern matches at every position and would never terminate
    if (old_value.empty()) return str;
    std::string::size_type pos = 0;
    while ((pos = str.find(old_value, pos)) != std::string::npos)
    {
        str.replace(pos, old_value.length(), new_value);
        // Resume after the inserted text: restarting at 0 looped forever
        // whenever new_value itself contained old_value
        pos += new_value.length();
    }
    return str;
}
/// <summary>
/// 遍历二叉树生成栈代码
/// </summary>
/// <param name="node">总节点</param>
/// <returns>栈执行代码</returns>
void TraverseMakeCode(const BitTreeNode * node, vector<string>* codeList)
{
if (node != NULL)
{
if (node->leftChild == NULL&& node->rightChild == NULL)
{
codeList->push_back("push " + node->data);
}
if (node->leftChild != NULL)
{
TraverseMakeCode(node->leftChild, codeList);
}
if (node->rightChild != NULL)
{
TraverseMakeCode(node->rightChild, codeList);
}
if (node->data =="+")
{
codeList->push_back("add");
}
}
}
/// <summary>
/// Loads the full contents of a file
/// </summary>
/// <param name="filename">Path of the file to read</param>
/// <returns>Every character read from the file; empty when nothing could be read</returns>
std::string ReadFileIntoString(char* filename)
{
    std::ifstream source(filename);
    std::ostringstream sink;
    char current;
    // Keep copying while the sink is healthy and the source still yields a character
    bool more = sink && source.get(current);
    while (more)
    {
        sink.put(current);
        more = sink && source.get(current);
    }
    return sink.str();
}
/// <summary>
/// Writes the generated instructions to a file, one per line, and echoes
/// them to standard output
/// </summary>
/// <param name="filename">Path of the file to create or truncate</param>
/// <param name="codeList">Instructions to write; NULL is treated as an empty list</param>
void WriteStrToFile(std::string filename, std::vector<std::string>* codeList)
{
    std::ofstream outfile(filename.c_str(), std::ios::trunc);
    std::cout << "生成add栈代码" << std::endl;
    if (codeList)
    {
        for (std::vector<std::string>::size_type j = 0; j < codeList->size(); j++)
        {
            std::cout << (*codeList)[j] << std::endl;
            // No per-line flush is needed for the file; close() flushes once
            outfile << (*codeList)[j] << '\n';
        }
    }
    outfile.close();
    // A failed open, write or close leaves the stream in a failed state;
    // do not claim success in that case
    if (!outfile)
    {
        std::cerr << "无法写入文件:" + filename << std::endl;
        return;
    }
    std::cout << "代码生成在:" + filename << std::endl;
}
/// <summary>
/// Splits a string into tokens
/// </summary>
/// <param name="str">String to split</param>
/// <param name="delim">Set of delimiter characters; every character in it acts as a separator</param>
/// <returns>
/// The non-empty tokens in order of appearance. Consecutive delimiters yield
/// no empty tokens, and an empty input yields an empty vector.
/// </returns>
std::vector<std::string> SplitString(const std::string& str, const std::string& delim) {
    // Same tokenising rules as the previous strtok-based version, but without
    // the two leaked new[] buffers, without strtok's hidden global state and
    // without needing <cstring>.
    std::vector<std::string> tokens;
    std::string::size_type start = str.find_first_not_of(delim);
    while (start != std::string::npos) {
        const std::string::size_type stop = str.find_first_of(delim, start);
        // When stop is npos, substr clamps the count and returns the rest of the string
        tokens.push_back(str.substr(start, stop - start));
        start = str.find_first_not_of(delim, stop);
    }
    return tokens;
}
源码和编译结果
源码
1+2+3
编译结果
push 1
push 2
add
push 3
add
执行测试
源码
1+2+3+23+64+100+76
编译结果
push 1
push 2
add
push 3
add
push 23
add
push 64
add
push 100
add
push 76
add
执行测试
这次的例子复习了C++二叉树和二叉树的遍历。理解基础栈式虚拟机原理,理解基本的代码编译。如果真的有个芯片支持push和add,那么这就是一个简单的求和语言编译器。避免直接写push和add命令,也就是语言抽象的“高级语言”