词法分析识别C语言格式:
{
int a;
int b;
int i;
a = 0;
b = 1;
for (i=1; i <= 10; i=i+1)
{
a=a+i;
b=b*i;
}
}
语法分析:(注意C语言语法规则,先定义后操作,所有定义必须写在前面)
语法分析阶段先把词法分析的输出文本整理成统一格式,便于后续语义分析使用。
语法分析采用LL1分析表完成
LL1分析表如下:
{ | } | int | if | read | write | for | while | ID | NUM | ; | ( | ) | else | + | - | * | / | > | < | = | >= | <= | == | != | # | ||
<program> | S | S -> { A B } | |||||||||||||||||||||||||
<declaration_list> | A | A -> ε | A -> ε | A -> C | A -> ε | A -> ε | A -> ε | A -> ε | A -> ε | A -> ε | A -> ε | A -> ε | |||||||||||||||
<statement_list> | B | B -> F | B -> ε | B -> F | B -> F | B -> F | B -> F | B -> F | B -> F | B -> F | B -> ε | ||||||||||||||||
<declaration_list1> | C | C -> ε | C -> ε | C -> D C | C -> ε | C -> ε | C -> ε | C -> ε | C -> ε | C -> ε | C -> ε | C -> ε | |||||||||||||||
<declaration_stat> | D | D -> int ID ; | |||||||||||||||||||||||||
<statement_list1> | F | F -> G F | F -> ε | F -> G F | F -> G F | F -> G F | F -> G F | F -> G F | F -> G F | F -> G F | F -> ε | ||||||||||||||||
<statement> | G | G -> M | G -> H | G -> K | G -> L | G -> J | G -> I | G -> R | G -> ; | ||||||||||||||||||
<if_stat> | H | H -> if ( P ) G Z | |||||||||||||||||||||||||
ε|else <statement> | Z | Z -> ε | Z -> ε | Z -> ε | Z -> ε | Z -> ε | Z -> ε | Z -> ε | Z -> ε | Z -> ε | Z -> else G | Z -> ε | |||||||||||||||
<while_stat> | I | I -> while ( P ) G | |||||||||||||||||||||||||
<for_stat> | J | J -> for ( O ; P ; O ) G | |||||||||||||||||||||||||
<read_stat> | K | K -> read ID ; | |||||||||||||||||||||||||
<write_stat> | L | L -> write Q ; | |||||||||||||||||||||||||
<compound_stat> | M | M -> { B } | |||||||||||||||||||||||||
<assignment_expression> | O | O -> ID = Q | |||||||||||||||||||||||||
<bool_expression> | P | P -> Q T | P -> Q T | P -> Q T | |||||||||||||||||||||||
<arithmetic_expression> | Q | Q -> V W | Q -> V W | Q -> V W | |||||||||||||||||||||||
<assignment_stat> | R | R -> O ; | |||||||||||||||||||||||||
<bool_expression1> | T | T -> > Q | T -> < Q | T -> = Q | T -> >= Q | T -> <= Q | T -> == Q | T -> != Q | |||||||||||||||||||
<term> | V | V -> X Y | V -> X Y | V -> X Y | |||||||||||||||||||||||
<arithmetic_expression1> | W | W -> ε | W -> ε | W -> + V W | W -> - V W | W -> ε | W -> ε | W -> ε | W -> ε | W -> ε | W -> ε | W -> ε | W -> ε | ||||||||||||||
<factor> | X | X -> ID | X -> NUM | X -> ( Q ) | |||||||||||||||||||||||
<term1> | Y | Y -> ε | Y -> ε | Y -> ε | Y -> ε | Y -> * X Y | Y -> / X Y | Y -> ε | Y -> ε | Y -> ε | Y -> ε | Y -> ε | Y -> ε | Y -> ε | Y -> ε |
注意 if 的特判:if 语句有两个产生式,但是我的分析表里只写了一个产生式,所以我在代码里对 if 语句做了特判。
整个词法+语法分析代码如下(C++):
/**
* Create By ZZK
**/
#include<iostream>
#include<fstream>
#include<string>
#include<string.h>
#include<vector>
#include<stack>
#include<map>
using namespace std;
#define mkp(a,b) make_pair(a,b)
int index = 0; // Read cursor into the line currently held in buffer
int len = 0; // Number of valid characters in buffer
char buffer[1005]; // Characters of one line of the source text
ifstream read("A.txt"); // Input stream; change the path to your own source file
ofstream anss("lex.txt"); // Output stream; change the path to your own token file
int nowline = 0; // Number of the line currently being scanned
int firstline = 0; // Line on which the current comment opener (/*) was seen
string ans = ""; // Prefix handed to the state functions; analyse() resets it for every token
string mKeyWord[7] = { "if","else","while","for","int","read","write" };// Reserved words
char mSprate[6] = { ';',',','{','}','(',')' }; // Delimiters
char mOperater[8] = { '+','-','*','/','>','<','=','!' }; // Operator characters
char mPassword[4] = { ' ','\t','\r','\n' }; // Characters the lexer skips over
// Forward declarations of the state functions; they correspond to the 11 states of the DFA
// Meaning of the parameters shared by all functions below
/*
* upper the string accumulated before entering this state
* st the character handed over by the previous state
*
*/
void error(string st, int line); // error state
void firstAlph(string upper, char st); // state A
void firstNumber1(string upper, char st); // state B
void firstNumber0(string upper, char st); // state B1
void firstSprate(string upper, char st); // state C
void firstFei(string upper, char st); // state D1
void secondEqual(string upper, char st); // state D
void firstE_Mi_Ma(string upper, char st); // state D2
void firstDiv(char st); // state E
void secondM(string upper, char st); // state E1
void ThirdM(string upper, char st); // state E2
void fourthDiv(string upper, char st); // state E3
/**
 * Returns the next character of the source file, or '\0' once no more input
 * can be read.
 *
 * Characters are served from the global `buffer`; `index` is the read cursor
 * and `len` the number of valid characters. When the buffer is used up, a new
 * line is read, `nowline` is advanced and one blank is appended so that every
 * line ends in a separator.
 *
 * A physical line that does not fit into a single read makes getline() set
 * failbit without eofbit. Previously the stream then stayed failed, eof()
 * never became true and the lexer looped forever. Now the fail state is
 * cleared and the remainder of the line is delivered by the following reads,
 * without counting an extra line or inserting a blank in the middle of it.
 */
char getnextchar() {
    // True while the previous read stopped in the middle of a physical line.
    static bool lineContinues = false;

    if (index < len)
        return buffer[index++];

    // Covers end of file as well as an unrecoverable stream error.
    if (!read.good())
        return '\0';

    if (!lineContinues)
        nowline++;
    read.getline(buffer, 1000);
    lineContinues = read.fail() && !read.eof() && read.gcount() > 0;
    if (lineContinues)
        read.clear(); // buffer was filled up: keep reading the same line next time
    index = 0;
    len = strlen(buffer);
    if (!lineContinues) {
        // Mark the end of the line with a blank.
        buffer[len] = ' ';
        buffer[len + 1] = '\0';
        len++;
    }
    return buffer[index++];
}
// Reports whether `word` is one of the reserved words listed in mKeyWord.
bool isKeyWord(string word) {
    const string *const stop = mKeyWord + 7;
    for (const string *candidate = mKeyWord; candidate != stop; ++candidate) {
        if (*candidate == word) {
            return true;
        }
    }
    return false;
}
// Reports whether `ch` is one of the delimiter characters in mSprate.
bool isSprate(char ch) {
    int pos = 0;
    while (pos < 6 && mSprate[pos] != ch) {
        ++pos;
    }
    return pos < 6;
}
// Reports whether `ch` is one of the operator characters in mOperater.
bool isOperator(char ch) {
    const char *const stop = mOperater + 8;
    for (const char *op = mOperater; op != stop; ++op) {
        if (*op == ch) {
            return true;
        }
    }
    return false;
}
// Reports whether `ch` is one of the characters in mPassword, i.e. one the
// lexer skips over.
bool isPassWord(char ch) {
    int pos = 0;
    while (pos < 4) {
        if (ch == mPassword[pos]) {
            return true;
        }
        ++pos;
    }
    return false;
}
// Reports whether `st` is an ASCII letter (a-z or A-Z).
bool isAlph(char st) {
    const bool lowerCase = ('a' <= st && st <= 'z');
    const bool upperCase = ('A' <= st && st <= 'Z');
    return lowerCase || upperCase;
}
// Reports whether `st` is a decimal digit (0-9).
bool isNumber(char st) {
    return !(st < '0' || st > '9');
}
/**
 * Prints a lexical error to standard output.
 * st   the offending text
 * line the line number the error is reported for
 */
void error(string st, int line) {
    cout << "第" << line;
    cout << "行错误: " << st;
    cout << endl;
}
// State A: collects an identifier or reserved word. `upper + st` is the text
// seen so far; letters and digits are appended until another character
// appears, which is handed back by stepping `index` back. The token is
// written to lex.txt as DEF (reserved word) or ID.
void firstAlph(string upper, char st) {
    string lexeme = upper + st;
    char peeked = getnextchar();
    while (isAlph(peeked) || isNumber(peeked)) {
        lexeme += peeked;
        peeked = getnextchar();
    }
    index--; // give the terminating character back
    const char *tag = isKeyWord(lexeme) ? "DEF " : "ID ";
    // endl keeps lex.txt flushed; the parser reads the file while anss is still open.
    anss << nowline << " " << tag << lexeme << endl;
}
// State B: collects an unsigned integer that starts with a non-zero digit.
// Digits are appended to `upper + st` until a non-digit appears, which is
// handed back by stepping `index` back.
void firstNumber1(string upper, char st) {
    string digits = upper + st;
    char peeked = getnextchar();
    while (isNumber(peeked)) {
        digits += peeked;
        peeked = getnextchar();
    }
    index--; // give the terminating character back
    anss << nowline << " " << "UNSIGNINT " << digits << endl;
}
// State B1: a leading '0' is emitted at once as a complete UNSIGNINT token.
void firstNumber0(string upper, char st) {
    const string lexeme = upper + st;
    anss << nowline << " " << "UNSIGNINT " << lexeme << endl;
}
// State C: writes a delimiter to lex.txt as a DEM token.
void firstSprate(string upper, char st) {
    const string lexeme = upper + st;
    anss << nowline << " " << "DEM " << lexeme << endl;
}
void firstFei(string upper, char st) {
char nextch = getnextchar();
if (nextch != '=') {
index--;
error(upper + st, nowline);//出现错误
}
else {
secondEqual(upper + st, nextch);//状态转移
}
}
// State D: writes the operator `upper + st` to lex.txt as an OP token.
// upper is the text collected by the preceding state.
void secondEqual(string upper, char st) {
    const string lexeme = upper + st;
    anss << nowline << " " << "OP " << lexeme << endl;
}
// State D2: one of '=', '<' or '>' has been read. If the next character is
// '=', the two-character operator is emitted through state D; otherwise the
// next character is handed back and the single character is emitted as OP.
void firstE_Mi_Ma(string upper, char st) {
    const char peeked = getnextchar();
    if (peeked == '=') {
        secondEqual(upper + st, peeked);
        return;
    }
    index--; // step back over the character that was only peeked at
    anss << nowline << " " << "OP " << st << endl;
}
// State E: a '/' has been read and appended to the global `ans`. If a '*'
// follows, a comment starts: its line is remembered in `firstline` and
// scanning continues in state E1. Otherwise the peeked character is handed
// back and '/' is emitted as an OP token.
void firstDiv(char st) {
    ans += st;
    const char peeked = getnextchar();
    if (peeked != '*') {
        index--;
        anss << nowline << " " << "OP " << st << endl;
        return;
    }
    firstline = nowline;
    secondM(ans, peeked);
}
// State E1: inside a comment, after its opener. `upper + st` is the opener
// text collected so far.
//
// Comment characters are discarded until a '*' is found, then control moves
// to state E2. If the input ends first, the unterminated comment is reported
// for the line stored in `firstline` and the function returns. Previously
// the `return` was commented out, so an unterminated comment recursed
// without end. Skipping is now a loop instead of one recursive call per
// comment character, so long comments no longer grow the call stack.
void secondM(string upper, char st) {
    char nextch = getnextchar();
    while (nextch != '*') {
        if (nextch == '\0') {
            // Input ended inside the comment.
            error(upper + st, firstline);
            return;
        }
        // nextch belongs to the comment body: drop it.
        nextch = getnextchar();
    }
    // nextch == '*': pass the accumulated text on to the next state.
    ThirdM(upper + st, nextch);
}
void ThirdM(string upper, char st) {
char nextch = getnextchar();
if (nextch == '*') {
ThirdM(upper, nextch);//再次进入此状态
}
else if (nextch != '*'&&nextch != '/') {
string temp = "/";
secondM(temp, '*'); //回退上一个状态
}
else if (nextch == '/') {
upper = upper + st;
fourthDiv(upper, nextch);//跳转下一个状态
}
else {
error("/*", firstline);//错误就输出
}
}
// State E3: the comment is complete. Two NOTE tokens, one for the opener and
// one for the closer, are written to lex.txt; both carry the current line
// number. The parameters are not used.
void fourthDiv(string upper, char st) {
    static const char *const markers[2] = { "/*", "*/" };
    for (int i = 0; i < 2; ++i) {
        anss << nowline << " " << "NOTE " << markers[i] << endl;
    }
}
// Lexer driver: reads the source character by character until getnextchar()
// returns '\0' and dispatches on the first character of each token to the
// matching DFA state function. Skippable characters are ignored.
void analyse() {
    for (char st = getnextchar(); st != '\0'; st = getnextchar()) {
        ans = "";
        if (isPassWord(st)) {
            continue;
        }
        if (isAlph(st)) {
            firstAlph(ans, st);
        }
        else if (st == '0') {
            firstNumber0(ans, st);
        }
        else if (isNumber(st)) {
            firstNumber1(ans, st);
        }
        else if (isSprate(st)) {
            firstSprate(ans, st);
        }
        else {
            switch (st) {
            case '!':
                firstFei(ans, st);
                break;
            case '+':
            case '-':
            case '*':
                secondEqual(ans, st);
                break;
            case '=':
            case '<':
            case '>':
                firstE_Mi_Ma(ans, st);
                break;
            case '/':
                firstDiv(st);
                break;
            default:
                // No state accepts this character.
                error(ans + st, nowline);
                break;
            }
        }
    }
}
/* Syntax analysis starts here */
int index2 = 0; // Index of the next token that getNext() hands out from text
int len2 = 0; // Number of tokens stored in text
char buffer2[1005]; // Characters of one line of lex.txt
ifstream read2("lex.txt"); // Input stream; change the path to your own token file
stack<string>mStack; // Parse stack of grammar symbols used by S_program()
// Terminal symbols recognised by isVt(); note that "+" is listed twice
string Vt[26] = { "{","}","int","if","read","write","for","while","ID","NUM",";","(",")","else","+","-","*","/",">","<","=","+","<=",">=","==","!=" };
// LL(1) table: (nonterminal, lookahead) -> right-hand side, "NULL" for epsilon
map<pair<string, string>, string>mp;
void inittext(); // Loads the token text
void error(); // Syntax error reporter
void S_program(); // Entry point of the syntax analysis
void initMp(); // Fills the LL(1) table mp
// One token as read back from the lexer output.
struct Word
{
string line; // line the token was found on
string content; // text of the token
string type; // category of the token
};
Word word; // Token currently examined by the parser
Word text[1000];// All tokens of the program
Word lastword; // Previous token, used when reporting errors
/* Fills the LL(1) parsing table `mp`.
 * Key:   (nonterminal, lookahead terminal)
 * Value: right-hand side of the production to apply, symbols separated by
 *        single blanks; "NULL" stands for the empty production.
 * The entries were generated by a Python script from an Excel sheet.
 *
 * Fixes compared with the generated text:
 *  - the lookahead keys for "=" and "==" carried a leading blank (" =",
 *    " =="), so they never matched a token type and every "==" comparison
 *    was rejected;
 *  - W on "-" expanded to "+ V W", which then failed to match the "-" token;
 *    it now expands to "- V W".
 **/
void initMp() {
    // S: <program>
    mp[mkp("S", "{")] = "{ A B }";
    // A: <declaration_list>
    mp[mkp("A", "{")] = "NULL";
    mp[mkp("A", "}")] = "NULL";
    mp[mkp("A", "int")] = "C";
    mp[mkp("A", "if")] = "NULL";
    mp[mkp("A", "read")] = "NULL";
    mp[mkp("A", "write")] = "NULL";
    mp[mkp("A", "for")] = "NULL";
    mp[mkp("A", "while")] = "NULL";
    mp[mkp("A", "ID")] = "NULL";
    mp[mkp("A", ";")] = "NULL";
    mp[mkp("A", "#")] = "NULL";
    // B: <statement_list>
    mp[mkp("B", "{")] = "F";
    mp[mkp("B", "}")] = "NULL";
    mp[mkp("B", "if")] = "F";
    mp[mkp("B", "read")] = "F";
    mp[mkp("B", "write")] = "F";
    mp[mkp("B", "for")] = "F";
    mp[mkp("B", "while")] = "F";
    mp[mkp("B", "ID")] = "F";
    mp[mkp("B", ";")] = "F";
    mp[mkp("B", "#")] = "NULL";
    // C: <declaration_list1>
    mp[mkp("C", "{")] = "NULL";
    mp[mkp("C", "}")] = "NULL";
    mp[mkp("C", "int")] = "D C";
    mp[mkp("C", "if")] = "NULL";
    mp[mkp("C", "read")] = "NULL";
    mp[mkp("C", "write")] = "NULL";
    mp[mkp("C", "for")] = "NULL";
    mp[mkp("C", "while")] = "NULL";
    mp[mkp("C", "ID")] = "NULL";
    mp[mkp("C", ";")] = "NULL";
    mp[mkp("C", "#")] = "NULL";
    // D: <declaration_stat>
    mp[mkp("D", "int")] = "int ID ;";
    // F: <statement_list1>
    mp[mkp("F", "{")] = "G F";
    mp[mkp("F", "}")] = "NULL";
    mp[mkp("F", "if")] = "G F";
    mp[mkp("F", "read")] = "G F";
    mp[mkp("F", "write")] = "G F";
    mp[mkp("F", "for")] = "G F";
    mp[mkp("F", "while")] = "G F";
    mp[mkp("F", "ID")] = "G F";
    mp[mkp("F", ";")] = "G F";
    mp[mkp("F", "#")] = "NULL";
    // G: <statement>
    mp[mkp("G", "{")] = "M";
    mp[mkp("G", "if")] = "H";
    mp[mkp("G", "read")] = "K";
    mp[mkp("G", "write")] = "L";
    mp[mkp("G", "for")] = "J";
    mp[mkp("G", "while")] = "I";
    mp[mkp("G", "ID")] = "R";
    mp[mkp("G", ";")] = ";";
    // H: <if_stat>
    mp[mkp("H", "if")] = "if ( P ) G Z";
    // Z: optional else part of an if statement
    mp[mkp("Z", "{")] = "NULL";
    mp[mkp("Z", "}")] = "NULL";
    mp[mkp("Z", "if")] = "NULL";
    mp[mkp("Z", "read")] = "NULL";
    mp[mkp("Z", "write")] = "NULL";
    mp[mkp("Z", "for")] = "NULL";
    mp[mkp("Z", "while")] = "NULL";
    mp[mkp("Z", "ID")] = "NULL";
    mp[mkp("Z", ";")] = "NULL";
    mp[mkp("Z", "else")] = "else G";
    mp[mkp("Z", "#")] = "NULL";
    // I, J, K, L, M: while / for / read / write / compound statements
    mp[mkp("I", "while")] = "while ( P ) G";
    mp[mkp("J", "for")] = "for ( O ; P ; O ) G";
    mp[mkp("K", "read")] = "read ID ;";
    mp[mkp("L", "write")] = "write Q ;";
    mp[mkp("M", "{")] = "{ B }";
    // O: <assignment_expression>
    mp[mkp("O", "ID")] = "ID = Q";
    // P: <bool_expression>
    mp[mkp("P", "ID")] = "Q T";
    mp[mkp("P", "NUM")] = "Q T";
    mp[mkp("P", "(")] = "Q T";
    // Q: <arithmetic_expression>
    mp[mkp("Q", "ID")] = "V W";
    mp[mkp("Q", "NUM")] = "V W";
    mp[mkp("Q", "(")] = "V W";
    // R: <assignment_stat>
    mp[mkp("R", "ID")] = "O ;";
    // T: <bool_expression1>
    mp[mkp("T", ">")] = "> Q";
    mp[mkp("T", "<")] = "< Q";
    mp[mkp("T", "=")] = "= Q";
    mp[mkp("T", ">=")] = ">= Q";
    mp[mkp("T", "<=")] = "<= Q";
    mp[mkp("T", "==")] = "== Q";
    mp[mkp("T", "!=")] = "!= Q";
    // V: <term>
    mp[mkp("V", "ID")] = "X Y";
    mp[mkp("V", "NUM")] = "X Y";
    mp[mkp("V", "(")] = "X Y";
    // W: <arithmetic_expression1>
    mp[mkp("W", ";")] = "NULL";
    mp[mkp("W", ")")] = "NULL";
    mp[mkp("W", "+")] = "+ V W";
    mp[mkp("W", "-")] = "- V W";
    mp[mkp("W", ">")] = "NULL";
    mp[mkp("W", "<")] = "NULL";
    mp[mkp("W", "=")] = "NULL";
    mp[mkp("W", ">=")] = "NULL";
    mp[mkp("W", "<=")] = "NULL";
    mp[mkp("W", "==")] = "NULL";
    mp[mkp("W", "!=")] = "NULL";
    mp[mkp("W", "#")] = "NULL";
    // X: <factor>
    mp[mkp("X", "ID")] = "ID";
    mp[mkp("X", "NUM")] = "NUM";
    mp[mkp("X", "(")] = "( Q )";
    // Y: <term1>
    mp[mkp("Y", ";")] = "NULL";
    mp[mkp("Y", ")")] = "NULL";
    mp[mkp("Y", "+")] = "NULL";
    mp[mkp("Y", "-")] = "NULL";
    mp[mkp("Y", "*")] = "* X Y";
    mp[mkp("Y", "/")] = "/ X Y";
    mp[mkp("Y", ">")] = "NULL";
    mp[mkp("Y", "<")] = "NULL";
    mp[mkp("Y", "=")] = "NULL";
    mp[mkp("Y", ">=")] = "NULL";
    mp[mkp("Y", "<=")] = "NULL";
    mp[mkp("Y", "==")] = "NULL";
    mp[mkp("Y", "!=")] = "NULL";
    mp[mkp("Y", "#")] = "NULL";
}
// Reports whether `s` is one of the terminal symbols listed in Vt.
bool isVt(string s) {
    int pos = 0;
    while (pos < 26 && !(s == Vt[pos])) {
        ++pos;
    }
    return pos < 26;
}
// Splits `str` at every single blank and returns the pieces in order.
// Adjacent, leading or trailing blanks produce empty pieces, and an empty
// input yields one empty piece.
vector<string> split(string str)
{
    vector<string> pieces;
    str += " "; // sentinel so that the last piece is terminated too
    string::size_type start = 0;
    string::size_type hit = str.find(' ', start);
    while (hit != string::npos)
    {
        pieces.push_back(str.substr(start, hit - start));
        start = hit + 1;
        hit = str.find(' ', start);
    }
    return pieces;
}
void inittext() {
bool flag = 0;
while (!read2.eof()) {
read2.getline(buffer2, 1000);
string tmp = buffer2;
while (tmp.length() == 0) {
if (!read2.eof()) {
read2.getline(buffer2, 1000);
tmp = buffer2;
}
else {
word.line = "最后一";
word.content = "#";
word.type = "#";
text[len2++] = word;
flag = 1;
return;
}
}
string str = " ";
vector<string>strlist = split(tmp);
word.line = strlist[0];
word.type = strlist[1];
word.content = strlist[2];
if (word.type == "UNSIGNINT")word.type = "NUM";
else if (word.type == "ID")word.type = "ID";
else word.type = word.content;
text[len2++] = word;
}
if (flag) {
word.line = "9999999";
word.content = '\0';
word.type = "#";
text[len2++] = word;
return;
}
}
// Returns the token at `index2` and advances the cursor; once all tokens
// have been handed out, the last stored token is returned again and the
// cursor stays where it is.
Word getNext() {
    const int at = (index2 < len2) ? index2++ : len2 - 1;
    return text[at];
}
// Reports whether the type of `wd` is one of the terminals a statement can
// begin with.
bool isStatementFirst(Word wd) {
    static const char *const starters[] = {
        "if", "read", "for", "while", "{", "ID", "write", ";"
    };
    const int count = sizeof(starters) / sizeof(starters[0]);
    for (int i = 0; i < count; ++i) {
        if (wd.type == starters[i]) {
            return true;
        }
    }
    return false;
}
// Prints a syntax error for the token stored in `lastword`.
void error() {
    cout << lastword.line << "行错误 ";
    cout << "错误单词: " << lastword.content;
    cout << endl;
}
void S_program() {
bool flag = 1;
word = getNext();
lastword = word;
string exp;
while (flag) {
exp = mStack.top();
mStack.pop();
if (word.line == "21") {
int as = 1;
as += 1;
}
if (isVt(exp)) {
if (word.type == exp) {
lastword = word;
word = getNext();
}
else {
error();
return;
}
}
else if (exp == "#") {
if (exp == word.content) {
cout << "语法分析成功" << endl;
flag = 0;
}
else {
error();
return;
}
}
else if (mp.find(mkp(exp, word.type)) != mp.end()) {
if (word.type == "else") {
Word tmp = getNext();
index2--;//回退
if (!isStatementFirst(tmp)) {
continue;
}
}
if (word.line == "5") {
word.line = "5";
}
string tmp = mp[mkp(exp, word.type)];
if (tmp != "NULL") {
vector<string>a = split(tmp);
reverse(a.begin(), a.end());//反转字符串
for (int i = 0; i < a.size(); i++) {
mStack.push(a[i]);
}
}
}
else {
error();
return;
}
}
}
void outputtext() {
ofstream outt("C.txt"); //输出流
for (int i = 0; i < len2; i++) {
outt << text[i].type << ' ' << text[i].content << endl;
}
}
int main() {
//读文件
if (!read.is_open()) {
cout << "打开文件失败" << endl;
}
else {
//词法分析函数
analyse();
cout << "词法分析完成,结果输出到了lex.txt文件" << endl;
//读文件
if (!read2.is_open()) {
cout << "打开文件失败" << endl;
}
else {
//初始化文本
inittext();
//格式化词法分析输出
outputtext();
printf("C.txt文件输出成功\n");
initMp();
mStack.push("#");
mStack.push("S");
S_program();
}
}
return 0;
}
python读取excel生成C++初始LL1分析表代码:
import xlrd
import xlwt

# Reads the LL(1) table from the first sheet of the workbook and prints one
# C++ statement per filled cell:
#     mp[mkp("<nonterminal>","<terminal>")] = "<right-hand side>";
# Row 0 holds the terminals; the table cells start at column 2 and look like
# "X -> rhs". The empty production "ε" is written as "NULL".
#
# NOTE(review): xlrd 2.x only reads legacy .xls workbooks; opening this
# .xlsx file needs an xlrd version below 2.0 -- confirm the installed version.
workbook = xlrd.open_workbook(r'E:\\Python3Project\\first\\data3.xlsx')
print(workbook.sheet_names())
sheet1 = workbook.sheet_by_index(0)  # sheet indices start at 0
# name, number of rows and number of columns of the sheet
row = sheet1.nrows
col = sheet1.ncols
print(sheet1.name, sheet1.nrows, sheet1.ncols)
vt = sheet1.row(0)
for i in range(1, row):
    cells = sheet1.row(i)
    for j in range(2, col):
        ss = cells[j].value
        # Skip empty cells and anything that is not a production.
        if not isinstance(ss, str) or "->" not in ss:
            continue
        # Split at the first arrow only, so that right-hand sides such as
        # "> Q" or ">= Q" stay intact.
        ss1, ss2 = (part.strip() for part in ss.split("->", 1))
        if ss2 == "ε":
            ss2 = "NULL"
        # Header cells may be padded (presumably " =" was typed with a
        # leading blank so the spreadsheet would not read it as a formula);
        # without strip() the blank ends up inside the generated map key.
        terminal = str(vt[j].value).strip()
        stans = "mp[mkp(\"" + ss1 + "\",\"" + terminal + "\")] = \"" + ss2 + "\";"
        print(stans)
运行此程序可以看到此结果:
是不是很方便?直接就生成了 C++ 代码。什么,你不知道怎么用 Python 读取 Excel?去看我的上一篇 Python 读取 Excel 的博客吧。
最后附上词法分析以及语法所有文件结果图片:
输入文件A.txt
单词分类文件:
lex.txt输出文件:
C.txt输出文件:
附上一张运行程序结果:
下一篇语义分析继续。