词法分析器的设计-JavaScript实现(编译原理实验)
题目
设计一个词法分析器程序,该程序能完成如下功能:从文本文件中读取字符串,分析器能识别出字符串中的单词是整数还是以英文字母开头的合法标识符,并判断标识符是否为while、do、if、then、else、begin、end、and、or、not等保留字。若单词是整数则输出整数的编码及数值;若是标识符则判定是否为保留字,是保留字则将保留字及其对应的编码输出,否则输出标识符及其编码。若不是合法标识符则输出-1。要求用图形界面方式编程。
可以有如下约定:保留字if的编码为0,then为1,else为2,while为3,begin为4,do为5,end为6,and为39,or为40,not为41。整数的编码为57,标识符的编码为56。
例如:若文本文件中的字符串为 while do at a45 a+ 34 end 3a,则输出为 (while, 3), (do, 5), (at, 56), (a45, 56), (a+, 56), (34, 57), (end, 6), (3a, -1)。
自动机构造
自己画的状态图草图
JavaScript实现
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<meta http-equiv="X-UA-Compatible" content="ie=edge">
<title>Document</title>
<style>
.main {
/* height: 100px; */
width: 400px;
/*水平居中*/
margin: 0px auto;
border: 0.5px solid;
}
.title {
text-align: center;
}
</style>
</head>
<body>
<div class="main">
<div class="title">
词法分析器
</div>
<div style="border: 0.5px solid; width: 400px; margin-bottom:10px;"></div>
<div style="padding: 10px;">
<input type="file" id="files">
<button id="button" onclick="stringIdentification()">
开始识别
</button>
<div>
<div class="lable">识别结果:  </div>
<div class="result" id="result">
</div>
</div>
</div>
</div>
</body>
<script>
/**
 * Removes every empty-string element from `array`, in place.
 *
 * The array is compacted in a single pass: kept elements are shifted towards
 * the front and the array is then truncated, so the relative order of the
 * remaining elements is unchanged.
 *
 * @param {Array} array - The array to compact; it is modified in place.
 * @returns {Array} The same array instance that was passed in.
 */
function trimSpace(array) {
    var kept = 0;
    for (var i = 0; i < array.length; i++) {
        // Strict comparison: only genuine empty strings are dropped, so
        // values such as 0 or false are never removed by accident.
        if (array[i] !== "") {
            array[kept] = array[i];
            kept = kept + 1;
        }
    }
    // Cut off the leftover tail that still holds stale copies.
    array.length = kept;
    return array;
}
/** Codes of the reserved words, as fixed by the assignment. */
var RESERVED_WORD_CODES = {
    "if": 0,
    "then": 1,
    "else": 2,
    "while": 3,
    "begin": 4,
    "do": 5,
    "end": 6,
    "and": 39,
    "or": 40,
    "not": 41
};
/** Code of an identifier that is not a reserved word. */
var IDENTIFIER_CODE = 56;
/** Code of an unsigned integer literal. */
var INTEGER_CODE = 57;
/** Code reported for a word that is neither an integer nor an identifier. */
var ILLEGAL_CODE = -1;

/**
 * Classifies one whitespace-free word.
 *
 * Rules:
 *  - starts with an ASCII letter: a reserved word if it is exactly one of the
 *    keys of RESERVED_WORD_CODES, otherwise an identifier (any characters may
 *    follow the first letter, so "a+" is an identifier);
 *  - consists only of the digits 0-9: an integer;
 *  - anything else (e.g. "3a", "+x", ""): illegal.
 *
 * @param {string} word - A single word without whitespace.
 * @returns {number} The code of the word.
 */
function classifyWord(word) {
    if (/^[A-Za-z]/.test(word)) {
        // hasOwnProperty keeps inherited names such as "toString" from being
        // mistaken for reserved words.
        if (Object.prototype.hasOwnProperty.call(RESERVED_WORD_CODES, word)) {
            return RESERVED_WORD_CODES[word];
        }
        return IDENTIFIER_CODE;
    }
    if (/^[0-9]+$/.test(word)) {
        return INTEGER_CODE;
    }
    return ILLEGAL_CODE;
}

/**
 * Splits `text` into words and classifies each of them.
 *
 * Any run of whitespace (spaces, tabs, line breaks) separates two words, so
 * multi-line files are handled the same way as single-line ones.
 *
 * @param {string} text - The raw text to analyse.
 * @returns {Array<{word: string, code: number}>} One entry per word, in input order.
 */
function analyzeText(text) {
    // match() yields null when the text contains no word at all.
    var words = text.match(/\S+/g) || [];
    var tokens = [];
    for (var i = 0; i < words.length; i++) {
        tokens.push({ word: words[i], code: classifyWord(words[i]) });
    }
    return tokens;
}

/**
 * Renders tokens as "(word,code)" tuples joined by commas.
 *
 * @param {Array<{word: string, code: number}>} tokens - Tokens to render.
 * @returns {string} The rendered list; an empty string for no tokens.
 */
function formatTokens(tokens) {
    var parts = [];
    for (var i = 0; i < tokens.length; i++) {
        parts.push("(" + tokens[i].word + "," + tokens[i].code + ")");
    }
    return parts.join(",");
}

/**
 * Click handler of the "start" button: reads the file chosen in the
 * `#files` input as text, classifies every word in it and shows the
 * resulting "(word,code)" list in the `#result` element.
 *
 * When no file is selected the result area is cleared and nothing is read.
 *
 * @returns {void}
 */
function stringIdentification() {
    var resultElement = document.getElementById("result");
    var selectedFile = document.getElementById("files").files[0];
    if (!selectedFile) {
        resultElement.textContent = "";
        return;
    }

    var reader = new FileReader();
    // Handlers are attached before the read is started.
    reader.onload = function () {
        var tokens = analyzeText(String(reader.result));
        // textContent, not innerHTML: the words come from an arbitrary file
        // and must never be interpreted as markup.
        resultElement.textContent = formatTokens(tokens);
    };
    reader.onerror = function () {
        console.error(reader.error);
    };
    reader.readAsText(selectedFile);
}
</script>
</html>