实验目的:为包含循环语句和条件判断语句的源程序编写词法分析程序,要求只对源程序扫描一遍即可完成分析。
实验要求:
(1)关键字:for if then else while do until int input output
所有关键字都是小写。
(2)运算符和分隔符: = : := + - * / < > <= <> >= ; ( ) #
(3)其他标识符(ID)和整型常数(NUM),通过以下模式定义:
ID=letter(letter | digit)*
NUM=digit digit*
(4)空白符由空格、制表符和换行符组成。空白符一般用来分隔ID、NUM、运算符、分隔符和关键字,在词法分析阶段通常被忽略。
各种词法单元对应的词法记号如下:
词法单元 | 词法记号 | 词法单元 | 词法记号 |
for | 1 | : | 17 |
if | 2 | := | 18 |
then | 3 | < | 20 |
else | 4 | <> | 21 |
while | 5 | <= | 22 |
do | 6 | > | 23 |
letter(letter+digit)* | 10 | >= | 24 |
digit digit* | 11 | = | 25 |
+ | 13 | ; | 26 |
- | 14 | ( | 27 |
* | 15 | ) | 28 |
/ | 16 | # | 0 |
until | 29 | int | 30 |
input | 31 | output | 32 |
词法分析程序的功能:
输入:源程序
输出:二元组(词法记号,属性值/其在符号表中的位置)构成的序列。
例如:对源程序
int x:=5; if (x>0) then x:=2*x+1/3; else x:=2/x; #
经词法分析后输出如下序列:
(30, int)(10,’x’)(18, :=) (11,5) (26, ;) (2, if ) (27,( )……
具体实现:
import java.io.*;
import java.util.*;
/**
 * Single-pass lexical analyzer for a small language with loop and
 * conditional statements.
 *
 * <p>{@link #main} stores the program typed on standard input in
 * {@code source.txt}, reads it back, and prints one {@code (code,lexeme)}
 * pair per token. Token codes come from the table built by
 * {@link #setHashMap}.
 */
public class test {
    /**
     * Fills {@code hashmap} with the lexeme-to-token-code table.
     *
     * <p>Keywords and operators are keyed by their own spelling; the two
     * token classes are keyed by the pseudo-lexemes {@code "ID"} (10) and
     * {@code "NUM"} (11).
     *
     * @param hashmap table to populate; existing entries with the same keys
     *                are overwritten
     */
    static void setHashMap(HashMap<String,Integer> hashmap) {
        hashmap.put("for", 1);
        hashmap.put("if", 2);
        hashmap.put("then", 3);
        hashmap.put("else", 4);
        hashmap.put("while", 5);
        hashmap.put("do", 6);
        hashmap.put("ID", 10);
        hashmap.put("NUM", 11);
        hashmap.put("+", 13);
        hashmap.put("-", 14);
        hashmap.put("*", 15);
        hashmap.put("/", 16);
        hashmap.put(":", 17);
        hashmap.put(":=", 18);
        hashmap.put("<", 20);
        hashmap.put("<>", 21);
        hashmap.put("<=", 22);
        hashmap.put(">", 23);
        hashmap.put(">=", 24);
        hashmap.put("=", 25);
        hashmap.put(";", 26);
        hashmap.put("(", 27);
        hashmap.put(")", 28);
        hashmap.put("until", 29);
        hashmap.put("int", 30);
        hashmap.put("input", 31);
        // The key must be the bare keyword: Scan looks keywords up by their
        // own spelling, so a key of "*output" made "output" print as null.
        hashmap.put("output", 32);
        hashmap.put("#", 0);
    }

    /** Reserved words of the language; all are lower case. */
    static String []keyWords = {"for","if","then","else","while",
                                "do","until","int","input","output"};

    /** Operators and delimiters of the language (kept for reference). */
    static String []operator = {"+","-","*","/",":",":=",
                                "<","<=","<>",">",">=",
                                ";","(",")","#","="};

    /** Single-character tokens that never start a longer operator. */
    private static final String SINGLE_CHAR_TOKENS = "+-*/=;()#";

    /**
     * Tells whether {@code letter} is an ASCII letter, upper or lower case.
     *
     * @param letter character to classify
     * @return {@code true} for {@code a-z} and {@code A-Z}
     */
    static boolean isLetter(char letter) {
        return (letter >= 'a' && letter <= 'z') || (letter >= 'A' && letter <= 'Z');
    }

    /**
     * Tells whether {@code digit} is an ASCII decimal digit.
     *
     * @param digit character to classify
     * @return {@code true} for {@code 0-9}
     */
    static boolean isDigit(char digit) {
        return digit >= '0' && digit <= '9';
    }

    /**
     * Tells whether {@code str} is one of the reserved words in
     * {@link #keyWords}.
     *
     * @param str candidate lexeme
     * @return {@code true} if {@code str} equals a keyword exactly
     */
    static boolean isKeyWord(String str) {
        for (String keyWord : keyWords) {
            if (keyWord.equals(str)) {
                return true;
            }
        }
        return false;
    }

    /** Whitespace that separates tokens: space, tab and line breaks. */
    private static boolean isBlank(char c) {
        return c == ' ' || c == '\t' || c == '\n' || c == '\r';
    }

    /** Returns the character at {@code index}, or NUL when past the end. */
    private static char peek(String str, int index) {
        return index < str.length() ? str.charAt(index) : '\0';
    }

    /** Prints one token as {@code (code,lexeme)} followed by a space. */
    private static void emit(HashMap<String,Integer> hashmap, String key, String lexeme) {
        System.out.print("(" + hashmap.get(key) + "," + lexeme + ") ");
    }

    /**
     * Scans one token of {@code str} starting at index {@code num} and
     * prints it to standard output.
     *
     * <p>Leading whitespace is skipped first. A character that starts no
     * known token prints {@code error} and is consumed, so the caller's
     * loop always makes progress.
     *
     * @param str     the whole source text
     * @param num     index at which scanning starts
     * @param hashmap token-code table as built by {@link #setHashMap}
     * @return index of the first character after the scanned token, or
     *         {@code str.length()} if only whitespace remained
     */
    static int Scan(String str,int num,HashMap<String,Integer> hashmap) {
        final int length = str.length();
        while (num < length && isBlank(str.charAt(num))) {
            num++;
        }
        if (num >= length) {
            return num;
        }

        char c = str.charAt(num);
        if (isLetter(c)) {
            // ID = letter (letter | digit)*; keywords share this shape and
            // are told apart by a table lookup afterwards.
            int start = num++;
            while (num < length && (isLetter(str.charAt(num)) || isDigit(str.charAt(num)))) {
                num++;
            }
            String token = str.substring(start, num);
            emit(hashmap, isKeyWord(token) ? token : "ID", token);
            return num;
        }
        if (isDigit(c)) {
            // NUM = digit digit*
            int start = num++;
            while (num < length && isDigit(str.charAt(num))) {
                num++;
            }
            emit(hashmap, "NUM", str.substring(start, num));
            return num;
        }

        // peek() yields NUL at end of input, so a trailing '<', '>' or ':'
        // falls through to its one-character form instead of throwing.
        char next = peek(str, num + 1);
        if (c == '<') {
            if (next == '=') {
                emit(hashmap, "<=", "<=");
                return num + 2;
            }
            if (next == '>') {
                emit(hashmap, "<>", "<>");
                return num + 2;
            }
            emit(hashmap, "<", "<");
            return num + 1;
        }
        if (c == '>') {
            if (next == '=') {
                emit(hashmap, ">=", ">=");
                return num + 2;
            }
            emit(hashmap, ">", ">");
            return num + 1;
        }
        if (c == ':') {
            if (next == '=') {
                emit(hashmap, ":=", ":=");
                return num + 2;
            }
            emit(hashmap, ":", ":");
            return num + 1;
        }
        if (SINGLE_CHAR_TOKENS.indexOf(c) >= 0) {
            String token = String.valueOf(c);
            emit(hashmap, token, token);
            return num + 1;
        }

        // Unknown character: report it and step over it. Returning 0 here
        // would restart the caller's scan from the beginning and loop forever.
        System.out.print("error ");
        return num + 1;
    }

    /**
     * Reads a program from standard input up to the first empty line (or
     * end of input), saves it to {@code source.txt}, reads the file back
     * as one string with lines joined by spaces, and prints its tokens.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try {
            File file = new File("source.txt");

            // Not closed on purpose: closing the Scanner would close System.in.
            Scanner input = new Scanner(System.in);
            try (PrintWriter pw = new PrintWriter(new FileWriter(file))) {
                // hasNextLine() ends the loop cleanly at EOF instead of
                // letting nextLine() throw NoSuchElementException.
                while (input.hasNextLine()) {
                    String line = input.nextLine();
                    if (line.equals("")) {
                        break;
                    }
                    pw.println(line);
                }
            }

            StringBuilder source = new StringBuilder();
            try (BufferedReader br = new BufferedReader(
                    new InputStreamReader(new FileInputStream(file)))) {
                String line;
                while ((line = br.readLine()) != null) {
                    // A space stands in for the line break so tokens on
                    // adjacent lines do not run together.
                    source.append(line).append(' ');
                }
            }
            String str = source.toString();

            HashMap<String,Integer> hashmap = new HashMap<String,Integer>();
            setHashMap(hashmap);
            for (int i = 0; i < str.length(); ) {
                i = Scan(str, i, hashmap);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
测试结果:
输入:int x=5; if (x>0) x=2*x+1/3; else x=2/x;(原文此处缺失,根据下方输出还原;输入以空行结束)
输出:(30,int) (10,x) (25,=) (11,5) (26,;) (2,if) (27,() (10,x) (23,>) (11,0) (28,)) (10,x) (25,=) (11,2) (15,*) (10,x) (13,+) (11,1) (16,/) (11,3) (26,;) (4,else) (10,x) (25,=) (11,2) (16,/) (10,x) (26,;)