对一个简单语言的子集编制一个一遍扫描的词法分析程序。
开发思路:
首先定义一个Binaries类来存放结果,里面的属性有两个,一个是int类型的syn(单词种别码),一个是String类型的token(单词自身字符串)。接着编写主程序,让用户输入一段字符串,以#号结束。接着将用户输入的字符串传入我自己编写的scan方法当中。在方法中,我定义了一个Map<String,Integer>类型的变量,并把我要识别的单词符号和种别码存进去。接着将传入的字符串拆成一个一个字符,首先判断字符是不是字母,如果是就一直读,直到不是字母为止,然后看map集合中是否有符合的单词符号:如果有,就用对应的种别码new一个Binaries对象;如果没有,就把它当作标识符(种别码10)。然后把该对象存放到result列表中。接着判断是不是数字,也是一直读直到不是数字为止,并直接以种别码11(数字)存入result列表。然后判断字符是否为空格,如果是空格就跳过,接着判断是否是map中的一些特殊字符,如果是就加入到result列表中。最后对一些特别的字符如:>、>=,这种无法一次性判断的单词符号,进行特殊处理(例如:字符串中有>=,则优先匹配>=),最后加到结果中即可。
流程图:
Binaries类:
/**
 * Immutable pair produced by the lexical analyzer: a token's category code
 * ({@code syn}) together with the token's own text ({@code token}).
 */
public class Binaries {
    /** Category code of the token. */
    private final int syn;
    /** The token text exactly as it was read from the input. */
    private final String token;

    /**
     * Creates a (category code, token text) pair.
     *
     * @param syn   category code of the token
     * @param token the token's text
     */
    public Binaries(int syn, String token) {
        this.syn = syn;
        this.token = token;
    }

    /**
     * Returns the category code of the token.
     *
     * @return the category code passed to the constructor
     */
    public int getSyn() {
        return syn;
    }

    /**
     * Returns the token text.
     *
     * @return the token text passed to the constructor
     */
    public String getToken() {
        return token;
    }

    /**
     * Formats the pair as {@code (syn,token)}.
     *
     * @return the textual form of this pair
     */
    @Override
    public String toString() {
        return "(" + syn + "," + token + ")";
    }
}
LexicalAnalyzer类(词法分析器):
import java.util.*;
/**
 * One-pass lexical analyzer for a small language subset.
 *
 * <p>Reads text from standard input up to and including the first {@code #},
 * splits it into tokens and prints each token as a {@code (syn,token)} pair.
 */
public class LexicalAnalyzer {

    /** Category code given to letter runs that are not keywords. */
    private static final int SYN_IDENTIFIER = 10;

    /** Category code given to digit runs. */
    private static final int SYN_NUMBER = 11;

    /** Character that marks the end of the user's input. */
    private static final char END_MARK = '#';

    /** Keywords and single-character symbols mapped to their category codes. */
    private static final Map<String, Integer> SYMBOLS = buildSymbolTable();

    /**
     * Kept so existing callers still compile; the argument is not used
     * because all functionality of this class is static.
     *
     * @param input ignored
     */
    public LexicalAnalyzer(String input) {
    }

    /**
     * Reads lines from standard input until a {@code #} is seen (or input is
     * exhausted), scans the collected text and prints one token per line.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        System.out.println("\n 请输入一段字符串(以#结束):\n");
        Scanner scanner = new Scanner(System.in);
        StringBuilder output = new StringBuilder();
        boolean end = false;
        // hasNextLine() guards against input that ends before '#' is typed.
        while (!end && scanner.hasNextLine()) {
            String input = scanner.nextLine();
            int mark = input.indexOf(END_MARK);
            if (mark >= 0) {
                // Keep everything up to and including the end mark, drop the rest.
                output.append(input, 0, mark + 1);
                end = true;
            } else {
                // nextLine() strips the line terminator; put a separator back so
                // that tokens on adjacent lines are not glued together.
                output.append(input).append('\n');
            }
        }
        for (Binaries b : scan(output.toString())) {
            System.out.println(b);
        }
    }

    /** Builds the read-only table of keywords and single-character symbols. */
    private static Map<String, Integer> buildSymbolTable() {
        Map<String, Integer> map = new HashMap<>();
        map.put("begin", 1);
        map.put("if", 2);
        map.put("then", 3);
        map.put("while", 4);
        map.put("do", 5);
        map.put("end", 6);
        map.put("+", 13);
        map.put("-", 14);
        map.put("*", 15);
        map.put(";", 26);
        map.put("(", 27);
        map.put(")", 28);
        map.put("#", 0);
        return Collections.unmodifiableMap(map);
    }

    /** Returns whether {@code c} is an ASCII letter. */
    private static boolean isLetter(char c) {
        return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
    }

    /** Returns whether {@code c} is an ASCII digit. */
    private static boolean isDigit(char c) {
        return c >= '0' && c <= '9';
    }

    /** Returns whether the character after index {@code i} exists and equals {@code expected}. */
    private static boolean nextIs(char[] chars, int i, char expected) {
        return i + 1 < chars.length && chars[i + 1] == expected;
    }

    /**
     * Splits the given text into tokens.
     *
     * <p>Letter runs become keywords (when present in the symbol table) or
     * identifiers, digit runs become numbers, whitespace is skipped, and
     * operators are matched longest-first (e.g. {@code >=} before {@code >}).
     * Characters that match nothing are reported on standard output and skipped.
     *
     * @param output the text to scan
     * @return the tokens in the order they appear
     */
    private static List<Binaries> scan(String output) {
        char[] chars = output.toCharArray();
        List<Binaries> result = new ArrayList<>();
        int i = 0;
        while (i < chars.length) {
            char c = chars[i];
            if (isLetter(c)) {
                int start = i;
                // Bounds check first: the text may end in the middle of a word.
                while (i < chars.length && isLetter(chars[i])) {
                    i++;
                }
                String word = output.substring(start, i);
                Integer code = SYMBOLS.get(word);
                if (code != null) {
                    result.add(new Binaries(code, word));
                } else {
                    result.add(new Binaries(SYN_IDENTIFIER, word));
                }
            } else if (isDigit(c)) {
                int start = i;
                while (i < chars.length && isDigit(chars[i])) {
                    i++;
                }
                result.add(new Binaries(SYN_NUMBER, output.substring(start, i)));
            } else if (Character.isWhitespace(c)) {
                // Spaces, tabs and line breaks only separate tokens.
                i++;
            } else if (SYMBOLS.containsKey(String.valueOf(c))) {
                String symbol = String.valueOf(c);
                result.add(new Binaries(SYMBOLS.get(symbol), symbol));
                i++;
            } else {
                switch (c) {
                    case '/':
                        // "//" -> 29, "/" -> 16
                        i += addBinaries(chars, result, i, '/', 29, 16) ? 2 : 1;
                        break;
                    case ':':
                        // ":=" -> 18, ":" -> 17
                        i += addBinaries(chars, result, i, '=', 18, 17) ? 2 : 1;
                        break;
                    case '<':
                        // "<>" -> 21, "<=" -> 22, "<" -> 20
                        if (nextIs(chars, i, '>')) {
                            result.add(new Binaries(21, "<>"));
                            i += 2;
                        } else if (nextIs(chars, i, '=')) {
                            result.add(new Binaries(22, "<="));
                            i += 2;
                        } else {
                            result.add(new Binaries(20, "<"));
                            i++;
                        }
                        break;
                    case '>':
                        // ">=" -> 24, ">" -> 23
                        i += addBinaries(chars, result, i, '=', 24, 23) ? 2 : 1;
                        break;
                    case '=':
                        result.add(new Binaries(25, "="));
                        i++;
                        break;
                    default:
                        System.out.println("存在无法识别的字符:" + c);
                        i++;
                }
            }
        }
        return result;
    }

    /**
     * Adds either a two-character or a one-character operator token starting
     * at index {@code i}.
     *
     * @param chars  the text being scanned
     * @param result list the new token is appended to
     * @param i      index of the operator's first character
     * @param c      second character that would complete the two-character operator
     * @param tar1   category code of the two-character operator
     * @param tar2   category code of the one-character operator
     * @return {@code true} if the two-character operator was matched (the
     *         caller must then consume two characters), {@code false} otherwise
     */
    private static boolean addBinaries(char[] chars, List<Binaries> result, int i, char c, int tar1, int tar2) {
        if (nextIs(chars, i, c)) {
            result.add(new Binaries(tar1, new String(chars, i, 2)));
            return true;
        }
        result.add(new Binaries(tar2, String.valueOf(chars[i])));
        return false;
    }
}