import java.awt.*;
import java.util.Scanner;
/**
* Created by s2002 on 2016/9/30.
*/
public class com {
public static void main(String[] args) {
//限制字符串最大值
final int MAX_LEN = 100;
//关键字
String[] key_word = new String[]{"begin","end","if","then","while","do"};
//录入用户输入
Scanner input = new Scanner(System.in);
System.out.print("Please input a string <end with '#'>:");
String uString = input.nextLine();
char[] analyseData = new char[MAX_LEN];
int index = 0, key = 0;
List list = new List();
do {
String compareStr = null;
char temp = uString.charAt(index);
list = extactCharacters(temp, analyseData, key_word, uString, index, compareStr);
if (list.getItemCount() == 0) {
index++;
continue;
}
// 规定List的第一个元素为index,第二个元素为key
index = Integer.parseInt(list.getItem(0));
key = Integer.parseInt(list.getItem(1));
String words = list.getItem(2);
System.out.println("< " + key + " ," + words + " >");
} while (key != 0);
}
public static List extactCharacters(char temp, char[] analyseDate, String[] keywords, String uString, int index,
String compareStr) {
int keyID = -1, m = 0;
List list = new List();
//判断下一个读入的字符是否为空格,若读取到空格则跳过,提取下一个字符进行判断
while (temp != ' ') {
//判断当前字符是字母或者数字和字母的组合
if (temp >= 'a' && temp <= 'z') {
m = 0;
// 当读取到不是大小写字母或者数字时候判断为一个单词读取完成
while (temp >= 'a' && temp <= 'z' || temp >= 'A' && temp <= 'Z' || temp >= '0' && temp <= '9') {
analyseDate[m++] = temp;
compareStr += temp + "";
temp = uString.charAt(++index);
}
// 与读取出来的字符判断是否为关键字
compareStr = compareStr.substring(4);
for (int i = 0; i < 6; i++) {
if (compareStr.equals(keywords[i])) {
keyID = i + 1;
list.add(index + "");
list.add(keyID + "");
list.add(compareStr);
return list;
}
}
//若为非关键字就当作为标识符
keyID = 10;
list.add(index + "");
list.add(keyID + "");
list.add(compareStr);
return list;
}
//判断当前字符是否为数字
else if (temp >= '0' && temp <= '9') {
m = 0;
String tempTokens = null;
// 对后面的字符进行判断是否为数字
while (temp >= '0' && temp <= '9') {
analyseDate[m++] = temp;
tempTokens += temp;
temp = uString.charAt(++index);
}
// 不是数字则返回种别码,结束当前方法
keyID = 11;
tempTokens = tempTokens.substring(4);
list.add(index + "");
list.add(keyID + "");
list.add(tempTokens + "");
return list;
}
m = 0;
//判断当前字符是否为其他关系运算符
String token = null;
switch (temp) {
case '<':
// String token = null;
analyseDate[m++] = temp;
token += temp;
if (uString.charAt(++index) == '=') {
analyseDate[m++] = temp;
keyID = 22;
token += uString.charAt(index++);
} else if (uString.charAt(++index) == '>') {
analyseDate[m++] = temp;
keyID = 21;
token += uString.charAt(index++);
} else {
keyID = 23;
}
list.add(index + "");
list.add(keyID + "");
token = token.substring(4);
list.add(token);
return list;
case '>':
analyseDate[m++] = temp;
token += temp;
if (uString.charAt(++index) == '=') {
keyID = 24;
analyseDate[m++] = temp;
token += uString.charAt(index++);
} else {
keyID = 20;
}
list.add(index + "");
list.add(keyID + "");
token = token.substring(4);
list.add(token);
return list;
case ':':
analyseDate[m++] = temp;
token += temp;
if (uString.charAt(++index) == '=') {
keyID = 18;
// analyseDate[m++] = temp;
analyseDate[m++] = uString.charAt(index);
token += uString.charAt(index++);
} else {
keyID = 17;
}
list.add(index + "");
list.add(keyID + "");
token = token.substring(4);
list.add(token);
return list;
case '*':
keyID = 13;
break;
case '/':
keyID = 14;
break;
case '+':
keyID = 15;
break;
case '-':
keyID = 16;
break;
case '=':
keyID = 25;
break;
case ';':
keyID = 26;
break;
case '(':
keyID = 27;
break;
case ')':
keyID = 28;
break;
case '#':
keyID = 0;
break;
default:
keyID = -1;
break;
}
analyseDate[m++] = temp;
list.add(++index + "");
list.add(keyID + "");
list.add(temp + "");
return list;
}
return list;
}
}
程序功能:可以模拟将用户输入的代码分解成词,进行词法分析,并将其字符与种别码对应输出。
种别码表:
单词符号 | 种别码 | 单词符号 | 种别码 |
begin | 1 | : | 17 |
if | 2 | := | 18 |
then | 3 | < | 20 |
while | 4 | <= | 21 |
do | 5 | <> | 22 |
end | 6 | > | 23 |
l(l|d)* | 10 | >= | 24 |
dd* | 11 | = | 25 |
+ | 13 | ; | 26 |
- | 14 | ( | 27 |
* | 15 | ) | 28 |
/ | 16 | # | 0 |