【实验内容】
对一个简单语言的子集编制一个一遍扫描的词法分析程序。
【实验要求】
(1)待分析的简单语言的词法
1) 关键字
begin if then while do end
2) 运算符和界符
:= + - * / < <= > >= <> = ; ( ) #
3) 其他单词是标识符(ID)和整型常数(NUM),通过以下正规式定义:
ID=letter(letter|digit)*
NUM=digitdigit*
4) 空格由空白、制表符和换行符组成。空格一般用来分隔ID、NUM、运算符、界符和关键字,词法分析阶段通常被忽略。
单词符号 | 种别码 | 单词符号 | 种别码 |
begin | 1 | : | 17 |
if | 2 | := | 18 |
then | 3 | < | 20 |
while | 4 | <> | 21 |
do | 5 | <= | 22 |
end | 6 | > | 23 |
letter(letter|digit)* | 10 | >= | 24 |
digitdigit* | 11 | = | 25 |
+ | 13 | ; | 26 |
- | 14 | ( | 27 |
* | 15 | ) | 28 |
/ | 16 | # | 0 |
写得比较杂
/**
 * One-pass lexical analyzer for a small language subset.
 *
 * <p>Category codes ({@code syn}): keywords {@code begin if then while do end}
 * are 1..6, identifiers are 10, integer constants are 11, operators and
 * delimiters are 13..28, the end marker {@code #} is 0 and an unrecognised
 * character is -1.
 */
public class Test {
    /** Lexeme text indexed by category code; "null" marks codes that have no fixed lexeme. */
    static String[] token = new String[] {"#","begin","if","then","while","do","end","null","null","null","null",
            "null","null","+","-","*","/",":",":=","null","<","<>","<=",">",">=","=",";","(",")"};

    /** Category code -> lexeme text, mirrors {@link #token}. */
    static Map<Integer, String> map = new HashMap<>();

    static {
        // Filled at class-load time so scan() works even when main() was never run.
        for (int code = 0; code < token.length; code++) {
            map.put(code, token[code]);
        }
    }

    /** Category code of the token most recently produced by {@link #scan()}. */
    static int syn;
    /** Text of the most recent identifier, keyword or number produced by {@link #scan()}. */
    static String tokeN;
    /** Index in {@link #prog} of the next character to be examined. */
    static int p;
    /** The input text being scanned. */
    static String prog;

    /** Highest category code that denotes a keyword (keywords occupy 1..6 in {@link #token}). */
    private static final int LAST_KEYWORD = 6;

    /**
     * Reads one line from standard input and prints every token as
     * {@code (code,text)} until the end marker (code 0) is produced.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        System.out.println("\n please input string:");
        try (Scanner sc = new Scanner(System.in)) {
            // An empty stdin is treated as an empty program instead of throwing.
            prog = sc.hasNextLine() ? sc.nextLine() : "";
        }
        p = 0;
        do {
            scan();
            switch (syn) {
            // Identifiers and numbers carry their own text; print the real code.
            case 10:
            case 11:
                System.out.println("(" + syn + "," + tokeN + ") ");
                break;
            case -1:
                System.out.println("error");
                break;
            // Keywords, operators and delimiters have a fixed lexeme.
            default:
                System.out.println("(" + syn + "," + token[syn] + ") ");
                break;
            }
        } while (syn != 0);
    }

    /**
     * Scans the next token of {@link #prog} starting at {@link #p}.
     *
     * <p>On return {@link #syn} holds the category code and {@link #p} points
     * just past the token. {@link #tokeN} is updated for identifiers, keywords
     * and numbers only. Reaching the end of the input is reported as the end
     * marker (code 0), so a missing trailing {@code #} cannot cause an
     * out-of-range access. An unrecognised character yields -1 and is skipped,
     * so repeated calls always make progress.
     */
    static void scan() {
        final int length = (prog == null) ? 0 : prog.length();

        // Skip blanks, tabs and line breaks between tokens.
        while (p < length && isBlank(prog.charAt(p))) {
            p++;
        }
        if (p >= length) {
            syn = 0; // end of input acts as an implicit '#'
            return;
        }

        char ch = prog.charAt(p);

        // Identifier or keyword: letter (letter | digit)*
        if (isLetter(ch)) {
            int start = p;
            while (p < length && (isLetter(prog.charAt(p)) || isDigit(prog.charAt(p)))) {
                p++;
            }
            tokeN = prog.substring(start, p);
            syn = 10;
            for (int code = 1; code <= LAST_KEYWORD; code++) {
                if (tokeN.equals(token[code])) {
                    syn = code;
                    break;
                }
            }
            return;
        }

        // Integer constant: digit digit*
        if (isDigit(ch)) {
            int start = p;
            while (p < length && isDigit(prog.charAt(p))) {
                p++;
            }
            tokeN = prog.substring(start, p);
            syn = 11;
            return;
        }

        // Operators and delimiters; the first character is always consumed.
        p++;
        switch (ch) {
        case '+': syn = 13; break;
        case '-': syn = 14; break;
        case '*': syn = 15; break;
        case '/': syn = 16; break;
        case ':': syn = match('=') ? 18 : 17; break;
        case '<':
            if (match('>')) {
                syn = 21;
            } else if (match('=')) {
                syn = 22;
            } else {
                syn = 20;
            }
            break;
        case '>': syn = match('=') ? 24 : 23; break;
        case '=': syn = 25; break;
        case ';': syn = 26; break;
        case '(': syn = 27; break;
        case ')': syn = 28; break;
        case '#': syn = 0; break;
        default:  syn = -1; break;
        }
    }

    /** Consumes the next character and returns true only if it equals {@code expected}. */
    private static boolean match(char expected) {
        if (p < prog.length() && prog.charAt(p) == expected) {
            p++;
            return true;
        }
        return false;
    }

    /** Returns true for an ASCII letter. */
    private static boolean isLetter(char c) {
        return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
    }

    /** Returns true for an ASCII decimal digit. */
    private static boolean isDigit(char c) {
        return c >= '0' && c <= '9';
    }

    /** Returns true for the whitespace characters that separate tokens. */
    private static boolean isBlank(char c) {
        return c == ' ' || c == '\t' || c == '\n' || c == '\r';
    }
}