java简单地实现Tiny语言的词法分析器

最新推荐文章于 2024-08-10 07:38:40 发布

Choimeyu

最新推荐文章于 2024-08-10 07:38:40 发布

阅读量2.6k

点赞数

分类专栏：其他文章标签：语言 Tiny java

本文链接：https://blog.csdn.net/JINYUANCAI/article/details/50801703

版权

其他专栏收录该内容

3 篇文章 0 订阅

订阅专栏

只是简单地编写，实现了一些简单的功能，没有考虑到代码优化等问题。

Tiny语言定义

一、字符集定义

1． <字符集> → <字母>│<数字>│<单界符>

2． <字母> → A│B│…│Z│a│b│…│z

3． <数字> → 0│1│2│…│9

4． <单界符> → +│-│*│/│=│<│>│(│)│[│]│:│. │; │, │' │_ │{ │} │%

二、单词集定义

5．<单词集> → <保留字>│<双界符>│<标识符>│<常数>│<单界符>

6．<保留字> → and│array│begin│bool│call│case│char│const│do│double│else│end│false│for│if│int│not│of│or│procedure│program│read│real│repeat│set│stop│then│to│true│until│var│while│write

7．<双界符> → <=│>=│:=│/*│*/

8．<标识符> → <字母>│_ <字母>│_ <数字>│<标识符> <数字>│<标识符> <字母>│<标识符> _

9．<常数> → <整数>│<布尔常数>│<字符常数>

10．<整数> → <数字>│<整数> <数字>

11．<布尔常数> → true│false

12．<字符常数> → ' 除 ' 外的任意字符串'

三、数据类型定义

13．<类型> → int│bool│char

四、表达式定义

14．<表达式> → <算术表达式>│<布尔表达式>│<字符表达式>

15．<算术表达式> → <算术表达式> + <项>│<算术表达式> - <项>│<项>

16．<项> → <项> * <因子>│<项> / <因子>│<因子>

17．<因子> → <算术量>│- <因子>

18．<算术量> → <整数>│<标识符>│（ <算术表达式> ）

19．<布尔表达式> → <布尔表达式> or <布尔项>│<布尔项>

20．<布尔项> → <布尔项> and <布因子>│<布因子>

21．<布因子> → <布尔量>│not <布因子>

22．<布尔量> → <布尔常数>│<标识符>│（ <布尔表达式> ）│<标识符> <关系符> <标识符>│<算术表达式> <关系符> <算术表达式>

23．<关系符> → <│<=│>=│>│=

24．<字符表达式> → <字符常数>│<标识符>

五、语句定义

25．<语句> → <赋值句>│<if句>│<while句>│<repeat句>│<复合句>

26．<赋值句> → <标识符> := <算术表达式>

27．<if句>→ if <布尔表达式> then <语句>│if <布尔表达式> then <语句> else <语句>

28．<while句> → while <布尔表达式> do <语句>

29．<repeat句> → repeat <语句> until <布尔表达式>

30．<复合句> → begin <语句表> end

31．<语句表> → <语句> ；<语句表>│<语句>

六、程序定义

32．<程序> → program <标识符> ；<变量说明> <复合语句> .

33．<变量说明> → var <变量定义>│ε

34．<变量定义> → <标识符表> ：<类型> ；<变量定义>│<标识符表> ：<类型> ；

35．<标识符表> → <标识符> ，<标识符表>│<标识符>

七、 Tiny语言单词编码

单词	种别码	单词	种别码	单词	种别码
and	1	program	21	+	41
array	2	read	22	,	42
begin	3	real	23	-	43
bool	4	repeat	24	.	44
call	5	set	25	/	45
case	6	stop	26	/*	46
char	7	then	27	:	47
const	8	to	28	:=	48
do	9	true	29	;	49
double	10	until	30	<	50
else	11	var	31	<=	51
end	12	while	32	=	52
false	13	write	33	>	53
for	14	标识符	34	>=	54
if	15	整数	35	[	55
int	16	字符常数	36	]	56
not	17	(	37	_	57
of	18	)	38	{	58
or	19	*	39	}	59
procedure	20	*/	40	%	60

能发现下列词法错误和指出错误性质和位置：

非法字符，即不是Tiny字符集的符号；例如@ ￥等符号

字符常数缺右边的单引号（字符常数要求左、右边用单引号界定，不能跨行）；

注释部分缺右边的界符*/（注释要求左右边分别用/*和*/界定，不能跨行）；

发现错误后要能够继续编译下去，不能只报一个错误；

八、测试程序与样板输出

测试程序1：

program _example;

var A,B,C,D:int;

begin

A:=1; B:=5; C:=3; D:= '4';/* Variable initialization*/

while A<C and B>D do

if A=1 then C:=C+1 else

while A<=D do A:=A*2

end.

样板输出1：（要求在屏幕上显示）

(21 , program ) (34 , _example ) (49 , ; ) (31 , var ) (34 , A )

(42 , , ) (34 , B ) (42 , , ) (34 , C ) (42 , , )

(34 , D ) (47 , : ) (16 , int ) (49 , ; ) (3 , begin )

(34 , A) ( 48 , := ) (35 , 1 ) ( 49 , ; ) (34 , B )

(48 , := ) (35 , 5 ) (49 , ; ) (34 , C ) (48 , := )

(35 , 3 ) (49 , ; ) (34 , D ) (48 , := ) (36 , 4 )

(49, ; ) (32, while ) (34 , A ) (50 , < ) (34 , C )

(1, and ) (34 , B ) (53 , > ) (34 , D ) (9 , do )

(15, if ) (34 , A ) (52 , = ) (35 , 1 ) (27 , then )

(34, C ) (48 , := ) (34, C ) (41 , + ) (35 , 1 )

(11, else ) (32, while ) (34,A ) (51 , <= ) (34 , D )

(9, do ) (34,A ) (48 , := ) (34 , A ) (39 , * )

(35,2 ) (12 , end ) (44 , . )

代码如下：

package test;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.Scanner;

/**
 * 
 * @author user
 *
 */
public class Test {

	/** Kind code printed for identifiers. */
	private static final int KIND_IDENTIFIER = 34;
	/** Kind code printed for integer constants. */
	private static final int KIND_INTEGER = 35;
	/** Kind code printed for character constants. */
	private static final int KIND_CHAR_CONST = 36;
	/** The single quote that delimits a character constant. */
	private static final char QUOTE = '\'';

	// Reserved words; the kind code of keyWord[i] is i + 1.
	private static final String []keyWord = {"and","array","begin","bool","call",
	"case","char","const","do","double","else","end","false","for","if",
	"int","not","of","or","procedure","program","read","real","repeat","set",
	"stop","then","to","true","until","var","while","write"};

	// Every character accepted as a single-character delimiter.
	private static final char []delimiter = {'(',')','*','+',',','-','.','/',':',';',
	'<','=','>','[',']','_','{','}','%',QUOTE};

	/**
	 * Tells whether the given word is a reserved word.
	 * @param str word to look up (null is treated as "not a keyword")
	 * @return true if str is one of the reserved words
	 */
	public static boolean isKeyWord(String str){
		return getKeyWordIndex(str) != 0;
	}

	/**
	 * Tells whether the character is an ASCII decimal digit.
	 * @param ch character to test
	 * @return true for '0'..'9'
	 */
	public static boolean isNumber(char ch){
		return ch >= '0' && ch <= '9';
	}

	/**
	 * Tells whether the character is an ASCII letter.
	 * @param ch character to test
	 * @return true for 'a'..'z' and 'A'..'Z'
	 */
	public static boolean isLetter(char ch){
		return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
	}

	/**
	 * Tells whether the character is one of the single-character delimiters.
	 * @param ch character to test
	 * @return true if ch appears in the delimiter table
	 */
	public static boolean isDlimeter(char ch){
		for(int i = 0; i < delimiter.length; i++){
			if(ch == delimiter[i]){
				return true;
			}
		}
		return false;
	}

	/**
	 * Returns the kind code of a reserved word.
	 * @param str word to look up
	 * @return the 1-based position in the keyword table, or 0 if str is not a keyword
	 */
	public static int getKeyWordIndex(String str){
		for(int i = 0; i < keyWord.length; i++){
			if(keyWord[i].equals(str)){
				return i + 1;  // table is 0-based, kind codes start at 1
			}
		}
		return 0;
	}

	/**
	 * Returns the kind code of a single-character delimiter.
	 * @param ch delimiter character
	 * @return the kind code, or 0 if ch has no code (this includes the single quote)
	 */
	public static int getDelimiterIndex(char ch){
		switch (ch) {
		case '(': return 37;
		case ')': return 38;
		case '*': return 39;
		case '+': return 41;
		case ',': return 42;
		case '-': return 43;
		case '.': return 44;
		case '/': return 45;
		case ':': return 47;
		case ';': return 49;
		case '<': return 50;
		case '=': return 52;
		case '>': return 53;
		case '[': return 55;
		case ']': return 56;
		case '_': return 57;
		case '{': return 58;
		case '}': return 59;
		case '%': return 60;
		default:  return 0;
		}
	}

	/**
	 * Returns the kind code of a two-character delimiter.
	 * @param str one of ":=", ">=", "<=", "/*" or the comment terminator
	 * @return the kind code, or 0 if str is not a two-character delimiter
	 */
	public static int getDoubleDelimiter(String str){
		if("*/".equals(str)){
			return 40;
		}
		if("/*".equals(str)){
			return 46;
		}
		if(":=".equals(str)){
			return 48;
		}
		if("<=".equals(str)){
			return 51;
		}
		if(">=".equals(str)){
			return 54;
		}
		return 0;
	}

	/**
	 * Reads the whole file at the given path as text, decoded with the
	 * platform default charset.
	 * @param path path of the file to read
	 * @return the file content
	 * @throws FileNotFoundException if the path does not exist or is a directory
	 * @throws IOException if reading fails
	 */
	public static String readFile(String path) throws IOException{
		File file = new File(path);
		if(!file.exists() || file.isDirectory()){
			throw new FileNotFoundException(path);
		}
		// Collect the raw bytes first and decode once at the end, so that only
		// the bytes actually read are decoded and a multi-byte character that
		// straddles two chunks is not corrupted.
		java.io.ByteArrayOutputStream bytes = new java.io.ByteArrayOutputStream();
		try(FileInputStream in = new FileInputStream(file)){
			byte[] buffer = new byte[1024];
			int n;
			while((n = in.read(buffer)) != -1){
				bytes.write(buffer, 0, n);
			}
		}
		return bytes.toString();
	}

	/**
	 * Lexical analysis of a Tiny source file. Echoes the source, then prints
	 * one "(kind,text)" pair per token, five pairs per output line. Lexical
	 * errors are printed inline and scanning continues after them.
	 * @param filePath path of the Tiny source file
	 */
	public static void lexcialAnalysis(String filePath){
		String source;
		try{
			source = readFile(filePath);
		}catch(IOException e){
			e.printStackTrace();
			return;
		}
		System.out.println("Tiny语言的源程序如下：");
		System.out.println(source.trim());
		System.out.println("进行词法分析");

		// Tokens, comments and character constants never span lines, so the
		// text is scanned one line at a time. Rows are numbered from 1.
		String[] lines = source.split("\r\n|\r|\n", -1);
		int count = 0;  // number of pairs printed so far
		for(int row = 1; row <= lines.length; row++){
			count = scanLine(lines[row - 1], row, count);
		}
	}

	/**
	 * Scans one source line and prints its tokens and errors.
	 * @param line  text of the line, without its line terminator
	 * @param row   1-based line number, used in error messages
	 * @param count number of pairs printed before this line
	 * @return number of pairs printed after this line
	 */
	private static int scanLine(String line, int row, int count){
		int len = line.length();
		int i = 0;
		while(i < len){
			char ch = line.charAt(i);
			int col = i + 1;  // 1-based column of the current lexeme's first character

			if(Character.isWhitespace(ch)){
				// blanks and tabs only separate tokens
				i++;
			}
			else if(isLetter(ch) || ch == '_'){
				// identifier or keyword: letter/underscore, then letters, digits, underscores
				int start = i++;
				while(i < len && isWordChar(line.charAt(i))){
					i++;
				}
				String word = line.substring(start, i);
				int kindCode;
				if(word.equals("_")){
					// a lone underscore is the '_' delimiter, not an identifier
					kindCode = getDelimiterIndex('_');
				}
				else{
					kindCode = getKeyWordIndex(word);
					if(kindCode == 0){
						kindCode = KIND_IDENTIFIER;
					}
				}
				count = emit(kindCode, word, count);
			}
			else if(isNumber(ch)){
				int start = i;
				while(i < len && isNumber(line.charAt(i))){
					i++;
				}
				count = emit(KIND_INTEGER, line.substring(start, i), count);
				// letters glued to a number (e.g. 12ab) are reported as an error
				if(i < len && isLetter(line.charAt(i))){
					while(i < len && isLetter(line.charAt(i))){
						i++;
					}
					System.out.print("error:非法字符"+ line.substring(start, i) + " 第 "+ row +" 行,第"+col+" 列出错");
				}
			}
			else if(ch == '<' || ch == '>' || ch == ':'){
				// may be the first half of "<=", ">=" or ":="
				if(i + 1 < len && line.charAt(i + 1) == '='){
					String two = line.substring(i, i + 2);
					count = emit(getDoubleDelimiter(two), two, count);
					i += 2;
				}
				else{
					count = emit(getDelimiterIndex(ch), String.valueOf(ch), count);
					i++;
				}
			}
			else if(ch == '/' && i + 1 < len && line.charAt(i + 1) == '*'){
				// comment: skipped entirely; it must be closed on the same line.
				// The search starts after the opener so "/*/" is not taken as closed.
				int close = line.indexOf("*/", i + 2);
				if(close < 0){
					System.out.print("error:注释不匹配，第" + row +
							"行，第" + col + "列");
					i = len;
				}
				else{
					i = close + 2;
				}
			}
			else if(ch == QUOTE){
				// character constant: everything up to the next quote on this line
				int close = line.indexOf(QUOTE, i + 1);
				if(close < 0){
					System.out.print("error:单引号不匹配"+"第"+row+"行,第"+col+"列");
					i = len;
				}
				else{
					count = emit(KIND_CHAR_CONST, line.substring(i + 1, close), count);
					i = close + 1;
				}
			}
			else if(isDlimeter(ch)){
				// every remaining delimiter is a one-character token
				count = emit(getDelimiterIndex(ch), String.valueOf(ch), count);
				i++;
			}
			else{
				System.out.print("error:出现非法字符："+ch+",第"+row+"行,第"+col+"列");
				i++;
			}
		}
		return count;
	}

	/** Tells whether ch may appear after the first character of an identifier. */
	private static boolean isWordChar(char ch){
		return isLetter(ch) || isNumber(ch) || ch == '_';
	}

	/**
	 * Prints one "(kind,text)" pair and ends the output line after every
	 * fifth pair.
	 * @return the updated pair count
	 */
	private static int emit(int kindCode, String text, int count){
		System.out.print("(" + kindCode + "," + text + ")" + "  ");
		count++;
		if(count % 5 == 0){
			System.out.println();
		}
		return count;
	}

	/**
	 * Prompts for a file path on standard input and runs the lexer on it.
	 */
	public static void main(String []args){
		Scanner sc = new Scanner(System.in,"UTF-8");
		System.out.println("姓名：xxx   班级：xx软件工程xx班   学号：123456789");
		System.out.println("请输入文件路径，例如：D://test.txt");
		lexcialAnalysis(sc.nextLine());
	}
}

运行结果：