词法分析器

简易词法分析器: 输入一段程序,分离出每一个符号串,并标记其类型

package com.parting_soul;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

/**
 * 词法分析器
 * 
 * @author parting_soul
 *
 */
public class LexicalAnalyzer {
	private String mText;

	/**
	 * 当前指针的位置
	 */
	private int mCurrentPos;

	private List<Constant<?>> constantList;

	private List<Identify> identifyList;

	private List<String> operatorLists;

	private List<String> limitLists;

	private List<String> keywordsLists;

	private static Set<Character> operatorSet;

	private static Set<Character> limitSet;

	private static Set<String> keywordsSet;

	private List<Result> lists = new ArrayList<>();

	public String getText() {
		return mText;
	}

	public void setText(String mText) {
		this.mText = mText;
	}

	static {
		operatorSet = new HashSet<>();
		operatorSet.add('+');
		operatorSet.add('-');
		operatorSet.add('*');
		operatorSet.add('/');
		operatorSet.add('=');

		limitSet = new HashSet<>();
		limitSet.add(',');
		limitSet.add(';');
		limitSet.add('(');
		limitSet.add(')');
		limitSet.add('{');
		limitSet.add('}');

		keywordsSet = new HashSet<>();
		keywordsSet.add("int");
		keywordsSet.add("main");
		keywordsSet.add("boolean");
		keywordsSet.add("double");
		keywordsSet.add("float");
		keywordsSet.add("public");
		keywordsSet.add("return");
		keywordsSet.add("private");
		keywordsSet.add("static");
		keywordsSet.add("String");
	}

	public LexicalAnalyzer() {
		constantList = new ArrayList<>();
		identifyList = new ArrayList<>();
		operatorLists = new ArrayList<>();
		limitLists = new ArrayList<>();
		keywordsLists = new ArrayList<>();
	}

	public void init() {
		constantList.clear();
		identifyList.clear();
		operatorLists.clear();
		limitLists.clear();
		keywordsLists.clear();
		mCurrentPos = 0;
	}

	public static void main(String[] args) {
		// Scanner scanner = new Scanner(System.in);
		LexicalAnalyzer analyzer = new LexicalAnalyzer();
		String str = intPut();
		analyzer.setText(str);
		// while (!(analyzer.mText = scanner.nextLine()).equals("-1")) {
		analyzer.analyze();
		// analyzer.print();
		analyzer.printToken();
		// analyzer.init();
		// }
		// scanner.close();

	}

	/**
	 * 从文件读入源程序
	 * 
	 * @return
	 */
	private static String intPut() {
		String result = "";
		BufferedReader br = null;
		try {
			FileReader reader = new FileReader(new File("test.txt"));
			br = new BufferedReader(reader);
			String str = null;
			while ((str = br.readLine()) != null) {
				result += str;
			}
		} catch (FileNotFoundException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			if (br != null) {
				try {
					br.close();
				} catch (IOException e) {
					e.printStackTrace();
				} finally {
					br = null;
				}
			}
		}
		return result;
	}

	private void print() {
		System.out.print("常量:");
		for (Constant<?> constant : constantList) {
			System.out.print(constant + " ");
		}
		System.out.println();
		System.out.print("标识符:");
		for (Identify identify : identifyList) {
			System.out.print(identify + " ");
		}
		System.out.println();
		System.out.print("关键字:");
		for (String s : keywordsLists) {
			System.out.print(s + " ");
		}
		System.out.println();
		System.out.print("界限符:");
		for (String s : limitLists) {
			System.out.print(s + " ");
		}
		System.out.println();
		System.out.print("操作符:");
		for (String s : operatorLists) {
			System.out.print(s + " ");
		}
	}

	public void printToken() {
		System.out.println(mText);
		for (Result res : lists) {
			System.out.println(res);
		}
	}

	/**
	 * 分析输入的源程序
	 */
	public void analyze() {
		char currentC;
		while (mCurrentPos + 1 <= mText.length()) {
			currentC = mText.charAt(mCurrentPos);
			Result res = new Result();
			if (isSpace(currentC)) {
				// mCurrentPos++;
			} else if (isLetter(currentC)) {
				String identify = getIdentifier();
				if (isKeyWords(identify)) {
					keywordsLists.add(identify);
					res.descr = "关键字";
					res.value = identify;
					// System.out.print("关键字: " + identify + " ");
				} else {
					identifyList.add(new Identify(identify));
					// System.out.print("标识符: " + identify + " ");
					res.descr = "标志符";
					res.value = identify;
				}
				lists.add(res);
			} else if (isDigit(currentC)) {
				Constant<?> con = getConstant();
				constantList.add(con);
				// System.out.print("常量: " + con.value + " ");
				res.descr = "常量";
				res.value = con.value + "";
				lists.add(res);
			} else if (isLimit(currentC)) {
				// mCurrentPos++;
				limitLists.add(currentC + "");
				res.descr = "界限符";
				res.value = currentC + "";
				lists.add(res);
				// System.out.print("界限符: " + currentC + " ");
			} else if (isOperator(currentC)) {
				// mCurrentPos++;
				operatorLists.add(currentC + "");
				// System.out.print("操作符: " + currentC + " ");
				res.descr = "操作符";
				res.value = currentC + "";
				lists.add(res);
			}
			mCurrentPos++;

			// System.out.println("analyze mcurrentPos " + mCurrentPos);
		}
	}

	private boolean isKeyWords(String identify) {
		return keywordsSet.contains(identify);
	}

	/**
	 * 是否为数字字符
	 * 
	 * @param c
	 * @return
	 */
	public boolean isDigit(char c) {
		if (c >= '0' && c <= '9') {
			return true;
		}
		return false;
	}

	/**
	 * 判断是否为字母
	 * 
	 * @param c
	 * @return
	 */
	public boolean isLetter(char c) {
		if (c >= 'A' && c <= 'Z' || c >= 'a' && c <= 'z') {
			return true;
		}
		return false;
	}

	/**
	 * 是否为空格
	 * 
	 * @param c
	 * @return
	 */
	public boolean isSpace(char c) {
		if (c == ' ' || c == '\t') {
			return true;
		}
		return false;
	}

	/**
	 * 得到标志符
	 * 
	 * @return
	 */
	public String getIdentifier() {
		String result = "";
		char currentC = mText.charAt(mCurrentPos);
		// System.out.println(currentC);
		if (isLetter(currentC) || currentC == '_') {
			result += currentC;
			mCurrentPos++;
			currentC = mText.charAt(mCurrentPos);
			while (isDigit(currentC) || isLetter(currentC)) {
				result += currentC;
				mCurrentPos++;
				if (mCurrentPos + 1 > mText.length())
					break;
				currentC = mText.charAt(mCurrentPos);
			}
			// 跳出循环有两种情况,一种匹配完所有的串,还有一个为遇到非数字,字母
			if (!isDigit(currentC) && !isLetter(currentC)) {
				mCurrentPos--;
			}
		}
		// System.out.println("getIdentifier mCurrentPos " + mCurrentPos);
		return result;
	}

	/**
	 * 得到整数
	 * 
	 * @return
	 */
	public Constant<Integer> getIntConstant() {
		Constant<Integer> result = null;
		char currentC = mText.charAt(mCurrentPos);
		if (isDigit(currentC)) {
			result = new Constant<>();
			result.value = charToInt(currentC);
			result.len++;
			mCurrentPos++;
			if (mCurrentPos + 1 < mText.length()) {
				currentC = mText.charAt(mCurrentPos);
				while (isDigit(currentC)) {
					result.value = result.value * 10 + charToInt(currentC);
					result.len++;
					mCurrentPos++;
					if (mCurrentPos + 1 > mText.length())
						break;
					currentC = mText.charAt(mCurrentPos);
				}
				if (!isDigit(currentC)) {
					mCurrentPos--;
				}
			}
		}
		// System.out.println("getIntConstant mCurrentPos " + mCurrentPos);
		return result;
	}

	/**
	 * 得到常量
	 * 
	 * @return
	 */
	public Constant<?> getConstant() {
		Constant<?> result = null;
		Constant<Integer> result1 = getIntConstant();
		boolean isHavePoint = false;
		if (mCurrentPos + 1 < mText.length()) {
			mCurrentPos++;
			char currentC = mText.charAt(mCurrentPos);
			if (currentC == '.') {
				mCurrentPos++;
				if (mCurrentPos + 1 < mText.length()) {
					currentC = mText.charAt(mCurrentPos);
					Constant<Integer> result2 = null;
					if (isDigit(currentC)) {
						result2 = getIntConstant();
						if (result2 != null) {
							double y = result2.value;

							for (int i = 0; i < result2.len; i++) {
								y /= 10.0;
							}
							Constant<Double> result3 = new Constant<Double>();
							result3.value = (double) result1.value.intValue() + y;
							result3.len = result1.len + result2.len;
							result = result3;
							isHavePoint = true;
						}
					}
				}
			} else {
				// 不是小数点,回退一个字符
				mCurrentPos--;
			}
		}
		if (!isHavePoint) {
			result = result1;
		}
		return result;
	}

	/**
	 * 字符转化为int型
	 * 
	 * @param c
	 * @return
	 */
	public int charToInt(char c) {
		return c - '0';
	}

	/**
	 * 是否为操作符
	 * 
	 * @param c
	 * @return
	 */
	public boolean isOperator(char c) {
		return operatorSet.contains(c);
	}

	/**
	 * 是否为界限符
	 * 
	 * @param c
	 * @return
	 */
	public boolean isLimit(char c) {
		return limitSet.contains(c);
	}

	class Constant<T> {
		T value;
		int len;

		public Constant() {
			len = 0;
		}

		@Override
		public String toString() {
			return "Constant [value=" + value + ", len=" + len + "]";
		}

	}

	class Identify {
		String name;
		String id;

		public Identify(String identifier) {
			this.name = identifier;
		}

		@Override
		public String toString() {
			return "Identify [name=" + name + ", id=" + id + "]";
		}

	}

	class Result {
		String descr;
		String value;

		@Override
		public String toString() {
			return "Result [descr=" + descr + ", value=" + value + "]";
		}

	}
}

 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值