编译原理—词法分析器

最新推荐文章于 2023-10-24 22:30:28 发布

wuerting

最新推荐文章于 2023-10-24 22:30:28 发布

阅读量419

点赞数

文章标签： string character import interface class integer

本文链接：https://blog.csdn.net/wuerting/article/details/6334378

版权

import java.io.*;
import java.util.*;

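/**
 * Lexical analyzer.
 *
 * Token categories:
 *
 * 1. Keywords: abstract boolean break byte case catch char class continue
 * default do double else extends final finally float for if implements import
 * instanceof int interface long native new package private protected public
 * return short static super switch synchronized this throw throws transient try
 * void volatile while
 *
 * 2. Identifiers
 *
 * 3. Constants
 *
 * 4. Operators: + - * / = > < &
 *
 * 5. Separators: , ; { } ( ) [ ] _ : 、 . "
 *
 * Program flow:
 *
 * 0. Definitions: declare constants, variables and data structures.
 *
 * 1. Initialization: read the whole source program from a file into the character buffer.
 *
 * 2. Before taking a token: skip redundant blanks.
 *
 * 3. After taking a token: skip redundant blanks (optional, as needed).
 *
 * 4. Taking a token: read the token character by character, assemble it and determine its type.
 * (The key questions are how to tell that the token has ended and which category it belongs to.)
 *
 * 5. Display the results.
 */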

public class Analyzer {

private String keyWords[] = { "abstract", "boolean", "break", "byte",
   "case", "catch", "char", "class", "continue", "default", "do",
   "double", "else", "extends", "final", "finally", "float", "for",
   "if", "implements", "import", "instanceof", "int", "interface",
   "long", "native", "new", "package", "private", "protected",
   "public", "return", "short", "static", "super", "switch",
   "synchronized", "this", "throw", "throws", "transient", "try",
   "void", "volatile", "while" }; // 关键字数组
private char operators[] = { '+', '-', '*', '/', '=', '>', '<', '&' }; // 运算符数组
private char separators[] = { ',', ';', '{', '}', '(', ')','[',']','_',':','、','.' ,'"'}; // 分隔符数组
private String fileSrcName; // 源程序名
private StringBuffer buffer = new StringBuffer(); // 缓冲区
private char ch; // 字符变量，存放最新读进的源程序字符
private static int i = 0;
private String strToken; // 字符数组，存放构成单词符号的字符串
private HashSet<String> retKeyWords = new HashSet<String>(); // 1.关键字结果
private HashSet<String> retIds = new HashSet<String>(); // 2.标识符结果
private HashSet<Integer> retConsts = new HashSet<Integer>(); // 3.常数结果
private HashSet<Character> retOperators = new HashSet<Character>(); // 4.运算符结果
private HashSet<Character> retSeparators = new HashSet<Character>(); // 5.分隔符结果

/**
 * Creates an analyzer without a source file name; fileSrcName is left null.
 */
public Analyzer() {

}

/**
 * Creates an analyzer for the given source file.
 *
 * @param fileSrcName path of the source file that readFile() will open
 */
public Analyzer(String fileSrcName) {
this.fileSrcName = fileSrcName;
}

/**
 * Reads the next source character into ch and advances the cursor by one.
 *
 * When the cursor is outside the buffer (input exhausted), ch is set to the
 * NUL character ('\0') instead of letting charAt throw
 * StringIndexOutOfBoundsException. The cursor is advanced in that case too,
 * so a following retract() moves it back exactly as after a normal read.
 */
public void getChar() {
    // '\0' is not a letter, digit or space character and is not listed in
    // operators/separators, so every scanning loop stops when it sees it.
    boolean inRange = i >= 0 && i < buffer.length();
    ch = inRange ? buffer.charAt(i) : '\0';
    i++;
}

/**
 * Skips blanks: while ch holds a blank character, reads the next character
 * with getChar().
 *
 * A character counts as blank when either Character.isWhitespace or
 * Character.isSpaceChar accepts it; isSpaceChar alone does not cover tabs
 * and line terminators such as '\n'. Skipping stops once the cursor reaches
 * the end of the buffer, so ch may still be blank on return when the input
 * ends in blanks.
 */
public void getBc() {
    while ((Character.isWhitespace(ch) || Character.isSpaceChar(ch))
            && i < buffer.length()) { // never read past the end of the buffer
        getChar();
    }
}

/**
 * Appends the current character ch to the end of strToken.
 */
public void concat() {
    strToken = strToken + ch;
}

/**
 * Tells whether the current character ch is a letter.
 *
 * @return the result of Character.isLetter for ch
 */
boolean isLetter() {
    return Character.isLetter(ch);
}

/**
 * Tells whether the current character ch is a digit.
 *
 * @return the result of Character.isDigit for ch
 */
boolean isDigit() {
    return Character.isDigit(ch);
}

/**
 * Un-reads one character: replaces ch with a space and moves the cursor back
 * by one position.
 */
public void retract() {
    ch = ' ';
    i -= 1;
}

/**
 * Tells whether the token held in strToken is one of the reserved words in
 * keyWords.
 *
 * @return true if strToken equals an entry of keyWords, false otherwise
 */
public boolean isKeyWord() {
    return Arrays.asList(keyWords).contains(strToken);
}

/**
 * Tells whether the current character ch is listed in operators.
 *
 * @return true if ch matches an operator character, false otherwise
 */
public boolean isOperator() {
    for (char candidate : operators) {
        if (candidate == ch) {
            return true;
        }
    }
    return false;
}

/**
 * Tells whether the current character ch is listed in separators.
 *
 * @return true if ch matches a separator character, false otherwise
 */
public boolean isSeparators() {
    for (char candidate : separators) {
        if (candidate == ch) {
            return true;
        }
    }
    return false;
}

/**
 * Adds a token to the keyword result set (retKeyWords).
 *
 * @param strToken token text to record; note that this parameter shadows the
 *                 strToken field
 */
public void insertKeyWords(String strToken) {
retKeyWords.add(strToken);
}

/**
 * Adds a token to the identifier result set (retIds).
 *
 * @param strToken token text to record; note that this parameter shadows the
 *                 strToken field
 */
public void insertId(String strToken) {
retIds.add(strToken);
}

/**
 * Converts a token to an int and adds it to the constant result set
 * (retConsts).
 *
 * @param strToken decimal digits of the constant
 * @throws NumberFormatException if strToken cannot be parsed as an int
 */
public void insertConst(String strToken) {
    retConsts.add(Integer.valueOf(strToken));
}

/**
 * Adds a character to the operator result set (retOperators).
 *
 * @param ch operator character to record; note that this parameter shadows
 *           the ch field
 */
public void insertOperators(char ch) {
retOperators.add(ch);
}

/**
 * Adds the current character (the ch field) to the separator result set
 * (retSeparators).
 */
public void insertSeparators() {
retSeparators.add(ch);
}

/**
 * Loads the source file named by fileSrcName into buffer.
 *
 * readLine() strips line terminators, so a '\n' is appended after every line;
 * without it the last token of one line would merge with the first token of
 * the next. The reader is always closed, whether reading succeeds or fails.
 * Failures are reported on standard output and leave the buffer with
 * whatever was read so far.
 */
public void readFile() {
    if (this.fileSrcName == null) {
        // No file name was configured (no-arg constructor): report it the
        // same way as a missing file instead of failing with a NullPointerException.
        System.out.println("源文件未找到!");
        return;
    }
    BufferedReader reader = null;
    try {
        reader = new BufferedReader(new FileReader(this.fileSrcName));
        String line;
        while ((line = reader.readLine()) != null) {
            buffer.append(line).append('\n'); // keep a boundary between lines
        }
    } catch (FileNotFoundException e) {
        System.out.println("源文件未找到!");
        e.printStackTrace();
    } catch (IOException e) {
        System.out.println("读写文件出现异常!");
        e.printStackTrace();
    } finally {
        if (reader != null) {
            try {
                reader.close();
            } catch (IOException ignored) {
                // the data has already been read; a failed close is not actionable
            }
        }
    }
}

/**
 * Runs the lexical analysis over the buffered source text.
 *
 * From the current cursor position to the end of the buffer it repeatedly
 * reads a character, skips blanks and classifies what follows:
 * a run of letters/digits that starts with a letter goes to the keyword
 * table if isKeyWord() accepts it, otherwise to the identifier table;
 * a run of digits goes to the constant table; a single operator or separator
 * character goes to its table. Any other character is ignored.
 *
 * A word or number that ends exactly at the end of the buffer is recorded
 * without reading past the end.
 */
public void analyse() {
    strToken = ""; // start with an empty token
    while (i < buffer.length()) {
        getChar();
        getBc();
        if (isLetter()) {
            collectToken(true);
            if (isKeyWord()) {
                insertKeyWords(strToken);
            } else {
                insertId(strToken);
            }
            strToken = "";
        } else if (isDigit()) {
            collectToken(false);
            insertConst(strToken);
            strToken = "";
        } else if (isOperator()) {
            insertOperators(ch);
        } else if (isSeparators()) {
            insertSeparators();
        }
    }
}

/**
 * Appends ch and the characters following it to strToken for as long as they
 * are digits or, when lettersAllowed is true, letters.
 *
 * The first non-matching character is pushed back with retract(). If the
 * buffer ends while the token is still running, no look-ahead character was
 * read, so nothing is pushed back.
 */
private void collectToken(boolean lettersAllowed) {
    while (isDigit() || (lettersAllowed && isLetter())) {
        concat();
        if (i >= buffer.length()) {
            return; // input exhausted: the token ends here
        }
        getChar();
    }
    retract();
}

/**
 * Entry point: analyses one source file and prints the five result tables.
 *
 * @param args optional; args[0], when present, is the path of the file to
 *             analyse. Without arguments the built-in default path is used.
 */
public static void main(String[] args) {
    String fileName = (args != null && args.length > 0)
            ? args[0]
            : "F://java//Analisis//src//test.txt";
    Analyzer alr = new Analyzer(fileName);
    alr.readFile();
    alr.analyse();

    System.out.println("词法分析器的分析结果如下：");
    printTable("1、关键字表", alr.retKeyWords);
    System.out.println();
    printTable("2、标识符表", alr.retIds);
    System.out.println();
    printTable("3、常数表", alr.retConsts);
    System.out.println();
    printTable("4、运算符表", alr.retOperators);
    System.out.println();
    printTable("5、分隔符表", alr.retSeparators); // last table: no trailing newline
}

/**
 * Prints a table title on its own line, then every entry followed by one space.
 */
private static void printTable(String title, Iterable<?> entries) {
    System.out.println(title);
    for (Object entry : entries) {
        System.out.print(entry + " ");
    }
}