给出一个简单程序语言的语法结构
单词种类编码
代码实现
主方法
package com.cheng.wordAnalyzer;
import java.util.ArrayList;
/**
 * Command-line entry point: runs the word analyzer over a fixed input file
 * and stores the classified tokens in a fixed output file.
 */
public class Main {
    /**
     * Analyzes the input file and saves the result table.
     *
     * @param args ignored; both paths are hard-coded below
     */
    public static void main(String[] args) {
        // NOTE(review): the directory names in both paths look mis-encoded
        // (replacement characters); confirm the intended location.
        String inputPath = "D:\\�ҵ��ļ�\\������\\complierTheory/word.txt";
        String outputPath = "D:\\�ҵ��ļ�\\������\\complierTheory/save.txt";

        WordAnalyzer analyzer = new WordAnalyzer(inputPath);
        ArrayList<ArrayList<WordKind>> tokens = analyzer.analyze();

        new AnalyzeResult(tokens, outputPath).save();
    }
}
单词编码
package com.cheng.wordAnalyzer;
/**
 * One token of the analyzed text together with its classification.
 *
 * <p>{@link #coding()} fills in {@code type} and {@code code} from the token
 * text: fixed operators, data types, constants, keywords and symbols use the
 * codes 100-154; everything else is classified as a number (200), an
 * identifier (201) or "other" (202).
 */
public class WordKind {
    private static final String OPERATOR = "operator";
    private static final String DATATYPE = "datatype";
    private static final String CONSTANT = "constant";
    private static final String KEY = "key";
    private static final String SYMBOL = "symbol";

    /** One or more digits, optionally followed by '.' and more digits (e.g. 12, 12., 12.5). */
    private static final String DIGITAL_REGEX = "[0-9]+[.]?[0-9]*";

    /**
     * A letter, '_' or '$' followed by letters, digits, '_' or '$'.
     * The earlier pattern separated the class members with commas, which made
     * ',' itself a legal identifier character.
     */
    private static final String IDENTIFIER_REGEX = "[_$\\p{Alpha}][\\p{Alnum}_$]*";

    // The fields are package-private on purpose: other classes in the package
    // read them directly.
    String string;
    String type;
    int code;

    /**
     * Creates an unclassified token from a single character.
     *
     * @param ch the token character
     */
    public WordKind(char ch) {
        this.string = String.valueOf(ch);
    }

    /**
     * Creates an unclassified token from a string.
     *
     * @param ch the token text
     */
    public WordKind(String ch) {
        this.string = ch;
    }

    /** @return the token text */
    public String getString() {
        return string;
    }

    /** @param string the new token text; call {@link #coding()} again to reclassify */
    public void setString(String string) {
        this.string = string;
    }

    /** @return the token category, or {@code null} before {@link #coding()} was called */
    public String getType() {
        return type;
    }

    /** @param type the token category */
    public void setType(String type) {
        this.type = type;
    }

    /** @return the numeric token code, or 0 before {@link #coding()} was called */
    public int getCode() {
        return code;
    }

    /** @param code the numeric token code */
    public void setCode(int code) {
        this.code = code;
    }

    /**
     * Classifies the token text and stores the result in {@code type} and {@code code}.
     *
     * @throws NullPointerException if the token text is {@code null}
     */
    public void coding() {
        switch (string) {
            case "+": assign(OPERATOR, 100); break;
            case "-": assign(OPERATOR, 101); break;
            case "*": assign(OPERATOR, 102); break;
            case "/": assign(OPERATOR, 103); break;
            case "(": assign(OPERATOR, 104); break;
            case ")": assign(OPERATOR, 105); break;
            case "{": assign(OPERATOR, 106); break;
            case "}": assign(OPERATOR, 107); break;
            case "=": assign(OPERATOR, 108); break;
            case ".": assign(OPERATOR, 109); break;
            case "'": assign(OPERATOR, 110); break;
            case "\"": assign(OPERATOR, 111); break;
            case "%": assign(OPERATOR, 112); break;
            case "&": assign(OPERATOR, 113); break;
            case "!": assign(OPERATOR, 114); break;
            case "[": assign(OPERATOR, 115); break;
            case "]": assign(OPERATOR, 116); break;
            case "|": assign(OPERATOR, 117); break;
            case "boolean": assign(DATATYPE, 118); break;
            case "byte": assign(DATATYPE, 119); break;
            case "short": assign(DATATYPE, 120); break;
            case "int": assign(DATATYPE, 121); break;
            case "long": assign(DATATYPE, 122); break;
            case "float": assign(DATATYPE, 123); break;
            case "double": assign(DATATYPE, 124); break;
            case "char": assign(DATATYPE, 125); break;
            case "true": assign(CONSTANT, 126); break;
            case "false": assign(CONSTANT, 127); break;
            case "for": assign(KEY, 128); break;
            case "if": assign(KEY, 129); break;
            case "else": assign(KEY, 130); break;
            case "while": assign(KEY, 131); break;
            case "do": assign(KEY, 132); break;
            case "continue": assign(KEY, 133); break;
            case "switch": assign(KEY, 134); break;
            case "case": assign(KEY, 135); break;
            case "break": assign(KEY, 136); break;
            case "abstract": assign(KEY, 137); break;
            case "assert": assign(KEY, 138); break;
            case "catch": assign(KEY, 139); break;
            case "class": assign(KEY, 140); break;
            case "const": assign(KEY, 141); break;
            case "default": assign(KEY, 142); break;
            case "enum": assign(KEY, 143); break;
            case "extends": assign(KEY, 144); break;
            case "finally": assign(KEY, 145); break;
            case "goto": assign(KEY, 146); break;
            case "public": assign(KEY, 147); break;
            case "void": assign(KEY, 148); break;
            // NOTE(review): "packet" is probably meant to be "package"; kept
            // unchanged until the language's code table is confirmed.
            case "packet": assign(KEY, 149); break;
            case "import": assign(KEY, 150); break;
            case ";": assign(SYMBOL, 151); break;
            case ",": assign(SYMBOL, 152); break;
            case "^": assign(OPERATOR, 153); break;
            case "#": assign(OPERATOR, 154); break;
            default:
                classifyUnlisted();
        }
    }

    /**
     * Tells whether the token text is an unsigned decimal number.
     *
     * @return {@code true} for digits with an optional '.' and fraction digits
     */
    public boolean isDigital() {
        return string.matches(DIGITAL_REGEX);
    }

    /**
     * Tells whether the token text has the shape of an identifier.
     *
     * @return {@code true} if the text starts with a letter, '_' or '$' and
     *         continues with letters, digits, '_' or '$' only
     */
    public boolean isIdentifier() {
        return string.matches(IDENTIFIER_REGEX);
    }

    /** Stores a category/code pair on this token. */
    private void assign(String type, int code) {
        this.type = type;
        this.code = code;
    }

    /** Classifies text that is not in the fixed table: number, identifier or other. */
    private void classifyUnlisted() {
        if (isDigital()) {
            assign("digital", 200);
        } else if (isIdentifier()) {
            assign("identifier", 201);
        } else {
            assign("other", 202);
        }
    }
}
对整个文件进行分析,拆分
package com.cheng.wordAnalyzer;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.StringTokenizer;
/**
 * Reads a text file and breaks it into classified tokens.
 *
 * <p>Each line is cut at whitespace; every resulting chunk is handed to
 * {@link WordAnalyze#split()} and contributes one inner list to the result.
 */
public class WordAnalyzer {
    String path;

    /**
     * @param path location of the file to analyze
     */
    public WordAnalyzer(String path) {
        this.path = path;
    }

    /**
     * Tokenizes the whole file.
     *
     * <p>An {@link IOException} is reported on standard error and ends the
     * scan; whatever was collected up to that point is still returned.
     *
     * @return one list of tokens per whitespace-delimited chunk, in file order;
     *         never {@code null}
     */
    public ArrayList<ArrayList<WordKind>> analyze() {
        ArrayList<ArrayList<WordKind>> staff = new ArrayList<>();
        File file = new File(path);
        // try-with-resources closes both readers even when readLine() fails.
        try (FileReader fileReader = new FileReader(file);
             BufferedReader bufferedReader = new BufferedReader(fileReader)) {
            String line;
            while ((line = bufferedReader.readLine()) != null) {
                StringTokenizer chunks = new StringTokenizer(line);
                while (chunks.hasMoreTokens()) {
                    WordAnalyze wordAnalyze = new WordAnalyze(chunks.nextToken());
                    staff.add(wordAnalyze.split());
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return staff;
    }
}
对文件中的连在一起的句子进行分析拆分
package com.cheng.wordAnalyzer;
import java.util.ArrayList;
/**
 * Splits one whitespace-free chunk of text into classified tokens.
 *
 * <p>Every punctuation character becomes a token of its own and also ends
 * the token before it. Exceptions: '_' and '$' never split, and a '.' that
 * belongs to a number stays inside that number.
 */
public class WordAnalyze {
    /** Matches a single ASCII punctuation character. */
    private static final String PUNCTUATION_REGEX = "\\p{Punct}";

    String s;

    /**
     * @param str the chunk to split; must not be {@code null}
     */
    WordAnalyze(String str) {
        this.s = str;
    }

    /**
     * Cuts the chunk into tokens and classifies each one via {@link WordKind#coding()}.
     *
     * <p>Tokens produced while handling a '.' are additionally echoed to
     * standard output, as before.
     *
     * @return the tokens in their original order; empty for an empty chunk
     */
    public ArrayList<WordKind> split() {
        ArrayList<WordKind> staff = new ArrayList<>();
        int length = s.length();
        int first = 0; // start of the token that is currently being collected
        for (int i = 0; i < length; i++) {
            char c = s.charAt(i);
            if (c == '_' || c == '$') {
                continue; // identifier characters, never separators
            }
            boolean dot = c == '.';
            if (dot) {
                if (first != i && isDecimalPoint(first, i)) {
                    continue; // the '.' is part of a number
                }
            } else if (!String.valueOf(c).matches(PUNCTUATION_REGEX)) {
                continue; // ordinary character, keep collecting
            }
            if (first != i) {
                emit(staff, s.substring(first, i), dot);
            }
            emit(staff, String.valueOf(c), dot);
            first = i + 1;
        }
        if (first < length) {
            emit(staff, s.substring(first, length), false);
        }
        return staff;
    }

    /**
     * Decides whether the '.' at {@code i} belongs to the pending token
     * starting at {@code first} (requires {@code first < i}).
     *
     * <p>Inside the chunk the '.' must sit between two digits. At the very end
     * of the chunk it is kept only when the pending token consists of digits
     * alone, so "12." stays one token while "abc." is split into "abc" and ".".
     * Previously a trailing '.' was never split off at all.
     */
    private boolean isDecimalPoint(int first, int i) {
        if (i + 1 < s.length()) {
            return Character.isDigit(s.charAt(i - 1)) && Character.isDigit(s.charAt(i + 1));
        }
        for (int k = first; k < i; k++) {
            if (!Character.isDigit(s.charAt(k))) {
                return false;
            }
        }
        return true;
    }

    /** Classifies {@code text}, appends it to {@code tokens} and optionally echoes it. */
    private static void emit(ArrayList<WordKind> tokens, String text, boolean echo) {
        WordKind word = new WordKind(text);
        word.coding();
        tokens.add(word);
        if (echo) {
            System.out.println(text);
        }
    }
}
分析结果保存
package com.cheng.wordAnalyzer;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
/**
 * Writes the classified tokens to a tab-separated text file.
 */
public class AnalyzeResult {
    ArrayList<ArrayList<WordKind>> list;
    String savePath;

    /**
     * @param list     token groups to write; {@code null} makes {@link #save()} a no-op
     * @param savePath location of the output file
     */
    public AnalyzeResult(ArrayList<ArrayList<WordKind>> list, String savePath) {
        this.list = list;
        this.savePath = savePath;
    }

    /**
     * Writes a header line followed by one line per token: a running number,
     * the token text, its code and its type, separated by tabs.
     *
     * <p>The running number counts tokens across all groups, starting at 1.
     * An {@link IOException} is reported on standard error.
     */
    public void save() {
        if (list == null) {
            return;
        }
        File file = new File(savePath);
        // try-with-resources flushes and closes the writers even on failure.
        try (FileWriter fileWriter = new FileWriter(file);
             BufferedWriter bufferedWriter = new BufferedWriter(fileWriter)) {
            // Real tabs, matching the data rows; the header used to contain
            // the two characters '\' and 't' instead.
            bufferedWriter.write("行数\t单词符号\t单词编码\t单词种类");
            bufferedWriter.newLine();
            int count = 1;
            for (ArrayList<WordKind> group : list) {
                for (WordKind wordKind : group) {
                    bufferedWriter.write("第" + count + "行\t" + wordKind.string
                            + "\t" + wordKind.code + "\t" + wordKind.type);
                    bufferedWriter.newLine();
                    count++;
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}