编译原理-词法分析

N-Ge

于 2021-05-28 11:27:06 发布

阅读量335

点赞数 1

分类专栏：编译原理　文章标签：java

本文链接：https://blog.csdn.net/m0_47355351/article/details/117356654

版权

编译原理专栏收录该内容

1 篇文章 0 订阅

订阅专栏

给出一个简单程序语言的语法结构

在这里插入图片描述

单词种类编码

在这里插入图片描述

代码实现

主方法

package com.cheng.wordAnalyzer;

import java.util.ArrayList;

public class Main {

	/**
	 * Entry point: runs the lexical analyzer over a fixed input file and writes
	 * the resulting token table to a fixed output file.
	 *
	 * NOTE(review): both paths are hard-coded and their non-ASCII directory names
	 * look encoding-damaged in this copy of the source; confirm the real
	 * locations before running.
	 *
	 * @param args command-line arguments (not used)
	 */
	public static void main(String[] args) {
		// File holding the program text to analyze.
		String sourceFile="D:\\�ҵ��ļ�\\������\\complierTheory/word.txt";
		// File that receives the analysis result.
		String reportFile="D:\\�ҵ��ļ�\\������\\complierTheory/save.txt";

		ArrayList<ArrayList<WordKind>> tokens=new WordAnalyzer(sourceFile).analyze();
		new AnalyzeResult(tokens,reportFile).save();
	}

}

单词编码

package com.cheng.wordAnalyzer;

/**
 * A single lexeme together with its classification.
 *
 * <p>{@link #coding()} fills in {@code type} and {@code code} from the lexeme
 * text: reserved lexemes (operators, data types, boolean constants, keywords,
 * separators) get a fixed code in the range 100-154; everything else is
 * classified as a number (200), an identifier (201) or "other" (202).
 */
public class WordKind {

    /** Reserved lexeme -> type name ("operator", "datatype", "constant", "key", "symbol"). */
    private static final java.util.Map<String, String> RESERVED_TYPES =
            new java.util.HashMap<String, String>();

    /** Reserved lexeme -> numeric code. */
    private static final java.util.Map<String, Integer> RESERVED_CODES =
            new java.util.HashMap<String, Integer>();

    /** Digits, optionally followed by a dot and more digits (a trailing dot is accepted). */
    private static final java.util.regex.Pattern NUMBER =
            java.util.regex.Pattern.compile("[0-9]+[.]?[0-9]*");

    /**
     * A letter, '_' or '$' followed by letters, digits, '_' or '$'.
     * The character classes deliberately contain no commas: inside [...] a comma
     * is a literal character, not a separator.
     */
    private static final java.util.regex.Pattern IDENTIFIER =
            java.util.regex.Pattern.compile("[_$\\p{Alpha}][\\p{Alnum}_$]*");

    static {
        // Codes are assigned consecutively from the given start value, in the
        // order listed. Append new lexemes at the end of a group (or add a new
        // group) so that existing codes never shift.
        register("operator", 100,
                "+", "-", "*", "/", "(", ")", "{", "}", "=", ".",
                "'", "\"", "%", "&", "!", "[", "]", "|");                  // 100-117
        register("datatype", 118,
                "boolean", "byte", "short", "int", "long",
                "float", "double", "char");                                // 118-125
        register("constant", 126, "true", "false");                        // 126-127
        // NOTE(review): "packet" is probably meant to be "package"; kept as-is
        // because the language definition is not visible here - confirm.
        register("key", 128,
                "for", "if", "else", "while", "do", "continue", "switch",
                "case", "break", "abstract", "assert", "catch", "class",
                "const", "default", "enum", "extends", "finally", "goto",
                "public", "void", "packet", "import");                     // 128-150
        register("symbol", 151, ";", ",");                                 // 151-152
        register("operator", 153, "^", "#");                               // 153-154
    }

    String string;
    String type;
    int code;

    /**
     * Creates an unclassified word from a single character.
     *
     * @param ch the character forming the lexeme
     */
    public WordKind(char ch) {
        this.string = String.valueOf(ch);
    }

    /**
     * Creates an unclassified word from a string.
     *
     * @param ch the lexeme text
     */
    public WordKind(String ch) {
        this.string = ch;
    }

    public String getString() {
        return string;
    }

    public void setString(String string) {
        this.string = string;
    }

    public String getType() {
        return type;
    }

    public void setType(String type) {
        this.type = type;
    }

    public int getCode() {
        return code;
    }

    public void setCode(int code) {
        this.code = code;
    }

    /**
     * Classifies the current lexeme, setting {@code type} and {@code code}.
     * Reserved lexemes take their table entry; otherwise the lexeme is a
     * number ("digital", 200), an identifier ("identifier", 201) or
     * "other" (202). A {@code null} lexeme is classified as "other".
     */
    public void coding() {
        String reservedType = RESERVED_TYPES.get(string);
        if (reservedType != null) {
            this.type = reservedType;
            this.code = RESERVED_CODES.get(string);
            return;
        }
        if (this.isDigital()) {
            this.type = "digital";
            this.code = 200;
        } else if (this.isIdentifier()) {
            this.type = "identifier";
            this.code = 201;
        } else {
            this.type = "other";
            this.code = 202;
        }
    }

    /**
     * Tells whether the lexeme is a numeric literal.
     *
     * @return {@code true} if the lexeme is one or more digits, optionally
     *         followed by a dot and further digits; {@code false} otherwise
     *         or when the lexeme is {@code null}
     */
    public boolean isDigital() {
        return string != null && NUMBER.matcher(string).matches();
    }

    /**
     * Tells whether the lexeme has the shape of an identifier.
     *
     * @return {@code true} if the lexeme starts with an ASCII letter, '_' or
     *         '$' and continues with ASCII letters, digits, '_' or '$';
     *         {@code false} otherwise or when the lexeme is {@code null}
     */
    public boolean isIdentifier() {
        return string != null && IDENTIFIER.matcher(string).matches();
    }

    /** Adds the lexemes to the reserved tables with consecutive codes starting at firstCode. */
    private static void register(String type, int firstCode, String... lexemes) {
        int next = firstCode;
        for (String lexeme : lexemes) {
            RESERVED_TYPES.put(lexeme, type);
            RESERVED_CODES.put(lexeme, next);
            next++;
        }
    }

}

对整个文件进行分析：逐行读取，并按空白字符拆分

package com.cheng.wordAnalyzer;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.StringTokenizer;

/**
 * Reads a source file and turns it into lists of classified words.
 */
public class WordAnalyzer {
   String path;

   /**
    * @param path path of the file containing the program text to analyze
    */
   public WordAnalyzer(String path) {
	   this.path=path;
   }

   /**
    * Reads the file line by line, cuts every line at whitespace and hands each
    * whitespace-free chunk to {@link WordAnalyze#split()}.
    *
    * <p>The file is decoded with the platform default charset (that is what
    * {@link FileReader} uses here). An {@link IOException} is reported on
    * standard error and whatever was collected so far is returned.
    *
    * @return one inner list per whitespace-separated chunk, in file order;
    *         empty when the file has no chunks or could not be opened
    */
   public ArrayList<ArrayList<WordKind>> analyze() {
	   ArrayList<ArrayList<WordKind>> staff=new ArrayList<>();
	   // try-with-resources closes the reader even when reading or splitting fails.
	   try (BufferedReader bufferedReader=new BufferedReader(new FileReader(new File(path)))) {
		   String line;
		   while((line=bufferedReader.readLine())!=null) {
			   // Split the line at whitespace; blank lines simply yield no tokens.
			   StringTokenizer chunks=new StringTokenizer(line);
			   while(chunks.hasMoreTokens()) {
				   WordAnalyze wordAnalyze=new WordAnalyze(chunks.nextToken());
				   staff.add(wordAnalyze.split());
			   }
		   }
	   }catch(IOException e){
		   e.printStackTrace();
	   }
	   return staff;
   }

}

对文件中连在一起（中间没有空格）的字符串进行分析拆分

package com.cheng.wordAnalyzer;

import java.util.ArrayList;



/**
 * Splits one whitespace-free chunk of program text into classified words.
 *
 * <p>Every ASCII punctuation character is a one-character word of its own and
 * also ends the word before it, with two exceptions: '_' and '$' stay inside
 * the surrounding word, and a '.' that has a digit directly before and after
 * it stays inside the surrounding word (decimal point).
 */
public class WordAnalyze {

    /** Matches a single ASCII punctuation character. */
    private static final java.util.regex.Pattern PUNCTUATION =
            java.util.regex.Pattern.compile("\\p{Punct}");

    String s;

    /**
     * @param str the chunk to split; expected to contain no whitespace
     */
    WordAnalyze(String str) {
        this.s = str;
    }

    /**
     * Splits the chunk and classifies every resulting word with
     * {@link WordKind#coding()}.
     *
     * @return the words in order of appearance; empty for a {@code null} or
     *         empty chunk
     */
    public ArrayList<WordKind> split() {
        ArrayList<WordKind> staff = new ArrayList<>();
        if (s == null) {
            return staff;
        }
        int length = s.length();
        int first = 0; // start index of the word currently being accumulated
        for (int i = 0; i < length; i++) {
            char c = s.charAt(i);
            if (c == '_' || c == '$') {
                continue; // legal inside identifiers
            }
            if (c == '.' && isDecimalPoint(i)) {
                continue; // digit '.' digit: part of a number
            }
            if (!PUNCTUATION.matcher(String.valueOf(c)).matches()) {
                continue; // ordinary character, keep accumulating
            }
            // c is a separator: flush the pending word (if any), then c itself.
            // This also covers a '.' in the last position, which must not be
            // left glued to the preceding word.
            if (first < i) {
                addWord(staff, s.substring(first, i));
            }
            addWord(staff, String.valueOf(c));
            first = i + 1;
        }
        if (first < length) {
            addWord(staff, s.substring(first, length));
        }
        return staff;
    }

    /** Tells whether the '.' at index i has a digit directly before and directly after it. */
    private boolean isDecimalPoint(int i) {
        return i > 0
                && i < s.length() - 1
                && Character.isDigit(s.charAt(i - 1))
                && Character.isDigit(s.charAt(i + 1));
    }

    /** Wraps text in a WordKind, classifies it and appends it to out. */
    private static void addWord(ArrayList<WordKind> out, String text) {
        WordKind word = new WordKind(text);
        word.coding();
        out.add(word);
    }
}

分析结果保存

package com.cheng.wordAnalyzer;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;

/**
 * Writes the result of the lexical analysis to a text file as a
 * tab-separated table.
 */
public class AnalyzeResult {
	ArrayList<ArrayList<WordKind>> list;
	String savePath;

	/**
	 * @param list     the analyzed words, grouped as produced by the analyzer;
	 *                 may be {@code null}, in which case {@link #save()} does nothing
	 * @param savePath path of the file to (over)write
	 */
	public AnalyzeResult(ArrayList<ArrayList<WordKind>> list,String savePath) {
		this.list=list;
		this.savePath=savePath;
	}

	/**
	 * Writes a header line followed by one line per word: running number,
	 * lexeme, code and type, separated by tabs.
	 *
	 * <p>The running number counts words across all groups, starting at 1; it
	 * is not a source line number even though the label reads that way. The
	 * file is written with the platform default charset (what
	 * {@link FileWriter} uses here). An {@link IOException} is reported on
	 * standard error.
	 */
	public void save() {
		if(list==null) {
			return;
		}
		// try-with-resources flushes and closes the writer even when a write fails.
		try (BufferedWriter bufferedWriter=new BufferedWriter(new FileWriter(new File(savePath)))) {
			// Real tab characters, matching the separators used in the data rows.
			bufferedWriter.write("行数\t单词符号\t单词编码\t单词种类");
			bufferedWriter.newLine();
			int count=1;
			for(ArrayList<WordKind> words:list) {
				if(words==null) {
					continue; // nothing to write for a missing group
				}
				for(WordKind wordKind:words) {
					bufferedWriter.write("第"+count+"行\t"+wordKind.string+"\t"+wordKind.code+"\t"+wordKind.type);
					bufferedWriter.newLine();
					count++;
				}
			}
		}catch(IOException e) {
			e.printStackTrace();
		}
	}

}