词法分析实验编程记录——2021-10-15

最新推荐文章于 2024-10-06 20:16:20 发布

stumiss

最新推荐文章于 2024-10-06 20:16:20 发布

阅读量132

点赞数

文章标签： java

本文链接：https://blog.csdn.net/stumiss/article/details/120785464

版权

该博客介绍了一个Java实现的词法分析器，用于识别简单的编程语言中的关键字、标识符、数字、运算符等。程序通过扫描输入的字符数组，逐个读取并解析，输出对应的种别码和识别的单词。程序支持常见的比较和赋值操作符，并能识别特定的关键字，如'begin'、'if'、'then'等。

摘要由CSDN通过智能技术生成

import java.util.*;

/**
 * A minimal hand-written lexical scanner for a small teaching language.
 *
 * <p>The source text is held in {@link #prog} and consumed through the read
 * pointer {@link #p}. Each call to {@link #scaner()} recognizes at most one
 * token, stores its category code in {@link #syn} and prints it as
 * {@code (code,lexeme)}.
 *
 * <p>Category codes produced by this class:
 * <ul>
 *   <li>1-6: keywords {@code begin if then while do end}</li>
 *   <li>10: identifier (a letter followed by letters/digits)</li>
 *   <li>11: unsigned digit string</li>
 *   <li>13-16: {@code + - * /}</li>
 *   <li>17, 18: {@code :} and {@code :=}</li>
 *   <li>20-24: {@code <}, {@code <>}, {@code <=}, {@code >}, {@code >=}</li>
 *   <li>25-28: {@code = ; ( )}</li>
 *   <li>0: the terminator {@code #}, or end of input</li>
 *   <li>-1: a character that starts no known token (nothing is printed)</li>
 * </ul>
 */
public class Demo {

    /** Source program characters; {@link #p} is used as the index into it. */
    public static char[] prog;

    /** Category code of the token recognized by the latest {@link #scaner()} call. */
    public static int syn;

    /** Read pointer: index in {@link #prog} of the next character to be read. */
    public static int p;

    /** Not read or written by the scanner; retained because it is a public field. */
    public static int n;

    /** Not read or written by the scanner; retained because it is a public field. */
    public static int sum;

    /** The character most recently read by {@link #getCh()}. */
    public static char ch;

    /** Buffer for the lexeme being built; it is empty again when {@link #scaner()} returns. */
    public static String token;

    /** Keyword table mapping each reserved word to its category code. */
    public static Map<String,Integer> rwttab=new HashMap<>();

    static {
        rwttab.put("begin",1);
        rwttab.put("if",2);
        rwttab.put("then",3);
        rwttab.put("while",4);
        rwttab.put("do",5);
        rwttab.put("end",6);
    }

    /** Value placed in {@link #ch} when the read pointer is past the end of {@link #prog}. */
    private static final char END_OF_INPUT = '\0';

    /**
     * Reads one line from standard input and prints its tokens until the
     * terminator {@code #} or the end of the line is reached.
     *
     * @param args unused
     */
    public static void main(String[] args) {

        p=0;
        System.out.println("please input string:");
        // Deliberately not closed: closing the Scanner would also close System.in.
        Scanner scanner=new Scanner(System.in);
        // With no input at all (e.g. stdin already at EOF) scan an empty program
        // instead of letting nextLine() throw NoSuchElementException.
        prog=scanner.hasNextLine() ? scanner.nextLine().toCharArray() : new char[0];
        do{
            scaner();
        }while (syn!=0);
    }

    /**
     * Recognizes a single token starting at {@link #p}, sets {@link #syn} and
     * prints the token. Intended to be called in a loop until {@code syn == 0}.
     *
     * <p>Leading whitespace is skipped. When the input is exhausted without a
     * {@code #}, {@code syn} is set to 0 and nothing is printed. A character
     * that starts no known token is consumed, {@code syn} is set to -1 and
     * nothing is printed.
     */
    public static void scaner(){
        token="";
        getCh();
        getBc();

        // getCh() advances p even when it runs off the end, so p > length means
        // the character just "read" lies beyond the input.
        if (p>prog.length){
            syn=0;
            return;
        }

        if (letter(ch)){
            while (letter(ch)||digit(ch)){
                token=token+ch;
                getCh();
            }
            // The loop read one character beyond the word; give it back.
            p--;
            Integer keywordCode=rwttab.get(token);
            emit(keywordCode!=null ? keywordCode : 10, token);
        }else if (digit(ch)){
            while (digit(ch)){
                token=token+ch;
                getCh();
            }
            // The loop read one character beyond the digit string; give it back.
            p--;
            emit(11, token);
        }else {
            switch (ch){
                case '<':
                    if (match('>')){
                        emit(21, "<>");
                    }else if (match('=')){
                        emit(22, "<=");
                    }else{
                        emit(20, "<");
                    }
                    break;
                case '>':
                    if (match('=')){
                        emit(24, ">=");
                    }else {
                        emit(23, ">");
                    }
                    break;
                case ':':
                    if (match('=')){
                        emit(18, ":=");
                    }else {
                        emit(17, ":");
                    }
                    break;
                case '+':
                    emit(13, "+");
                    break;
                case '-':
                    emit(14, "-");
                    break;
                case '*':
                    emit(15, "*");
                    break;
                case '/':
                    emit(16, "/");
                    break;
                case '=':
                    emit(25, "=");
                    break;
                case ';':
                    emit(26, ";");
                    break;
                case '(':
                    emit(27, "(");
                    break;
                case ')':
                    emit(28, ")");
                    break;
                case '#':
                    emit(0, "#");
                    break;
                default: syn=-1;
            }
        }

    }

    /** Records {@code code} in {@link #syn}, prints {@code (code,lexeme)} and clears {@link #token}. */
    private static void emit(int code, String lexeme) {
        syn=code;
        System.out.println("("+syn+","+lexeme+")");
        token="";
    }

    /**
     * Reads the next character and reports whether it equals {@code expected}.
     * On a mismatch the read pointer is moved back so the character is read
     * again by the next {@link #getCh()}.
     */
    private static boolean match(char expected) {
        getCh();
        if (ch==expected){
            return true;
        }
        p--;
        return false;
    }

    /** Returns whether {@code ch} is a digit according to {@link Character#isDigit(char)}. */
    private static boolean digit(char ch) {
        return Character.isDigit(ch);
    }

    /** Returns whether {@code ch} is a letter according to {@link Character#isLetter(char)}. */
    private static boolean letter(char ch) {
        return Character.isLetter(ch);
    }

    /**
     * Skips whitespace: while {@link #ch} is a whitespace character, keeps
     * reading. Stops at end of input because the end marker is not whitespace.
     */
    private static void getBc() {
        while (Character.isWhitespace(ch)){
           getCh();
        }
    }

    /**
     * Reads the character at {@link #p} into {@link #ch} and advances the
     * pointer. Past the end of {@link #prog} it stores an end marker instead of
     * throwing; the pointer is still advanced so that a following {@code p--}
     * retract behaves the same as for an ordinary character.
     */
    public static void getCh(){
        ch=p<prog.length ? prog[p] : END_OF_INPUT;
        p++;
    }
}

运行结果：

please input string:
begin x:= 9;  if x>0 then x=2*x+1/3-4;  end  #
(1,begin)
(10,x)
(18,:=)
(11,9)
(26,;)
(2,if)
(10,x)
(23,>)
(11,0)
(3,then)
(10,x)
(25,=)
(11,2)
(15,*)
(10,x)
(13,+)
(11,1)
(16,/)
(11,3)
(14,-)
(11,4)
(26,;)
(6,end)
(0,#)

//输入字符串，加#结尾才可以正确解析，否则会数组越界。
please input string:
123#
(11,123)
(0,#)

//这个例子说明：词法分析仅仅是分词，没有进行语法分析。正常情况下123abc不能作为标识符（此为语法错误），但这里它被拆分成了数字123和标识符abc两个单词。
please input string:
123abc#
(11,123)
(10,abc)
(0,#)

stumiss

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
复制链接

分享到 QQ

分享到新浪微博

扫一扫