使用JAVA实现PL0语言的词法分析器

使用JAVA实现PL0语言的词法分析器

用直接转向法实现有限自动机的代码

在这里插入图片描述

枚举类

import java.util.UUID;

/**
 * Token categories of the PL/0 language.
 *
 * <p>Each constant pairs the text of a token (or a placeholder name for the
 * open-ended categories) with the numeric category code reported by the lexer.
 *
 * @author HJC
 * @version 1.0
 * @apiNote PL/0 token classification
 * @since 2021/11/6
 */
public enum PL0 {
    // Open-ended categories: the word is only a placeholder name.
    IDENTIFIER("IDENTIFIER", 1),
    INTEGER("INTEGER", 2),

    // Arithmetic operators.
    ADD("+", 3),
    SUB("-", 4),
    MUL("*", 5),
    DIV("/", 6),

    // Relational operators.
    EQL("=", 7),
    GT(">", 8),
    LT("<", 9),
    NE("<>", 10),
    LE("<=", 11),
    GE(">=", 12),

    // Brackets and punctuation.
    LS("(", 13),
    RS(")", 14),
    LB("{", 15),
    RB("}", 16),
    SEM(";", 17),
    DOT(",", 18),
    YY("\"", 19),
    SET(":=", 20),

    // Reserved words.
    VAR("var", 21),
    IF("if", 22),
    THEN("then", 23),
    ELSE("else", 24),
    WHILE("while", 25),
    FOR("for", 26),
    BEGIN("begin", 27),
    WRITELN("writeln", 28),
    PROCEDURE("procedure", 29),
    END("end", 30),

    // Error category; its word is a random UUID generated when the enum is loaded.
    ERROR(UUID.randomUUID().toString(), 100);

    /** Text associated with this category. */
    private String word;

    /** Numeric category code. */
    private Integer value;

    /**
     * Creates a category.
     *
     * @param tokenText text associated with the category
     * @param code      numeric category code
     */
    PL0(String tokenText, Integer code) {
        this.value = code;
        this.word = tokenText;
    }

    /**
     * Returns the numeric category code.
     *
     * @return the category code
     */
    public Integer getValue() {
        return this.value;
    }

    /**
     * Replaces the numeric category code.
     *
     * @param value the new category code
     */
    public void setValue(Integer value) {
        this.value = value;
    }

    /**
     * Returns the text associated with this category.
     *
     * @return the category text
     */
    public String getWord() {
        return this.word;
    }

    /**
     * Replaces the text associated with this category.
     *
     * @param word the new category text
     */
    public void setWord(String word) {
        this.word = word;
    }
}

Node实体类(key,value)

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.Getter;
import lombok.Setter;

/**
 * One token produced by the lexer: a text/category pair.
 *
 * <p>{@code @Data} already generates getters and setters, so no separate
 * {@code @Getter}/{@code @Setter} annotations are needed. The hand-written
 * {@link #toString()} below is used instead of a generated one.
 *
 * @author HJC
 * @version 1.0
 * @apiNote Node class
 * @since 2021/11/6
 */
@Data
@AllArgsConstructor
public class Node {
    /** Token text as it is printed. */
    String key;
    /** Numeric category of the token (callers in this file pass a {@code PL0} value). */
    Integer value;

    /**
     * Formats the token as {@code (value,key)}.
     *
     * @return the printable form of this token
     */
    @Override
    public String toString() {
        return "(" + value + "," + key + ")";
    }
}

主实现类

import javax.swing.*;
import java.io.*;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;

/**
 * Lexical analyzer for the PL/0 language.
 *
 * <p>Reads a PL/0 source file, splits it into tokens and prints every token using
 * {@code Node.toString()}. The file path may be given as the first command-line
 * argument; without arguments a built-in default path is used.
 *
 * @author HJC
 * @version 1.0
 * @apiNote PL/0 lexical analyzer implementation
 * @since 2021/11/6
 */
public class Main {

    /** Source file analysed when no path is passed on the command line. */
    private static final String DEFAULT_SOURCE_PATH = "D:\\HJC\\Desktop\\编译原理\\pl01.txt";

    /** Position of the next unread character in {@link #progress}. */
    private static int index = 0;

    /** Full text of the program being analysed. */
    private static String progress;

    /** Tokens recognised so far, in source order. */
    public static List<Node> list = new LinkedList<>();

    /** Maps the word of every {@code PL0} constant to its category code. */
    public static Map<String, Integer> map = new HashMap<>();

    /**
     * Runs the lexer on a source file and prints the tokens.
     *
     * @param args optional; {@code args[0]} is the path of the PL/0 source file
     * @throws IOException if the source file cannot be read
     */
    public static void main(String[] args) throws IOException {
        // Build the word -> category table.
        init();
        // Load the source text.
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_SOURCE_PATH;
        initPL0(path);
        System.out.println(progress);
        // Comment stripping is not implemented; comment characters are tokenised like any others.
        analyzer();
    }

    /** Scans the whole source text, collects the tokens in {@link #list} and prints them. */
    private static void analyzer() {
        while (index < progress.length()) {
            Node node = scanner();
            // null means "nothing recognised here" (trailing whitespace or a skipped character).
            if (node != null) {
                list.add(node);
            }
        }
        list.forEach(System.out::println);
    }

    /**
     * Scans one token starting at {@link #index} and advances past it.
     *
     * @return the token, or {@code null} when only whitespace was left or the
     *         current character is not part of any known token
     * @throws IllegalArgumentException on a number with two decimal points or a
     *                                  {@code :} that is not followed by {@code =}
     */
    private static Node scanner() {
        // Skip blanks, tabs and line breaks.
        while (index < progress.length() && isBlank(progress.charAt(index))) {
            index++;
        }
        if (index >= progress.length()) {
            return null;
        }
        char current = progress.charAt(index);
        if (Character.isLetter(current)) {
            return scanWord();
        }
        if (Character.isDigit(current)) {
            return scanNumber();
        }
        return scanSymbol();
    }

    /** Tells whether {@code c} is one of the whitespace characters skipped between tokens. */
    private static boolean isBlank(char c) {
        return c == ' ' || c == '\n' || c == '\t' || c == '\r';
    }

    /** Scans a keyword or identifier: a letter followed by letters, digits or underscores. */
    private static Node scanWord() {
        int start = index;
        while (index < progress.length() && isWordPart(progress.charAt(index))) {
            index++;
        }
        String word = progress.substring(start, index);
        Integer category = PL0.IDENTIFIER.getValue();
        // The table also holds the placeholder names of the open-ended categories;
        // an identifier spelled like one of them is still just an identifier.
        if (map.containsKey(word) && !isCategoryPlaceholder(word)) {
            category = map.get(word);
        }
        return new Node("\"" + word + "\"", category);
    }

    /** Tells whether {@code c} may appear inside a keyword or identifier. */
    private static boolean isWordPart(char c) {
        return Character.isLetter(c) || Character.isDigit(c) || c == '_';
    }

    /** Tells whether {@code word} is the placeholder name of the identifier or integer category. */
    private static boolean isCategoryPlaceholder(String word) {
        return PL0.IDENTIFIER.getWord().equals(word) || PL0.INTEGER.getWord().equals(word);
    }

    /**
     * Scans a number: digits with at most one decimal point.
     *
     * @throws IllegalArgumentException if a second decimal point is found
     */
    private static Node scanNumber() {
        StringBuilder digits = new StringBuilder();
        boolean seenPoint = false;
        while (index < progress.length()) {
            char c = progress.charAt(index);
            if (c == '.') {
                // Only a second point is an error; the first one is part of the number.
                if (seenPoint) {
                    throw new IllegalArgumentException("语法不正确");
                }
                seenPoint = true;
            } else if (!Character.isDigit(c)) {
                break;
            }
            digits.append(c);
            index++;
        }
        return new Node(digits.toString(), PL0.INTEGER.getValue());
    }

    /**
     * Scans an operator or punctuation token.
     *
     * @return the token, or {@code null} after skipping a character that starts no known token
     * @throws IllegalArgumentException if {@code :} is not followed by {@code =}
     */
    private static Node scanSymbol() {
        char current = progress.charAt(index);
        // '\0' stands for "no next character" so the look-ahead never runs off the text.
        char next = index + 1 < progress.length() ? progress.charAt(index + 1) : '\0';
        switch (current) {
            case ':':
                if (next == '=') {
                    index += 2;
                    return token(PL0.SET);
                }
                // A lone ':' is not a PL/0 token.
                index++;
                throw new IllegalArgumentException("语法不正确,:的意思是:=嘛?");
            case '<':
                if (next == '>') {
                    index += 2;
                    return token(PL0.NE);
                }
                if (next == '=') {
                    index += 2;
                    return token(PL0.LE);
                }
                index++;
                return token(PL0.LT);
            case '>':
                if (next == '=') {
                    index += 2;
                    return token(PL0.GE);
                }
                index++;
                return token(PL0.GT);
            case '+':
            case '-':
            case '*':
            case '/':
            case '=':
            case ';':
            case ',':
            case '"':
            case '(':
            case ')':
            case '{':
            case '}':
                // Single-character tokens: the category comes straight from the table.
                String symbol = String.valueOf(current);
                index++;
                return new Node("\"" + symbol + "\"", map.get(symbol));
            default:
                // Unknown character: skip it without producing a token.
                index++;
                return null;
        }
    }

    /** Builds the token for a fixed-text category, with the text wrapped in double quotes. */
    private static Node token(PL0 kind) {
        return new Node("\"" + kind.getWord() + "\"", kind.getValue());
    }

    /** Fills {@link #map} with the word and category code of every {@code PL0} constant. */
    private static void init() {
        for (PL0 v : PL0.values()) {
            map.put(v.getWord(), v.getValue());
        }
    }

    /**
     * Loads the source file into {@link #progress}, ending every line with {@code '\n'}.
     *
     * @param path path of the PL/0 source file
     * @throws IOException if the file cannot be opened or read
     */
    private static void initPL0(String path) throws IOException {
        System.out.println(path);
        StringBuilder text = new StringBuilder();
        // The file is decoded with the platform default charset.
        try (BufferedReader reader =
                     new BufferedReader(new InputStreamReader(new FileInputStream(path)))) {
            String line;
            while ((line = reader.readLine()) != null) {
                text.append(line).append('\n');
            }
        }
        progress = text.toString();
    }
}

处理结果

procedure divide;
var w;
begin
	r := x; q := 0; w := y;
end

(29,"procedure")
(1,"divide")
(17,";")
(21,"var")
(1,"w")
(17,";")
(27,"begin")
(1,"r")
(20,":=")
(1,"x")
(17,";")
(1,"q")
(20,":=")
(2,0)
(17,";")
(1,"w")
(20,":=")
(1,"y")
(17,";")
(30,"end")

进程已结束,退出代码为 0

  • 3
    点赞
  • 10
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论
A: 要完成PL/0语言词法分析器,可以采用C语言实现。以下是一个简单的词法分析器代码示例:

```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX_TOKEN_LEN 128 // 最大标记长度

typedef enum {
    ID, NUM, ASSIGN, ADD, SUB, MUL, DIV, L_PAREN, R_PAREN, SEMI, ERROR, END_OF_FILE
} TokenType;

typedef struct {
    TokenType type; // 标记的类型
    char value[MAX_TOKEN_LEN]; // 标记的值
} Token;

Token current_token; // 当前标记
char current_char; // 当前字符
int current_pos = -1; // 当前字符位置
char buffer[MAX_TOKEN_LEN]; // 标记缓冲区

char peek() {
    char next_char = getchar();
    ungetc(next_char, stdin);
    return next_char;
}

void advance() {
    current_pos++;
    if (current_pos < strlen(buffer)) {
        current_char = buffer[current_pos];
    } else {
        current_char = getchar();
        buffer[current_pos] = current_char;
    }
}

void skip_whitespace() {
    while (current_char == ' ' || current_char == '\n' || current_char == '\r' || current_char == '\t') {
        advance();
    }
}

Token get_id() {
    int pos = 0;
    while ((current_char >= 'a' && current_char <= 'z') || (current_char >= 'A' && current_char <= 'Z') || (current_char >= '0' && current_char <= '9')) {
        if (pos < MAX_TOKEN_LEN - 1) {
            buffer[pos++] = current_char;
        }
        advance();
    }
    buffer[pos] = '\0';
    if (strcmp(buffer, "BEGIN") == 0) {
        return (Token) {ID, buffer};
    } else if (strcmp(buffer, "END") == 0) {
        return (Token) {ID, buffer};
    } else if (strcmp(buffer, "CONST") == 0) {
        return (Token) {ID, buffer};
    } else if (strcmp(buffer, "VAR") == 0) {
        return (Token) {ID, buffer};
    } else if (strcmp(buffer, "WHILE") == 0) {
        return (Token) {ID, buffer};
    } else if (strcmp(buffer, "DO") == 0) {
        return (Token) {ID, buffer};
    } else if (strcmp(buffer, "IF") == 0) {
        return (Token) {ID, buffer};
    } else if (strcmp(buffer, "THEN") == 0) {
        return (Token) {ID, buffer};
    } else {
        return (Token) {ID, buffer};
    }
}

Token get_num() {
    int pos = 0;
    while (current_char >= '0' && current_char <= '9') {
        if (pos < MAX_TOKEN_LEN - 1) {
            buffer[pos++] = current_char;
        }
        advance();
    }
    buffer[pos] = '\0';
    return (Token) {NUM, buffer};
}

Token get_symbol() {
    Token token;
    switch (current_char) {
        case ':':
            if (peek() == '=') {
                advance();
                advance();
                return (Token) {ASSIGN, ":="};
            } else {
                return (Token) {ERROR, ":"};
            }
            break;
        case '+':
            advance();
            return (Token) {ADD, "+"};
            break;
        case '-':
            advance();
            return (Token) {SUB, "-"};
            break;
        case '*':
            advance();
            return (Token) {MUL, "*"};
            break;
        case '/':
            advance();
            return (Token) {DIV, "/"};
            break;
        case '(':
            advance();
            return (Token) {L_PAREN, "("};
            break;
        case ')':
            advance();
            return (Token) {R_PAREN, ")"};
            break;
        case ';':
            advance();
            return (Token) {SEMI, ";"};
            break;
        default:
            advance();
            return (Token) {ERROR, "Invalid character"};
    }
}

Token get_next_token() {
    Token token;
    skip_whitespace();
    if (current_char == '\0') {
        return (Token) {END_OF_FILE, ""};
    }
    if ((current_char >= 'a' && current_char <= 'z') || (current_char >= 'A' && current_char <= 'Z')) {
        return get_id();
    }
    if (current_char >= '0' && current_char <= '9') {
        return get_num();
    }
    return get_symbol();
}

int main() {
    while (1) {
        current_token = get_next_token();
        if (current_token.type == END_OF_FILE) {
            break;
        }
        printf("Token type: %d, Token value: %s\n", current_token.type, current_token.value);
    }
    return 0;
}
```

这个示例代码能够处理简单的PL/0语言表达式:

```
a := 5 + 6;
b := 7 * 8 - 9;
```

输出的结果如下:

```
Token type: ID, Token value: a
Token type: ASSIGN, Token value: :=
Token type: NUM, Token value: 5
Token type: ADD, Token value: +
Token type: NUM, Token value: 6
Token type: SEMI, Token value: ;
Token type: ID, Token value: b
Token type: ASSIGN, Token value: :=
Token type: NUM, Token value: 7
Token type: MUL, Token value: *
Token type: NUM, Token value: 8
Token type: SUB, Token value: -
Token type: NUM, Token value: 9
Token type: SEMI, Token value: ;
Token type: END_OF_FILE, Token value:
```

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

coding小黄

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值