java解析cobol并将其按结构拆分为不同文件

编写一个Java程序来解析COBOL代码文件:程序逐行识别 Division、Section 和 Paragraph 三级结构,并将每个 Paragraph 单独保存为一个文件,按所属的程序名、Division 和 Section 归入对应的目录。以下是实现该需求的代码:

import java.io.*;
import java.nio.charset.StandardCharsets;
import java.nio.file.*;
import java.util.*;
import java.util.regex.*;

/**
 * Line-oriented splitter for COBOL source files.
 *
 * <p>The parser tracks the current division and section, detects paragraph
 * headers (a single name followed by a period on a line of its own) and
 * collects the lines of every paragraph. {@link #main(String[])} writes each
 * paragraph to {@code <outputDir>/<program>_<DIVISION>_<SECTION>/<PARAGRAPH>.txt}.
 *
 * <p>Both free-format sources and fixed-format sources (numeric sequence area
 * in columns 1-6, indicator in column 7) are recognised. Lines that precede
 * the first paragraph of a division or section (for example data definitions)
 * are not captured, because only paragraphs are collected.
 *
 * <p>Instances keep per-file state and are not safe for concurrent use.
 */
public class CobolParser {

    /** Immutable description of one paragraph together with its enclosing division and section. */
    public static final class ParagraphInfo {
        private final String division;
        private final String section;
        private final String name;
        private final String content;

        /**
         * @param division enclosing division name, or {@code null} when none was seen
         * @param section  enclosing section name, or {@code null} when none was seen
         * @param name     paragraph name, or {@code null}
         * @param content  raw source lines of the paragraph, each terminated by {@code \n}
         */
        public ParagraphInfo(String division, String section, String name, String content) {
            // Locale.ROOT keeps the upper-casing stable under locales such as Turkish.
            this.division = division != null ? division.toUpperCase(Locale.ROOT) : "";
            this.section = section != null ? section.toUpperCase(Locale.ROOT) : "";
            this.name = name != null ? name.toUpperCase(Locale.ROOT) : "";
            this.content = content;
        }

        /** @return upper-cased division name, or an empty string when unknown */
        public String getDivision() { return division; }

        /** @return upper-cased section name, or an empty string when unknown */
        public String getSection() { return section; }

        /** @return upper-cased paragraph name */
        public String getName() { return name; }

        /** @return the paragraph text, including its header line */
        public String getContent() { return content; }
    }

    /** Width of the fixed-format sequence number area (columns 1-6). */
    private static final int SEQUENCE_AREA_WIDTH = 6;

    /** Fixed-format program text ends at column 72; columns 73-80 are an identification area. */
    private static final int FIXED_FORMAT_CODE_END = 72;

    // Anything may follow DIVISION so that "PROCEDURE DIVISION USING X." is recognised too;
    // "ID" is the accepted abbreviation of IDENTIFICATION.
    private static final Pattern DIVISION_PATTERN = Pattern.compile(
        "^\\s*(IDENTIFICATION|ID|ENVIRONMENT|DATA|PROCEDURE)\\s+DIVISION\\b.*$",
        Pattern.CASE_INSENSITIVE
    );
    private static final Pattern SECTION_PATTERN = Pattern.compile(
        "^\\s*([A-Z0-9-]+)\\s+SECTION\\s*\\.?\\s*$",
        Pattern.CASE_INSENSITIVE
    );
    private static final Pattern PARAGRAPH_PATTERN = Pattern.compile(
        "^\\s*([A-Z0-9-]+)\\s*\\.\\s*$",
        Pattern.CASE_INSENSITIVE
    );
    // Group 1 is the program name; it is empty when the name is on a following line.
    private static final Pattern PROGRAM_ID_PATTERN = Pattern.compile(
        "^PROGRAM-ID\\s*\\.?\\s*[\"']?([A-Z0-9_-]*)",
        Pattern.CASE_INSENSITIVE
    );

    /**
     * Reserved words that may legally stand alone before a period. Such lines look
     * like paragraph headers to {@link #PARAGRAPH_PATTERN} but are statements or
     * scope terminators, so they must stay inside the current paragraph.
     */
    private static final Set<String> STATEMENT_WORDS = Collections.unmodifiableSet(
        new HashSet<>(Arrays.asList(
            "GOBACK", "EXIT", "CONTINUE",
            "END-ACCEPT", "END-ADD", "END-CALL", "END-COMPUTE", "END-DELETE",
            "END-DISPLAY", "END-DIVIDE", "END-EVALUATE", "END-EXEC", "END-IF",
            "END-INVOKE", "END-JSON", "END-MULTIPLY", "END-PERFORM", "END-READ",
            "END-RECEIVE", "END-RETURN", "END-REWRITE", "END-SEARCH", "END-START",
            "END-STRING", "END-SUBTRACT", "END-UNSTRING", "END-WRITE", "END-XML"
        ))
    );

    private String programName;
    private boolean inProgramId = false;
    private final StringBuilder programIdBuilder = new StringBuilder();

    /**
     * Parses a COBOL source file into its paragraphs.
     *
     * <p>Any state left from an earlier call is discarded, so afterwards
     * {@link #getProgramName()} refers to {@code file} only.
     *
     * @param file the COBOL source file, decoded as UTF-8
     * @return the paragraphs in source order; never {@code null}
     * @throws IOException if the file cannot be opened or read
     */
    public List<ParagraphInfo> parse(File file) throws IOException {
        Objects.requireNonNull(file, "file");
        programName = null;
        inProgramId = false;
        programIdBuilder.setLength(0);

        List<ParagraphInfo> paragraphs = new ArrayList<>();
        String currentDivision = null;
        String currentSection = null;
        String currentParagraph = null;
        StringBuilder currentContent = null;

        // Explicit charset: FileReader would use the platform default before Java 18.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (isCommentLine(line)) {
                    continue;
                }

                // Structure is detected on the code text only; the raw line is what gets stored.
                String code = stripSequenceArea(line).trim();

                Matcher divisionMatcher = DIVISION_PATTERN.matcher(code);
                if (divisionMatcher.matches()) {
                    addParagraph(paragraphs, currentDivision, currentSection, currentParagraph, currentContent);
                    currentParagraph = null;
                    currentContent = null;
                    String division = divisionMatcher.group(1).toUpperCase(Locale.ROOT);
                    currentDivision = "ID".equals(division) ? "IDENTIFICATION" : division;
                    currentSection = null;
                    if ("IDENTIFICATION".equals(currentDivision)) {
                        inProgramId = false;
                        programIdBuilder.setLength(0);
                    }
                    continue;
                }

                // True when this line continues a PROGRAM-ID whose name was not on the header line.
                boolean continuingProgramId = false;
                if ("IDENTIFICATION".equals(currentDivision) && programName == null) {
                    continuingProgramId = inProgramId;
                    processProgramIdLine(code);
                }

                Matcher sectionMatcher = SECTION_PATTERN.matcher(code);
                if (sectionMatcher.matches()) {
                    String sectionName = sectionMatcher.group(1).toUpperCase(Locale.ROOT);
                    // "EXIT SECTION." is a statement, not a section header.
                    if (!STATEMENT_WORDS.contains(sectionName)) {
                        addParagraph(paragraphs, currentDivision, currentSection, currentParagraph, currentContent);
                        currentParagraph = null;
                        currentContent = null;
                        currentSection = sectionName;
                        continue;
                    }
                }

                Matcher paragraphMatcher = PARAGRAPH_PATTERN.matcher(code);
                boolean startsParagraph = !continuingProgramId
                        && paragraphMatcher.matches()
                        && !STATEMENT_WORDS.contains(paragraphMatcher.group(1).toUpperCase(Locale.ROOT));
                if (startsParagraph) {
                    addParagraph(paragraphs, currentDivision, currentSection, currentParagraph, currentContent);
                    currentParagraph = paragraphMatcher.group(1).toUpperCase(Locale.ROOT);
                    currentContent = new StringBuilder();
                    currentContent.append(line).append("\n");
                } else if (currentParagraph != null) {
                    currentContent.append(line).append("\n");
                }
            }
            addParagraph(paragraphs, currentDivision, currentSection, currentParagraph, currentContent);
        }
        return paragraphs;
    }

    /** Appends the paragraph being collected to {@code out}; does nothing when none is open. */
    private static void addParagraph(List<ParagraphInfo> out, String division, String section,
                                     String name, StringBuilder content) {
        if (name != null && content != null) {
            out.add(new ParagraphInfo(division, section, name, content.toString()));
        }
    }

    /**
     * Tells whether a raw source line is a comment: {@code *} in column 1, a
     * free-format {@code *>} comment at any indentation, or a fixed-format line
     * whose indicator column (7) holds {@code *} or {@code /} after a blank or
     * numeric sequence area.
     */
    private static boolean isCommentLine(String line) {
        if (line.startsWith("*") || line.trim().startsWith("*>")) {
            return true;
        }
        if (line.length() <= SEQUENCE_AREA_WIDTH) {
            return false;
        }
        for (int i = 0; i < SEQUENCE_AREA_WIDTH; i++) {
            char c = line.charAt(i);
            if (c != ' ' && (c < '0' || c > '9')) {
                return false;
            }
        }
        char indicator = line.charAt(SEQUENCE_AREA_WIDTH);
        return indicator == '*' || indicator == '/';
    }

    /**
     * Removes a numeric sequence number (columns 1-6) and the identification area
     * (columns 73+) from a fixed-format line. Lines whose first six characters are
     * not all digits are returned unchanged, so free-format code is never cut.
     */
    private static String stripSequenceArea(String line) {
        if (line.length() <= SEQUENCE_AREA_WIDTH) {
            return line;
        }
        for (int i = 0; i < SEQUENCE_AREA_WIDTH; i++) {
            char c = line.charAt(i);
            if (c < '0' || c > '9') {
                return line;
            }
        }
        return line.substring(SEQUENCE_AREA_WIDTH, Math.min(line.length(), FIXED_FORMAT_CODE_END));
    }

    /**
     * Extracts the program name from a line of the IDENTIFICATION DIVISION.
     *
     * <p>When the name follows {@code PROGRAM-ID} on the same line it is taken
     * directly. Otherwise the following lines are accumulated until one contains
     * a period; a trailing {@code -} on such a line is dropped before joining.
     *
     * @param line trimmed code text of the current line
     */
    private void processProgramIdLine(String line) {
        if (inProgramId) {
            int dotIndex = line.indexOf('.');
            if (dotIndex >= 0) {
                programIdBuilder.append(line, 0, dotIndex);
                String collected = programIdBuilder.toString()
                        .replaceAll("[\"']", "")
                        .trim()
                        .replaceAll("\\s+", "-");
                // An empty result means no name was found; stay null so callers can fall back.
                programName = collected.isEmpty() ? null : collected;
                inProgramId = false;
            } else if (line.endsWith("-")) {
                programIdBuilder.append(line.substring(0, line.length() - 1).trim());
            } else {
                programIdBuilder.append(line.trim());
            }
            return;
        }

        Matcher matcher = PROGRAM_ID_PATTERN.matcher(line);
        if (!matcher.find()) {
            return;
        }
        String name = matcher.group(1);
        if (name.isEmpty()) {
            // Header without a name: the name is expected on one of the next lines.
            inProgramId = true;
            programIdBuilder.setLength(0);
        } else {
            programName = name;
        }
    }

    /**
     * @return the program name found by the most recent {@link #parse(File)} call,
     *         or {@code null} when no PROGRAM-ID was found
     */
    public String getProgramName() {
        return programName;
    }

    /**
     * Command-line entry point: {@code java CobolParser <inputFile> <outputDir>}.
     *
     * @param args input file path and output directory
     * @throws IOException if the input cannot be read or an output file cannot be written
     */
    public static void main(String[] args) throws IOException {
        if (args.length != 2) {
            System.out.println("Usage: java CobolParser <inputFile> <outputDir>");
            return;
        }

        File inputFile = new File(args[0]);
        String outputDir = args[1];

        CobolParser parser = new CobolParser();
        List<ParagraphInfo> paragraphs = parser.parse(inputFile);

        String programName = parser.getProgramName();
        if (programName == null) {
            // No PROGRAM-ID found: fall back to the file name without its extension.
            String fileName = inputFile.getName();
            int dotIndex = fileName.lastIndexOf('.');
            programName = dotIndex == -1 ? fileName : fileName.substring(0, dotIndex);
        }

        for (ParagraphInfo p : paragraphs) {
            saveParagraph(p, programName, outputDir);
        }
    }

    /**
     * Writes one paragraph to
     * {@code <outputDir>/<program>_<DIVISION>_<SECTION>/<PARAGRAPH>.txt} as UTF-8.
     * A paragraph with the same name in the same directory is overwritten.
     *
     * @throws IOException if the directory or file cannot be created
     */
    private static void saveParagraph(ParagraphInfo p, String programName, String outputDir) throws IOException {
        // The program name comes from the source text or the file name, so path
        // separators and other unsafe characters are replaced before use.
        String safeProgram = String.valueOf(programName).replaceAll("[^A-Za-z0-9._-]", "_");
        String safeDivision = p.getDivision().replaceAll("[^A-Z0-9-]", "_");
        String safeSection = p.getSection().replaceAll("[^A-Z0-9-]", "_");
        String safeName = p.getName().replaceAll("[^A-Z0-9-]", "_");

        String dirName = String.format("%s_%s_%s", safeProgram, safeDivision, safeSection);
        Path dirPath = Paths.get(outputDir, dirName);
        Files.createDirectories(dirPath);

        String fileName = safeName + ".txt";
        Path filePath = dirPath.resolve(fileName);
        Files.write(filePath, p.getContent().getBytes(StandardCharsets.UTF_8));
    }
}

代码说明

  1. 类结构

    • CobolParser 类负责解析COBOL文件。
    • ParagraphInfo 是内部类,用于存储每个段落的信息。
  2. 正则表达式

    • 使用正则表达式匹配COBOL的Division、Section和Paragraph结构。
  3. 解析逻辑

    • 逐行读取文件,跳过注释。
    • 识别Division、Section和Paragraph,并记录当前层级。
    • 处理PROGRAM-ID的提取,支持多行和续行。
  4. 文件保存

    • 以"程序名_Division_Section"作为输出目录名,每个段落保存为该目录下的"段落名.txt"。
    • 确保文件名合法,替换非法字符。
    • 将每个段落的内容写入对应的文件。

使用方式

  1. 编译代码:

    javac CobolParser.java
    
  2. 运行程序:

    java CobolParser input.cbl output_directory
    

注意事项

  • 程序名提取:优先从PROGRAM-ID中获取,否则使用文件名。
  • 文件路径:处理特殊字符确保路径有效。
  • 编码格式:使用UTF-8保存文件,确保兼容性。

此代码按行解析COBOL结构,并以段落(Paragraph)为单位保存到指定路径。需要注意:Division 和 Section 只用于确定输出目录,位于某个 Section 内第一个段落之前的内容(例如 DATA DIVISION 中的数据定义)不会被写入任何文件。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值