java解析cobol并将其按结构拆分为不同文件

本文链接：https://blog.csdn.net/harryjudy2240/article/details/145977821

下面的Java程序解析COBOL源文件，按Division、Section、Paragraph的层级识别代码结构，并将每个段落（Paragraph）保存为单独的文件：目录以“程序名_Division_Section”命名，文件以“段落名.txt”命名。完整代码如下：

import java.io.*;
import java.nio.charset.StandardCharsets;
import java.nio.file.*;
import java.util.*;
import java.util.regex.*;

/**
 * Splits a COBOL source file into its paragraphs and writes each paragraph to its own file.
 *
 * <p>The parser reads the source line by line, tracks the current division and section, and
 * collects the lines that belong to each paragraph. The program name is taken from the
 * {@code PROGRAM-ID} paragraph of the IDENTIFICATION DIVISION when present.
 *
 * <p>Only lines that belong to a paragraph are collected; lines that appear before the first
 * paragraph header of a division or section are not part of any output.
 *
 * <p>Instances are stateful and not thread-safe: {@link #getProgramName()} reports the result
 * of the most recent {@link #parse(File)} call.
 */
public class CobolParser {

    /** Immutable description of one paragraph: where it lives and its source text. */
    private static class ParagraphInfo {
        private final String division;
        private final String section;
        private final String name;
        private final String content;

        /**
         * @param division division name, or {@code null} if the paragraph precedes any division
         * @param section  section name, or {@code null} if the paragraph is not inside a section
         * @param name     paragraph name
         * @param content  source lines of the paragraph (header included), newline-terminated
         */
        public ParagraphInfo(String division, String section, String name, String content) {
            // Locale.ROOT keeps the case folding independent of the JVM's default locale.
            this.division = division != null ? division.toUpperCase(Locale.ROOT) : "";
            this.section = section != null ? section.toUpperCase(Locale.ROOT) : "";
            this.name = name != null ? name.toUpperCase(Locale.ROOT) : "";
            this.content = content;
        }

        public String getDivision() { return division; }
        public String getSection() { return section; }
        public String getName() { return name; }
        public String getContent() { return content; }
    }

    /**
     * Division header. Anything may follow the word DIVISION after whitespace so that
     * {@code PROCEDURE DIVISION USING ...} is recognised as well.
     */
    private static final Pattern DIVISION_PATTERN = Pattern.compile(
        "^\\s*(IDENTIFICATION|ENVIRONMENT|DATA|PROCEDURE)\\s+DIVISION(?:\\s*\\.?\\s*|\\s+.*)$",
        Pattern.CASE_INSENSITIVE
    );
    /** Section header: {@code <name> SECTION.} */
    private static final Pattern SECTION_PATTERN = Pattern.compile(
        "^\\s*([A-Z0-9-]+)\\s+SECTION\\s*\\.?\\s*$",
        Pattern.CASE_INSENSITIVE
    );
    /** Paragraph header: a single word followed by a period, alone on the line. */
    private static final Pattern PARAGRAPH_PATTERN = Pattern.compile(
        "^\\s*([A-Z0-9-]+)\\s*\\.\\s*$",
        Pattern.CASE_INSENSITIVE
    );
    /** {@code PROGRAM-ID} header; group 1 is whatever follows it on the same line (may be empty). */
    private static final Pattern PROGRAM_ID_PATTERN = Pattern.compile(
        "PROGRAM-ID(?![A-Z0-9-])\\s*\\.?\\s*(.*)$",
        Pattern.CASE_INSENSITIVE
    );
    /** First name token of a line, optionally wrapped in quotes; group 1 is the bare name. */
    private static final Pattern PROGRAM_NAME_PATTERN = Pattern.compile(
        "^[\"']?([A-Z0-9_-]+)",
        Pattern.CASE_INSENSITIVE
    );

    /**
     * Reserved words that can form a complete one-word sentence such as {@code GOBACK.} or
     * {@code END-IF.}. They look exactly like a paragraph header to {@link #PARAGRAPH_PATTERN},
     * but reserved words can never be paragraph names.
     */
    private static final Set<String> RESERVED_SENTENCES = Collections.unmodifiableSet(
        new HashSet<>(Arrays.asList(
            "EXIT", "GOBACK", "CONTINUE",
            "END-ACCEPT", "END-ADD", "END-CALL", "END-COMPUTE", "END-DELETE", "END-DISPLAY",
            "END-DIVIDE", "END-EVALUATE", "END-EXEC", "END-IF", "END-INVOKE", "END-JSON",
            "END-MULTIPLY", "END-PERFORM", "END-READ", "END-RETURN", "END-REWRITE",
            "END-SEARCH", "END-START", "END-STRING", "END-SUBTRACT", "END-UNSTRING",
            "END-WRITE", "END-XML"
        ))
    );

    private String programName;
    /** True while PROGRAM-ID has been seen but its name is expected on a following line. */
    private boolean inProgramId = false;

    /**
     * Parses a COBOL source file into its paragraphs.
     *
     * @param file the COBOL source file to read (decoded with the platform default charset)
     * @return the paragraphs in source order; empty if the file contains no paragraph header
     * @throws IOException if the file cannot be opened or read
     */
    public List<ParagraphInfo> parse(File file) throws IOException {
        // Start from a clean slate so a reused parser does not report a previous file's name.
        programName = null;
        inProgramId = false;

        List<ParagraphInfo> paragraphs = new ArrayList<>();
        String currentDivision = null;
        String currentSection = null;
        String currentParagraph = null;
        StringBuilder currentContent = null;

        try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (isCommentLine(line)) {
                    continue;
                }

                String trimmedLine = line.trim();
                Matcher divisionMatcher = DIVISION_PATTERN.matcher(trimmedLine);
                if (divisionMatcher.find()) {
                    // A new division closes whatever paragraph was open.
                    if (currentParagraph != null) {
                        paragraphs.add(new ParagraphInfo(
                            currentDivision, currentSection, currentParagraph, currentContent.toString()));
                        currentContent = null;
                        currentParagraph = null;
                    }
                    currentDivision = divisionMatcher.group(1).toUpperCase(Locale.ROOT);
                    currentSection = null;
                    if ("IDENTIFICATION".equals(currentDivision)) {
                        inProgramId = false;
                    }
                }

                // The first PROGRAM-ID found wins; later ones are ignored.
                if ("IDENTIFICATION".equals(currentDivision) && programName == null) {
                    processProgramIdLine(trimmedLine);
                }

                Matcher sectionMatcher = SECTION_PATTERN.matcher(trimmedLine);
                if (sectionMatcher.find()) {
                    // A new section closes whatever paragraph was open.
                    if (currentParagraph != null) {
                        paragraphs.add(new ParagraphInfo(
                            currentDivision, currentSection, currentParagraph, currentContent.toString()));
                        currentContent = null;
                        currentParagraph = null;
                    }
                    currentSection = sectionMatcher.group(1).toUpperCase(Locale.ROOT);
                }

                Matcher paragraphMatcher = PARAGRAPH_PATTERN.matcher(trimmedLine);
                boolean startsParagraph = paragraphMatcher.find()
                    && !RESERVED_SENTENCES.contains(paragraphMatcher.group(1).toUpperCase(Locale.ROOT));
                if (startsParagraph) {
                    if (currentParagraph != null) {
                        paragraphs.add(new ParagraphInfo(
                            currentDivision, currentSection, currentParagraph, currentContent.toString()));
                    }
                    currentParagraph = paragraphMatcher.group(1).toUpperCase(Locale.ROOT);
                    currentContent = new StringBuilder();
                    currentContent.append(line).append("\n");
                } else if (currentParagraph != null) {
                    // Ordinary statement line (including one-word sentences like GOBACK.).
                    currentContent.append(line).append("\n");
                }
            }
            if (currentParagraph != null) {
                paragraphs.add(new ParagraphInfo(
                    currentDivision, currentSection, currentParagraph, currentContent.toString()));
            }
        }
        return paragraphs;
    }

    /**
     * Tells whether a raw source line is a full-line comment.
     *
     * <p>Recognised forms: {@code *} in column 1, a free-format {@code *>} comment, and the
     * fixed-format indicator ({@code *} or {@code /} in column 7) when columns 1-6 hold only
     * blanks or digits, i.e. look like a sequence area.
     */
    private static boolean isCommentLine(String line) {
        if (line.startsWith("*") || line.trim().startsWith("*>")) {
            return true;
        }
        if (line.length() <= 6) {
            return false;
        }
        char indicator = line.charAt(6);
        if (indicator != '*' && indicator != '/') {
            return false;
        }
        for (int i = 0; i < 6; i++) {
            char c = line.charAt(i);
            if (c != ' ' && !Character.isDigit(c)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Extracts the program name from a line of the IDENTIFICATION DIVISION.
     *
     * <p>Handles the name on the same line as {@code PROGRAM-ID.} (trailing clauses such as
     * {@code IS INITIAL} are ignored) as well as the name on a following line. Sets
     * {@link #programName} once a name is found.
     *
     * @param line the trimmed source line
     */
    private void processProgramIdLine(String line) {
        String candidate;
        if (inProgramId) {
            // PROGRAM-ID was on an earlier line; this line should carry the name.
            candidate = line;
        } else {
            Matcher header = PROGRAM_ID_PATTERN.matcher(line);
            if (!header.find()) {
                return;
            }
            candidate = header.group(1);
        }

        Matcher nameMatcher = PROGRAM_NAME_PATTERN.matcher(candidate.trim());
        if (nameMatcher.find()) {
            programName = nameMatcher.group(1);
            inProgramId = false;
        } else {
            // No name yet (blank remainder): keep looking on the next line.
            inProgramId = true;
        }
    }

    /**
     * @return the program name found by the last {@link #parse(File)} call,
     *         or {@code null} if no PROGRAM-ID was found
     */
    public String getProgramName() {
        return programName;
    }

    /**
     * Command-line entry point: {@code java CobolParser <inputFile> <outputDir>}.
     *
     * <p>Falls back to the input file's base name when the source has no PROGRAM-ID.
     *
     * @throws IOException if the input cannot be read or an output file cannot be written
     */
    public static void main(String[] args) throws IOException {
        if (args.length != 2) {
            System.out.println("Usage: java CobolParser <inputFile> <outputDir>");
            return;
        }

        File inputFile = new File(args[0]);
        String outputDir = args[1];

        CobolParser parser = new CobolParser();
        List<ParagraphInfo> paragraphs = parser.parse(inputFile);

        String programName = parser.getProgramName();
        if (programName == null) {
            String fileName = inputFile.getName();
            int dotIndex = fileName.lastIndexOf('.');
            programName = dotIndex == -1 ? fileName : fileName.substring(0, dotIndex);
        }

        for (ParagraphInfo p : paragraphs) {
            saveParagraph(p, programName, outputDir);
        }
    }

    /**
     * Writes one paragraph to {@code <outputDir>/<program>_<division>_<section>/<name>.txt}
     * as UTF-8, creating the directory if needed. Characters outside {@code A-Z0-9-} in the
     * division, section and paragraph names are replaced with {@code _}.
     *
     * @throws IOException if the directory cannot be created or the file cannot be written
     */
    private static void saveParagraph(ParagraphInfo p, String programName, String outputDir) throws IOException {
        String safeDivision = p.getDivision().replaceAll("[^A-Z0-9-]", "_");
        String safeSection = p.getSection().replaceAll("[^A-Z0-9-]", "_");
        String safeName = p.getName().replaceAll("[^A-Z0-9-]", "_");

        String dirName = String.format("%s_%s_%s", programName, safeDivision, safeSection);
        Path dirPath = Paths.get(outputDir, dirName);
        Files.createDirectories(dirPath);

        String fileName = safeName + ".txt";
        Path filePath = dirPath.resolve(fileName);
        Files.write(filePath, p.getContent().getBytes(StandardCharsets.UTF_8));
    }
}

代码说明

类结构：
- CobolParser 类负责解析COBOL文件。
- ParagraphInfo 是内部类，用于存储每个段落的信息。
正则表达式：
- 使用正则表达式匹配COBOL的Division、Section和Paragraph结构。
解析逻辑：
- 逐行读取文件，跳过注释。
- 识别Division、Section和Paragraph，并记录当前层级。
- 处理PROGRAM-ID的提取，支持多行和续行。
文件保存：
- 以“程序名_Division_Section”为名在输出目录下创建子目录（段落不属于任何Section时，Section部分为空）。
- 将Division、Section和段落名中不属于 A-Z、0-9、“-” 的字符替换为下划线，确保文件名合法。
- 将每个段落的内容写入对应子目录下的“段落名.txt”文件。

使用方式

编译代码：
```
javac CobolParser.java
```

运行程序：

```
java CobolParser input.cbl output_directory
```

注意事项

程序名提取：优先从PROGRAM-ID中获取，否则使用文件名。
文件路径：处理特殊字符确保路径有效。
编码格式：使用UTF-8保存文件，确保兼容性。

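此代码逐行解析COBOL结构，按Division、Section、Paragraph的层级分割段落并保存到指定路径。需要注意的是，只有属于某个段落（Paragraph）的内容才会被写入文件；不含段落的Section（例如DATA DIVISION中的数据项定义）不会生成输出文件。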