编写一个Java程序来解析COBOL代码文件,并将代码中的各个Division、Section和Paragraph分别存储到不同的文件中。以下是实现该需求的代码:
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.nio.file.*;
import java.util.*;
import java.util.regex.*;
/**
 * Splits a COBOL source file into paragraphs and writes every paragraph to its own text file,
 * grouped into one directory per program / division / section.
 *
 * <p>Detection is line based and heuristic:
 * <ul>
 *   <li>a division header is a line starting with {@code <NAME> DIVISION} (optionally followed by
 *       clauses such as {@code USING ...});</li>
 *   <li>a section header is a line of the form {@code <NAME> SECTION.};</li>
 *   <li>a paragraph header is a line holding a single word followed by a period, unless that word
 *       is a reserved one-word statement such as {@code EXIT} or {@code GOBACK}.</li>
 * </ul>
 *
 * <p>Known limitations: only lines whose very first character is {@code *} are skipped as
 * comments, and lines that appear before the first paragraph header of a division or section are
 * not part of any paragraph and are therefore not returned.
 *
 * <p>Instances keep per-parse state and are not safe for concurrent use.
 */
public class CobolParser {

    /**
     * Immutable description of one paragraph: where it lives and its raw source text.
     * Division, section and paragraph names are normalised to upper case; a missing value is
     * stored as the empty string.
     */
    public static final class ParagraphInfo {
        private final String division;
        private final String section;
        private final String name;
        private final String content;

        /**
         * @param division owning division name, or {@code null} if none was seen yet
         * @param section  owning section name, or {@code null} if the paragraph is not in a section
         * @param name     paragraph name, or {@code null}
         * @param content  source lines of the paragraph (header line included), newline terminated
         */
        public ParagraphInfo(String division, String section, String name, String content) {
            // Locale.ROOT keeps the result independent of the JVM default locale
            // (e.g. Turkish dotted/dotless i).
            this.division = division != null ? division.toUpperCase(Locale.ROOT) : "";
            this.section = section != null ? section.toUpperCase(Locale.ROOT) : "";
            this.name = name != null ? name.toUpperCase(Locale.ROOT) : "";
            this.content = content;
        }

        /** @return upper-case division name, or {@code ""} when unknown */
        public String getDivision() { return division; }

        /** @return upper-case section name, or {@code ""} when the paragraph is not in a section */
        public String getSection() { return section; }

        /** @return upper-case paragraph name */
        public String getName() { return name; }

        /** @return the paragraph's source text as read from the file */
        public String getContent() { return content; }
    }

    /** Division header; accepts the {@code ID} abbreviation and trailing clauses (USING/RETURNING). */
    private static final Pattern DIVISION_PATTERN = Pattern.compile(
        "^\\s*(IDENTIFICATION|ID|ENVIRONMENT|DATA|PROCEDURE)\\s+DIVISION(?:\\s+[^.]*)?\\s*\\.?\\s*$",
        Pattern.CASE_INSENSITIVE
    );

    /** Section header: {@code <name> SECTION.} */
    private static final Pattern SECTION_PATTERN = Pattern.compile(
        "^\\s*([A-Z0-9-]+)\\s+SECTION\\s*\\.?\\s*$",
        Pattern.CASE_INSENSITIVE
    );

    /** Paragraph header candidate: a single word followed by a period. */
    private static final Pattern PARAGRAPH_PATTERN = Pattern.compile(
        "^\\s*([A-Z0-9-]+)\\s*\\.\\s*$",
        Pattern.CASE_INSENSITIVE
    );

    /** {@code PROGRAM-ID} keyword; group 1 is whatever follows it on the same line (may be empty). */
    private static final Pattern PROGRAM_ID_PATTERN = Pattern.compile(
        "(?<![A-Z0-9-])PROGRAM-ID(?![A-Z0-9-])\\s*\\.?\\s*(.*)$",
        Pattern.CASE_INSENSITIVE
    );

    /** First word of a text, optionally preceded by an opening quote; group 1 is the word. */
    private static final Pattern PROGRAM_NAME_PATTERN = Pattern.compile(
        "^[\"']?([A-Z0-9-]+)",
        Pattern.CASE_INSENSITIVE
    );

    /**
     * Reserved words that form a complete one-word statement ({@code EXIT.}, {@code END-IF.} ...).
     * They look exactly like a paragraph header but can never be a user-defined name.
     */
    private static final Set<String> STATEMENT_WORDS = Collections.unmodifiableSet(
        new HashSet<>(Arrays.asList(
            "EXIT", "GOBACK", "CONTINUE",
            "END-ACCEPT", "END-ADD", "END-CALL", "END-COMPUTE", "END-DELETE", "END-DISPLAY",
            "END-DIVIDE", "END-EVALUATE", "END-EXEC", "END-IF", "END-MULTIPLY", "END-PERFORM",
            "END-READ", "END-RETURN", "END-REWRITE", "END-SEARCH", "END-START", "END-STRING",
            "END-SUBTRACT", "END-UNSTRING", "END-WRITE"
        ))
    );

    /** Program name found by the most recent {@link #parse(File)} call, or {@code null}. */
    private String programName;

    /** True after a bare {@code PROGRAM-ID.} line, while the name is expected on a later line. */
    private boolean inProgramId = false;

    /**
     * Reads a COBOL source file and returns its paragraphs in source order.
     *
     * <p>The file is decoded as UTF-8 (malformed bytes are replaced, not rejected), matching the
     * encoding used when the paragraphs are written back out. As a side effect the program name
     * from the {@code PROGRAM-ID} paragraph is recorded and can be read via
     * {@link #getProgramName()}; it is reset at the start of every call.
     *
     * @param file COBOL source file to read
     * @return the paragraphs found, possibly empty, never {@code null}
     * @throws IOException if the file cannot be opened or read
     * @throws NullPointerException if {@code file} is {@code null}
     */
    public List<ParagraphInfo> parse(File file) throws IOException {
        Objects.requireNonNull(file, "file");
        // Start from a clean slate so a reused parser does not report the previous file's name.
        programName = null;
        inProgramId = false;

        List<ParagraphInfo> paragraphs = new ArrayList<>();
        String currentDivision = null;
        String currentSection = null;
        String currentParagraph = null;
        StringBuilder currentContent = null;

        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.startsWith("*")) {
                    continue; // comment line
                }
                String trimmedLine = line.trim();

                Matcher divisionMatcher = DIVISION_PATTERN.matcher(trimmedLine);
                if (divisionMatcher.matches()) {
                    if (currentParagraph != null) {
                        paragraphs.add(new ParagraphInfo(
                            currentDivision, currentSection, currentParagraph, currentContent.toString()));
                        currentParagraph = null;
                        currentContent = null;
                    }
                    currentDivision = normalizeDivision(divisionMatcher.group(1));
                    currentSection = null;
                    inProgramId = false;
                    continue; // a division header belongs to no paragraph
                }

                // A line that only carries the program name (after a bare "PROGRAM-ID." line)
                // looks like a paragraph header ("HELLO."), so it must not be classified again.
                boolean programNameLine = false;
                if ("IDENTIFICATION".equals(currentDivision) && programName == null) {
                    programNameLine = processProgramIdLine(trimmedLine);
                }

                if (!programNameLine) {
                    Matcher sectionMatcher = SECTION_PATTERN.matcher(trimmedLine);
                    if (sectionMatcher.matches() && !isStatementWord(sectionMatcher.group(1))) {
                        if (currentParagraph != null) {
                            paragraphs.add(new ParagraphInfo(
                                currentDivision, currentSection, currentParagraph, currentContent.toString()));
                            currentParagraph = null;
                            currentContent = null;
                        }
                        currentSection = sectionMatcher.group(1).toUpperCase(Locale.ROOT);
                        continue; // a section header belongs to no paragraph
                    }

                    Matcher paragraphMatcher = PARAGRAPH_PATTERN.matcher(trimmedLine);
                    if (paragraphMatcher.matches() && !isStatementWord(paragraphMatcher.group(1))) {
                        if (currentParagraph != null) {
                            paragraphs.add(new ParagraphInfo(
                                currentDivision, currentSection, currentParagraph, currentContent.toString()));
                        }
                        currentParagraph = paragraphMatcher.group(1).toUpperCase(Locale.ROOT);
                        currentContent = new StringBuilder();
                        currentContent.append(line).append("\n");
                        continue;
                    }
                }

                if (currentParagraph != null) {
                    currentContent.append(line).append("\n");
                }
            }
            if (currentParagraph != null) {
                paragraphs.add(new ParagraphInfo(
                    currentDivision, currentSection, currentParagraph, currentContent.toString()));
            }
        }
        return paragraphs;
    }

    /** Maps a matched division keyword to its canonical upper-case name ({@code ID} becomes {@code IDENTIFICATION}). */
    private static String normalizeDivision(String keyword) {
        String upper = keyword.toUpperCase(Locale.ROOT);
        return "ID".equals(upper) ? "IDENTIFICATION" : upper;
    }

    /** Tells whether {@code word} is a reserved one-word statement rather than a user-defined name. */
    private static boolean isStatementWord(String word) {
        return STATEMENT_WORDS.contains(word.toUpperCase(Locale.ROOT));
    }

    /** Returns the first word of {@code text} (ignoring an opening quote), or {@code null} if there is none. */
    private static String extractProgramName(String text) {
        Matcher matcher = PROGRAM_NAME_PATTERN.matcher(text.trim());
        return matcher.find() ? matcher.group(1) : null;
    }

    /**
     * Looks for the program name on one trimmed line of the IDENTIFICATION DIVISION.
     *
     * <p>Handles both {@code PROGRAM-ID. NAME.} on a single line and a bare {@code PROGRAM-ID.}
     * whose name follows on the next non-blank line.
     *
     * @param line trimmed source line
     * @return {@code true} only when this line was consumed as the continuation line that
     *         carries the program name
     */
    private boolean processProgramIdLine(String line) {
        if (inProgramId) {
            if (line.isEmpty()) {
                return false; // blank line between "PROGRAM-ID." and the name: keep waiting
            }
            inProgramId = false;
            programName = extractProgramName(line);
            return programName != null;
        }
        Matcher matcher = PROGRAM_ID_PATTERN.matcher(line);
        if (matcher.find()) {
            String remainder = matcher.group(1).trim();
            if (remainder.isEmpty()) {
                inProgramId = true; // name is expected on a following line
            } else {
                programName = extractProgramName(remainder);
            }
        }
        return false;
    }

    /**
     * @return the program name taken from the {@code PROGRAM-ID} paragraph of the most recently
     *         parsed file, or {@code null} if none was found (or nothing was parsed yet)
     */
    public String getProgramName() {
        return programName;
    }

    /**
     * Command-line entry point: {@code java CobolParser <inputFile> <outputDir>}.
     * Parses the input file and writes one file per paragraph below the output directory.
     * When the source has no usable PROGRAM-ID, the input file name without its extension is
     * used as the program name.
     *
     * @param args input file path and output directory path
     * @throws IOException if the input cannot be read or an output file cannot be written
     */
    public static void main(String[] args) throws IOException {
        if (args.length != 2) {
            System.out.println("Usage: java CobolParser <inputFile> <outputDir>");
            return;
        }
        File inputFile = new File(args[0]);
        String outputDir = args[1];
        CobolParser parser = new CobolParser();
        List<ParagraphInfo> paragraphs = parser.parse(inputFile);
        String programName = parser.getProgramName();
        if (programName == null) {
            String fileName = inputFile.getName();
            int dotIndex = fileName.lastIndexOf('.');
            programName = dotIndex == -1 ? fileName : fileName.substring(0, dotIndex);
        }
        for (ParagraphInfo p : paragraphs) {
            saveParagraph(p, programName, outputDir);
        }
    }

    /**
     * Writes one paragraph to {@code <outputDir>/<program>_<division>_<section>/<paragraph>.txt}
     * as UTF-8, creating the directory when needed. Characters other than upper-case letters,
     * digits and hyphens in the division, section and paragraph names are replaced by
     * underscores. An existing file of the same name is overwritten.
     *
     * @param p           paragraph to write
     * @param programName program name used as directory prefix
     * @param outputDir   root output directory
     * @throws IOException if the directory or file cannot be created or written
     */
    private static void saveParagraph(ParagraphInfo p, String programName, String outputDir) throws IOException {
        String safeDivision = p.getDivision().replaceAll("[^A-Z0-9-]", "_");
        String safeSection = p.getSection().replaceAll("[^A-Z0-9-]", "_");
        String safeName = p.getName().replaceAll("[^A-Z0-9-]", "_");
        String dirName = String.format("%s_%s_%s", programName, safeDivision, safeSection);
        Path dirPath = Paths.get(outputDir, dirName);
        Files.createDirectories(dirPath);
        String fileName = safeName + ".txt";
        Path filePath = dirPath.resolve(fileName);
        Files.write(filePath, p.getContent().getBytes(StandardCharsets.UTF_8));
    }
}
代码说明
- 类结构:`CobolParser` 类负责解析COBOL文件;`ParagraphInfo` 是其内部类,用于存储每个段落的信息。
- 正则表达式:使用正则表达式匹配COBOL的Division、Section和Paragraph结构。
- 解析逻辑:
  - 逐行读取文件,跳过注释。
  - 识别Division、Section和Paragraph,并记录当前层级。
  - 处理PROGRAM-ID的提取,支持多行和续行。
- 文件保存:
  - 根据程序名、Division、Section生成目录结构。
  - 确保文件名合法,替换非法字符。
  - 将每个段落的内容写入对应的文件。
使用方式
- 编译代码:`javac CobolParser.java`
- 运行程序:`java CobolParser input.cbl output_directory`
注意事项
- 程序名提取:优先从 `PROGRAM-ID` 中获取,否则使用文件名。
- 文件路径:处理特殊字符确保路径有效。
- 编码格式:使用UTF-8保存文件,确保兼容性。
此代码能够高效解析COBOL结构,准确分割各层级并保存到指定路径,满足用户需求。