Java 自制小说工具实现第一人称转第三人称（由我至主角）

-小言-
已于 2023-03-26 18:50:16 修改
阅读量1.9k
点赞数
文章标签： java、开发语言、算法、小说
于 2023-03-18 14:07:35 首次发布
本文链接：https://blog.csdn.net/qq_40697553/article/details/129635997
版权
爱看小说的兄弟们，是否遇到过这种情况，好不容易挑到一本好书，点进去，发现作者是用第一人称写的，以“我”这一视角来描述故事，看着总有些膈应，各种不习惯。笔者也遭遇了这种窘境，本想着直接一键替换，把“我”替换成主角名称，结果发现各种毛病，后面开始着手自制一个工具类，一点点发现问题并逐步改良，如下：
package cn.cheng.util;

import java.io.*;

/**
 * Novel rewriting utility: converts first-person narration ("我") into
 * third-person narration that uses the protagonist's name.
 *
 * <p>Text enclosed in Chinese double quotes (“…”) is treated as dialogue and is
 * left untouched; only the narration outside the quotes is rewritten. The
 * heuristics expect well-formed input: dialogue, letters and speeches should be
 * quoted, and a complete piece of dialogue should preferably sit on one line.
 *
 * <p>All matching is literal (no regular expressions), so the protagonist's
 * name may contain any characters.
 *
 * <p>Not thread-safe: {@link #continueModify} is shared mutable state that
 * carries quote context from one processed line to the next.
 *
 * @author -小言-
 */
public class NovelReformer {

    /**
     * Whether a line that contains no quotation marks at all should be
     * converted. It is set to {@code false} by {@link #checkContinue(String)}
     * when the most recent quoted line ended with an unclosed opening quote,
     * i.e. the following lines are presumed to still be inside dialogue.
     */
    public static boolean continueModify = true;

    /**
     * Fixed words and idioms whose "我" must survive the conversion. The order
     * is significant: entries are restored sequentially, top to bottom.
     */
    private static final String[] PROTECTED_WORDS = {
        "忘我", "自我", "大我", "小我", "敌我", "你我", "物我", "无我",
        "我见犹怜", "你追我赶", "你来我往", "敌暗我明", "敌进我退", "敌驻我扰",
        "敌疲我打", "敌退我追", "天生我才", "尔虞我诈", "你死我活", "时不我待",
        "唯我独尊", "舍我其谁", "卿卿我我", "我行我素"
    };

    /**
     * Converts a whole novel text file (UTF-8) from first person to third
     * person, line by line, and writes the result next to the input file under
     * the name {@code "(第三人称版)" + originalFileName}.
     *
     * <p>Nothing is done when {@code textFileUrl} is {@code null}. A missing
     * file and I/O failures are reported on {@code System.err}; I/O failures
     * are not rethrown.
     *
     * @param textFileUrl path of the novel file
     * @param name        name of the protagonist
     */
    public static void textFileThirdPersonification(String textFileUrl, String name) {
        if (textFileUrl == null) {
            return;
        }
        File novelFile = new File(textFileUrl);
        if (!novelFile.isFile()) {
            System.err.println("文件不存在：" + textFileUrl);
            return;
        }

        long startTime = System.currentTimeMillis();
        // Every file starts outside of any dialogue; do not inherit the quote
        // state left behind by a previously processed file.
        continueModify = true;
        // File(File, String) copes with a null parent (bare relative file name),
        // unlike concatenating getParent(), which would yield "null/...".
        File newFile = new File(novelFile.getParentFile(), "(第三人称版)" + novelFile.getName());

        try (BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(novelFile), "UTF-8"));
             BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(newFile), "UTF-8"))
        ) {
            String lineStr;
            while ((lineStr = br.readLine()) != null) {
                bw.write(textPersonFrom1To3(lineStr, name));
                bw.newLine();
            }
        } catch (IOException e) {
            System.err.println("文件处理异常：" + e);
            return;
        }

        long endTime = System.currentTimeMillis();
        System.out.println("一共用了" + ((endTime - startTime) / 1000F) + "秒");
    }

    /**
     * Converts one line of text from first person to third person.
     *
     * <p>Only narration is rewritten: for every quoted span the text between
     * “ and ” is kept as is, and the text following the last ” of each segment
     * is converted. A line without any quotation mark is converted only while
     * {@link #continueModify} is {@code true}. After processing,
     * {@link #checkContinue(String)} updates that flag for the next line.
     *
     * @param text line to process
     * @param name name that replaces the first person
     * @return the converted line; {@code ""} when {@code text} is {@code null}
     *         or blank; {@code text} unchanged when {@code name} is
     *         {@code null} or blank
     */
    public static String textPersonFrom1To3(String text, String name) {
        if (text == null || text.trim().isEmpty()) {
            return "";
        }
        if (name == null || name.trim().isEmpty()) {
            return text;
        }

        // Pad every quote with spaces so that split() never yields (and then
        // silently drops) empty trailing segments; the padding is removed at
        // the end by the exact inverse replacement.
        String padded = text.replace("”", " ” ").replace("“", " “ ");

        String converted;
        String[] openSegments = padded.split("“");
        if (openSegments.length > 1) {
            StringBuilder sb = new StringBuilder(padded.length() + 16);
            for (int i = 0; i < openSegments.length; i++) {
                if (i > 0) {
                    sb.append("“");
                }
                sb.append(convertNarrationTail(openSegments[i], name, i == 0));
            }
            converted = sb.toString();
        } else {
            String[] closeSegments = padded.split("”");
            if (closeSegments.length > 1) {
                // Line closes a dialogue opened earlier: only the text after
                // the last closing quote is narration.
                int last = closeSegments.length - 1;
                closeSegments[last] = replace1to3(closeSegments[last], name);
                converted = revertSplit("”", closeSegments);
            } else {
                // No quotes at all: rely on the state carried over from the
                // previous quoted line.
                converted = continueModify ? replace1to3(padded, name) : padded;
            }
        }

        checkContinue(padded);
        return converted.replace(" ” ", "”").replace(" “ ", "“");
    }

    /**
     * Converts the narration part of one segment produced by splitting on “.
     *
     * <p>If the segment contains a closing quote, everything up to the last ”
     * is dialogue and only the remainder is converted. Otherwise the segment
     * is converted entirely when it is the leading segment of the line (pure
     * narration before the first “) and kept as is when it is not (an opened
     * but unclosed dialogue).
     */
    private static String convertNarrationTail(String segment, String name, boolean leading) {
        String[] parts = segment.split("”");
        if (parts.length > 1) {
            int last = parts.length - 1;
            parts[last] = replace1to3(parts[last], name);
            return revertSplit("”", parts);
        }
        return leading ? replace1to3(segment, name) : segment;
    }

    /**
     * Replaces the first person in a piece of narration with {@code name} and
     * then restores fixed words such as "忘我" or "我行我素".
     *
     * <p>When the text contains exactly one full-width colon, only the part
     * before the colon is converted, since the subject after the colon usually
     * should not be rewritten. With any other number of colons the whole text
     * is converted.
     *
     * @param text text to convert
     * @param name name that replaces the first person
     * @return the converted text, or {@code ""} when either argument is
     *         {@code null}
     */
    public static String replace1to3(String text, String name) {
        if (text == null || name == null) {
            return "";
        }
        // limit -1 keeps trailing empty parts, so "exactly two parts" really
        // means "exactly one colon" and a trailing colon is never lost.
        String[] colonParts = text.split("：", -1);
        String converted = colonParts.length == 2
                ? replaceAllWO(colonParts[0], name) + "：" + colonParts[1]
                : replaceAllWO(text, name);
        return converted.contains(name) ? revertWordsWO(converted, name) : converted;
    }

    /**
     * Replaces "我们" with {@code name + "他们"} and every other "我" with
     * {@code name}, leaving the content of book-title marks (《…》) untouched
     * when the marks are balanced.
     *
     * @param text text to convert
     * @param name name that replaces the first person
     * @return the converted text; {@code ""} when {@code text} is
     *         {@code null}; {@code text} unchanged when {@code name} is
     *         {@code null}
     */
    public static String replaceAllWO(String text, String name) {
        if (text == null) {
            return "";
        }
        if (name == null) {
            return text;
        }
        boolean balancedTitles = text.contains("《") && countMark(text, "《") == countMark(text, "》");
        if (!balancedTitles) {
            return replaceFirstPerson(text, name);
        }

        // Same padding trick as for quotes: keeps split() from dropping
        // trailing empty segments.
        String padded = text.replace("《", " 《 ").replace("》", " 》 ");
        String[] openSegments = padded.split("《");
        StringBuilder sb = new StringBuilder(padded.length() + 16);
        for (int i = 0; i < openSegments.length; i++) {
            if (i == 0) {
                // Everything before the first 《 is outside any title.
                sb.append(replaceFirstPerson(openSegments[0], name));
                continue;
            }
            sb.append("《");
            String[] closeSegments = openSegments[i].split("》");
            if (closeSegments.length > 1) {
                int last = closeSegments.length - 1;
                closeSegments[last] = replaceFirstPerson(closeSegments[last], name);
            }
            sb.append(revertSplit("》", closeSegments));
        }
        return sb.toString().replace(" 《 ", "《").replace(" 》 ", "》");
    }

    /**
     * Single-pass literal replacement of "我们" by {@code name + "他们"} and of
     * any other "我" by {@code name}. Working in one pass guarantees that the
     * inserted name is never rescanned, so a name that itself contains "我" is
     * handled correctly.
     */
    private static String replaceFirstPerson(String text, String name) {
        StringBuilder sb = new StringBuilder(text.length() + 16);
        int from = 0;
        int idx;
        while ((idx = text.indexOf('我', from)) >= 0) {
            sb.append(text, from, idx).append(name);
            from = idx + 1;
            if (text.startsWith("们", from)) {
                sb.append("他们");
                from++;
            }
        }
        return sb.append(text, from, text.length()).toString();
    }

    /**
     * Counts the non-overlapping literal occurrences of {@code mark} in
     * {@code text}.
     *
     * @param text text to search
     * @param mark literal string to count (not a regular expression)
     * @return the number of occurrences; 0 when either argument is
     *         {@code null} or {@code mark} is empty
     */
    public static int countMark(String text, String mark) {
        if (text == null || mark == null || mark.isEmpty()) {
            return 0;
        }
        int count = 0;
        for (int idx = text.indexOf(mark); idx >= 0; idx = text.indexOf(mark, idx + mark.length())) {
            count++;
        }
        return count;
    }

    /**
     * Rebuilds a string from the pieces produced by {@code split}, putting
     * {@code mark} back between consecutive pieces.
     *
     * @param mark     separator that was used for splitting
     * @param splitArr pieces produced by the split
     * @return the joined string, or {@code ""} when either argument is
     *         {@code null}
     */
    public static String revertSplit(String mark, String[] splitArr) {
        if (mark == null || splitArr == null) {
            return "";
        }
        StringBuilder sb = new StringBuilder();
        for (int j = 0; j < splitArr.length; j++) {
            if (j > 0) {
                sb.append(mark);
            }
            sb.append(splitArr[j]);
        }
        return sb.toString();
    }

    /**
     * Updates {@link #continueModify} from the quotation marks of the given
     * line. Lines without any quotation mark leave the flag unchanged. If the
     * last quotation mark on the line is an opening one (dialogue left open),
     * the flag becomes {@code false}; otherwise it becomes {@code true}.
     *
     * @param text line that has just been processed
     */
    public static void checkContinue(String text) {
        if (text == null) {
            return;
        }
        int lastOpen = text.lastIndexOf("“");
        int lastClose = text.lastIndexOf("”");
        if (lastOpen != -1 || lastClose != -1) {
            continueModify = lastClose >= lastOpen;
        }
    }

    /**
     * Restores the "我" of fixed words and idioms (for example "忘我",
     * "你追我赶", "我行我素") that the blanket replacement turned into
     * {@code name}.
     *
     * @param text already converted text
     * @param name name that replaced the first person
     * @return the text with fixed words restored; {@code ""} when either
     *         argument is {@code null}; {@code text} unchanged when
     *         {@code name} is empty
     */
    public static String revertWordsWO(String text, String name) {
        if (text == null || name == null) {
            return "";
        }
        if (name.isEmpty()) {
            return text;
        }
        String restored = text;
        for (String word : PROTECTED_WORDS) {
            // The damaged form of a word is the word with each "我" replaced
            // by the name; swap it back literally.
            restored = restored.replace(word.replace("我", name), word);
        }
        return restored;
    }

    // Manual smoke test.
    public static void main(String[] args) {
        NovelReformer.textFileThirdPersonification("C:\\Users\\***\\Desktop\\限制级末日症候-1.txt", "高川");
    }
}
当然，还是有一些小瑕疵，毕竟不是人工智能，以笔者生硬的算法对博大精深的汉语还是有些束手无策，而且暂未考虑效率问题（方法调用时间也就1秒多钟），望诸君交流指正！