Python:合并段落方法

合并段落:
若某一行不是以合法的段落结束符(如句号、问号、感叹号、引号等)结尾,或者行内的成对标点(引号、括号、书名号等)没有配对,则将该行与下一行合并;同时去掉空白行以及每行首尾的空白字符。

# Suffixes that look like a terminator (they end in "." or "章") but must NOT
# be treated as the end of a paragraph. They are checked before the
# terminator list so that they take precedence.
_NOT_END_SUFFIXES = ("www.", "文章")

# Suffixes that mark the end of a paragraph. Full-width punctuation is written
# with escapes so it survives tools that normalise full-width characters to
# ASCII (which is what left duplicate "!", "?", ":" and ";" entries before).
_END_SUFFIXES = (
    "\"", ".", "”", "。",
    "!", "?", "\uff01", "\uff1f",   # ASCII and full-width ! ?
    "…",                            # also covers the doubled form "……"
    "》",
    ":", "\uff1a",                  # ASCII and full-width colon
    ";", "\uff1b",                  # ASCII and full-width semicolon
    "1", "2", "3", "4", "5", "6", "7", "8", "9", "0",
    "章", "部", "录", "著", "译", "言",
    "~", "---", "」",
)


def isEndOfP(line):
    """Return True if *line* ends with a recognised paragraph terminator.

    A line ending in one of the excluded suffixes ("www." or "文章") is never
    considered terminated, even though it also ends in a terminator
    character. Otherwise the line is terminated when it ends with any entry
    of the terminator list (ASCII and full-width sentence punctuation,
    closing quotes/brackets, digits and a few heading characters).

    Args:
        line: The text of one line; callers pass it already stripped.

    Returns:
        bool: True when the line ends a paragraph, False otherwise
        (including for the empty string).
    """
    # str.endswith accepts a tuple and tests every candidate in one call.
    if line.endswith(_NOT_END_SUFFIXES):
        return False
    return line.endswith(_END_SUFFIXES)

# (opening, closing) delimiter pairs that must occur equally often in a line.
# Full-width parentheses and braces are written with escapes so they survive
# tools that normalise full-width characters to ASCII.
_DELIMITER_PAIRS = (
    ("(", ")"),
    ("{", "}"),
    ("[", "]"),
    ("《", "》"),
    ("“", "”"),
    ("‘", "’"),
    ("\uff08", "\uff09"),   # full-width parentheses
    ("\uff5b", "\uff5d"),   # full-width braces
    ("【", "】"),
)


def isStrD(line):
    """Return True if every paired delimiter in *line* is balanced.

    Balance is judged by occurrence counts only (nesting order is not
    checked): each opening delimiter must appear as many times as its closing
    counterpart. The straight double quote is its own closing character, so
    it is balanced when it appears an even number of times.

    Args:
        line: The text to inspect. None and the empty string are treated as
            balanced.

    Returns:
        bool: True when all delimiter counts match, False otherwise.
    """
    if not line:
        return True
    # Comparing the count of '"' with itself can never fail, so the straight
    # quote needs a parity check instead of the generic pair comparison.
    if line.count("\"") % 2 != 0:
        return False
    return all(
        line.count(opening) == line.count(closing)
        for opening, closing in _DELIMITER_PAIRS
    )

def countSubString(line, substr):
    """Count occurrences of *substr* in *line*, overlapping matches included.

    After each hit the search resumes one character past the start of that
    hit, so "aaaa" contains "aa" three times. None and empty lines yield 0.

    Args:
        line: The text to search; may be None.
        substr: The text to look for.

    Returns:
        int: The number of (possibly overlapping) occurrences.
    """
    size = 0 if line is None else len(line)
    total = 0
    start = 0
    while start < size:
        hit = line.find(substr, start)
        if hit == -1:
            # No further occurrence at or after `start`.
            break
        total += 1
        start = hit + 1
    return total

def isP(line):
    """Return True if *line* closes a paragraph.

    A line closes a paragraph when isEndOfP accepts its ending and isStrD
    reports its paired delimiters as balanced. isStrD is only consulted when
    isEndOfP succeeds.
    """
    if not isEndOfP(line):
        return False
    return isStrD(line)

def formated(content):
    """Merge the lines of *content* into paragraphs.

    The text is split on "\n" and every line is stripped of leading and
    trailing whitespace. A newline is emitted only after a line that isP
    accepts as the end of a paragraph; any other line (blank lines included)
    is joined directly to the following one with no separator.

    Args:
        content: The raw text to reformat.

    Returns:
        str: The text with lines merged into paragraphs.
    """
    pieces = []
    for raw_line in content.split("\n"):
        text = raw_line.strip()     # strip once and reuse the result
        pieces.append(text)
        if isP(text):
            pieces.append("\n")
    # A single join avoids the quadratic cost of repeated `res += ...`.
    return "".join(pieces)

转载于:https://www.cnblogs.com/xuejianbest/p/10285124.html

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值