# -*- coding: utf-8 -*-
# @Time : 2024/6/18 10:26
# @Author : Cocktail_py
import jieba.posseg as pseg
def cut_sentences(text):
    """Split Chinese text into sentences.

    The text is tokenized with jieba's part-of-speech tagger (``pseg.cut``)
    and a sentence is closed after every sentence-ending punctuation token.
    The terminator stays attached to the end of the sentence it closes.

    Args:
        text: The string to split.

    Returns:
        A list of sentence strings in their original order. Any text after
        the last terminator is returned as a final element. The list is
        empty when the tagger yields no tokens.
    """
    # Chinese text normally uses the full-width forms of '!' and '?', so both
    # the ASCII and the full-width variants are treated as sentence endings.
    terminators = {'。', '!', '?', '!', '?'}
    result = []
    tmp = []
    for word, flag in pseg.cut(text):
        tmp.append(word)
        # Per the jieba docs, 'x' is the tag given to punctuation and other
        # non-word tokens; requiring it avoids matching inside a real word.
        if flag == 'x' and word in terminators:
            result.append(''.join(tmp))
            tmp = []
    # Flush trailing text that was not closed by a terminator.
    if tmp:
        result.append(''.join(tmp))
    return result
# Demo: split a short two-sentence sample and print the resulting list.
text = "你好,我是小明。我今年18岁了。"
sentences = cut_sentences(text)
print(sentences)
# If the following exception is raised, downgrade jieba (pin jieba==0.40):
#   TypeError: __repr__ returned non-string (type bytes)