# Split a single line of text by a maximum length, keeping sentences whole
# wherever possible.
def split_text(text, max_len):
    """Split ``text`` into pieces of roughly ``max_len`` characters.

    The text is first cut into equal-width windows.  Every window except the
    last is then shortened so that it ends on a sentence terminator, and the
    characters that were cut off are carried over to the front of the next
    piece.  Concatenating the returned pieces always reproduces ``text``.

    Because of the carry-over, a piece can be longer than ``max_len``;
    ``max_len`` is a target size, not a hard limit.

    Args:
        text: The text to split.
        max_len: Target piece length; expected to be a positive integer
            (``0`` raises ``ZeroDivisionError``).

    Returns:
        A list of strings.  Empty input yields ``['']``.
    """
    # Imported locally so the function works even if the module does not
    # import ``re`` at the top.
    import re

    # First alternative: the longest prefix ending in a sentence terminator
    # (plus any closing quotes) that is NOT inside a parenthetical, i.e. not
    # followed by a closing parenthesis with no opening one in between.
    # Second alternative: no usable terminator, so take the whole window.
    # \uff1f \uff01 \uff08 \uff09 are the full-width forms of ? ! ( ),
    # written as escapes so the pattern survives character normalization.
    pattern = re.compile(
        r'[\s\S]*[。?!\uff1f\uff01…]+”*(?![^(\uff08]*[)\uff09])|[\s\S]+'
    )

    sentence_list = []
    prefix_str = ''
    split_num = int(len(text) / max_len) + 1
    split_len = int(len(text) / split_num) + 1
    for i in range(split_num):
        start_index = split_len * i
        tmp_text = text[start_index:start_index + split_len]
        tmp_sentence = tmp_text
        if i < split_num - 1:
            quotes_l = tmp_sentence.rfind('“')
            quotes_r = tmp_sentence.rfind('”')
            # An unclosed opening quote in the second half of the window:
            # cut before it so the quotation is not split in the middle.
            if quotes_l > quotes_r and quotes_l > split_len / 2:
                tmp_sentence = tmp_sentence[:quotes_l]
            tmp_sentence = pattern.match(tmp_sentence).group()
        sentence = prefix_str + tmp_sentence
        # tmp_sentence is always a prefix of tmp_text, so slice it off.
        # (str.replace would also delete later repeats of the same text.)
        prefix_str = tmp_text[len(tmp_sentence):]
        # The last window can be empty when the windows overrun the text;
        # skip that piece, but still return [''] for empty input.
        if sentence or not sentence_list:
            sentence_list.append(sentence)
    return sentence_list
# ChatGPT is really great!