【20200904】
可以先按"/"拆分(split),做好特殊字符标记,存储到临时变量里面,比如元组、数组或者字典之类的;再遍历上面的变量,拆分括号,用一个特殊标记来标记括号里面的内容,总之能区分括号内和括号外的内容就可以,之后存储到第二个变量;最后遍历第二个变量,生成句型。
【20200905】
抱歉最近精神状态不太好,又比较忙,今天大概写了下,应该没有啥问题。还有就是生成顺序的问题,这个我有时间再看下,如果要改的话,大概是改 bottom_fuc 函数和调用它的那里的逻辑。还有一种方式就是对每个句型生成一个列表,最后直接 join,但是我觉得这样会占用更多的内存空间,所以没有用。代码直接贴上来:
import logging
import re
# Open the phrase-pattern file and prime the main loop with its first line.
f = open("./phasesplit")
line_true = f.readline()

# list_all collects one list of report rows per processed input line.
list_all = list()
# i counts processed lines; list_size is the count later written to the
# report header.
list_size = i = 0
def bottom_fuc(inner_list=(), org_str_list=()):
    """Combine every existing string with every new item (cartesian product).

    Each element of ``org_str_list`` is joined, with a single space, to each
    element of ``inner_list``.  Elements are converted with ``str()`` first,
    so non-string items are accepted.

    Args:
        inner_list: items to append (the alternatives for the next slot).
        org_str_list: strings that have been assembled so far.

    Returns:
        A new list of ``"<existing> <item>"`` strings.  The order is
        item-major: all existing strings combined with the first item, then
        all of them combined with the second item, and so on.  The result is
        empty when either input is empty.
    """
    # The defaults used to be the ``list`` class itself, which is not
    # iterable; empty tuples make an omitted argument behave as "no items".
    return [
        str(existing) + " " + str(item)
        for item in inner_list
        for existing in org_str_list
    ]
# Main script: expand every phrase pattern read from ``f`` and write the
# results to result.txt.

# Matches one parenthesised group such as "(with sb)".  Because the pattern
# is a capturing group, re.split() keeps the matched groups in its result,
# where they occupy the odd indices; the even indices hold the literal text.
_PAREN_GROUP_RE = re.compile(r"(\(.+?\))")


def _expand_phrase(phrase):
    """Return every concrete sentence described by the pattern ``phrase``.

    Pattern syntax handled here:
      * ``a/b/c`` inside a word  -> exactly one of the alternatives;
      * ``(x)`` or ``(x/y)``     -> optional: absent, or one alternative.

    For example ``"do (sb/sth) grace"`` yields
    ``["do grace", "do sb grace", "do sth grace"]``.

    The returned strings are stripped and runs of two or more spaces are
    collapsed to a single space.
    """
    variants = [""]
    for index, segment in enumerate(_PAREN_GROUP_RE.split(phrase)):
        if index % 2:
            # Parenthesised group: the leading " " alternative stands for
            # "group omitted"; the extra spaces it leaves are collapsed below.
            inner = segment.strip("(").strip(")")
            variants = bottom_fuc([" "] + inner.split("/"), variants)
            continue
        # Literal text: every space-separated word is its own slot, so a "/"
        # only ever chooses between single words, even when the text starts
        # with a space (as it does right after a parenthesised group).
        for word in segment.split(" "):
            if word:
                variants = bottom_fuc(word.split("/"), variants)
    return [re.sub(" {2,}", " ", variant.strip()) for variant in variants]


def _expand_line(raw_line):
    """Return the report rows for one input line.

    The first row is the cleaned-up pattern itself, the remaining rows are
    its expansions.  Anything after the first ";" is treated as a remark and
    is re-attached (as ``";<remark>"``) to every row when it is non-empty.
    """
    # partition() splits on the first ";" only, so additional semicolons stay
    # in the remark instead of breaking a two-name unpacking.
    pattern, separator, remark = raw_line.partition(";")
    if separator:
        pattern = pattern.strip()
        remark = remark.strip()
    else:
        pattern = raw_line.rstrip("\r\n")
    suffix = ";" + remark if remark else ""
    return [pattern + suffix] + [row + suffix for row in _expand_phrase(pattern)]


try:
    while line_true:
        # Whitespace-only lines contain no pattern; skipping them keeps empty
        # sections out of the report and out of the section count.
        if line_true.strip():
            list_all.append(_expand_line(line_true))
            i += 1
        # Read the next line; readline() returns "" at end of file.
        line_true = f.readline()
finally:
    # Release the input file even if a line fails to expand.
    f.close()
list_size = i

# Write the report.
# NOTE(review): every record is terminated with a bare "\r", exactly as
# before -- confirm that "\n" was not intended.
# NOTE(review): one empty record followed by a separator line is written
# after each section; this nesting is inferred from the sample output --
# confirm.
with open("result.txt", "w") as nf:
    nf.write("#############################################\r")
    nf.write("#section:{}\r".format(list_size))
    nf.write("#############################################\r")
    for la in list_all:
        for nl in la:
            nf.write(nl + "\r")
        nf.write("\r")
        nf.write("#############################################\r")
输入文件(phasesplit)
quarrel (with sb) about/for/over ; 2313
dabble at/in/with
(sb/sth) damn and blast (sb/sth)
dance on/upon a rope/nothing
dance on (the) air
dead/flat/stark calm
do/go/make the/one's round
do (sb/sth) grace
输出文件(result.txt)
#############################################
#section:8
#############################################
quarrel (with sb) about/for/over;2313
quarrel about;2313
quarrel with sb about;2313
quarrel for;2313
quarrel with sb for;2313
quarrel over;2313
quarrel with sb over;2313
#############################################
dabble at/in/with
dabble at
dabble in
dabble with
#############################################
(sb/sth) damn and blast (sb/sth)
damn and blast
sb damn and blast
sth damn and blast
damn and blast sb
sb damn and blast sb
sth damn and blast sb
damn and blast sth
sb damn and blast sth
sth damn and blast sth
#############################################
dance on/upon a rope/nothing
dance on a rope
dance upon a rope
dance on a nothing
dance upon a nothing
#############################################
dance on (the) air
dance on air
dance on the air
#############################################
dead/flat/stark calm
dead calm
flat calm
stark calm
#############################################
do/go/make the/one's round
do the round
go the round
make the round
do one's round
go one's round
make one's round
#############################################
do (sb/sth) grace
do grace
do sb grace
do sth grace
#############################################