文字处理—巧拆中英文
有时在处理问题时,需要把英文和中文拆开分别处理,这时就要用到拆解代码了!
#coding=utf-8
def split_ascii(s):
    """Split *s* at its first non-whitespace ASCII character.

    Args:
        s: The string to split (typically one line of text).

    Returns:
        A ``(ascii_tail, head)`` tuple. ``ascii_tail`` is ``s`` from the first
        non-whitespace ASCII character through the end, unchanged (a trailing
        newline is kept). ``head`` is everything before that character with
        trailing whitespace removed. When ``s`` contains no non-whitespace
        ASCII character (including when it is empty), ``ascii_tail`` is ``""``
        and ``head`` is ``s.rstrip()``.
    """
    for ii, ch in enumerate(s):
        if ch.isascii() and not ch.isspace():
            return s[ii:], s[:ii].rstrip()
    # No split point found. Returning explicitly avoids using an unbound or
    # stale loop index, which previously raised on "" and misreported the
    # last character as the ASCII part for all-non-ASCII input.
    return "", s.rstrip()
def first_deal():
    """Rewrite ba1_1.txt in place as ``<head>|<ascii tail>`` lines.

    Each line of the file is split with ``split_ascii``; the part before the
    first non-whitespace ASCII character is written first, then a ``|``
    separator, then the ASCII part, one record per line. Output is UTF-8.
    """
    path = r'F:\物理练习图\九年级\字典组建试题\英语源DOC\ba1_1.txt'

    # The input and output are the same file, and opening it with 'w'
    # truncates it immediately. Read everything first so the content is not
    # destroyed before it has been processed.
    # NOTE(review): the read uses the platform default encoding (unchanged
    # from before) while the output is UTF-8 -- confirm the source encoding.
    with open(path) as f:
        lines = f.readlines()

    with open(path, 'w', encoding='utf-8') as quizFile:
        for line in lines:
            a, b = split_ascii(line)
            # `a` runs to the end of the line and still carries its newline;
            # drop it so the '\n' written below does not yield a blank line.
            a = a.rstrip('\n')
            b = b.replace('\n', '')
            quizFile.write(f'{b}|{a}\n')
def second_deal():
    """Extract Chinese and English text from ba1_1.txt into ba1_last.txt.

    For every input line, one output line is written: all runs of Chinese
    characters concatenated, immediately followed by the runs of English
    letters joined with single spaces. Output is UTF-8.
    """
    import re

    # Patterns are loop-invariant, so build them once.
    # Upper- and lower-case English letters.
    # NOTE(review): the '|' inside the character class is a literal pipe, not
    # alternation, so '|' characters are captured along with letters. Left
    # unchanged because existing output may rely on it -- confirm intent.
    en_letter = re.compile('[\u0041-\u005a|\u0061-\u007a]+')
    # Chinese characters (U+4E00 through U+9FA5).
    zh_char = re.compile('[\u4e00-\u9fa5]+')

    src_path = r"F:\物理练习图\九年级\字典组建试题\英语源DOC\ba1_1.txt"
    dst_path = r'F:\物理练习图\九年级\字典组建试题\英语源DOC\ba1_last.txt'

    # Both handles are managed by `with`, so the output file is closed even
    # if reading or matching raises part-way through.
    # NOTE(review): input is read with the platform default encoding, as
    # before -- confirm it matches how ba1_1.txt was written.
    with open(src_path) as f, open(dst_path, 'w', encoding='utf-8') as quizFile:
        for line in f:
            en = ' '.join(en_letter.findall(line))
            zh = ''.join(zh_char.findall(line))
            quizFile.write(f'{zh}{en}\n')
# Script entry: run the Chinese/English extraction pass (second_deal).
second_deal()