#!/usr/bin/env python
# -*- coding: utf-8 -*-
import re
import codecs

# Extract every run of Chinese characters from output.txt and write the runs
# to outout1.txt, one run per line.

# Read the whole input first so that a missing or unreadable input file fails
# before the output file is created or truncated.
# errors='ignore' drops byte sequences that are not valid UTF-8 instead of
# raising.
with codecs.open("output.txt", 'r', encoding='utf-8', errors='ignore') as fdata:
    text = fdata.read()

# One match per maximal run of consecutive characters in U+4E00..U+9FA5;
# every character outside that range acts as a separator and is discarded.
pattern = re.compile(u"[\u4e00-\u9fa5]+")
result = pattern.findall(text)

# The context manager closes the output file even if a write fails (the
# original called close() on an undefined name and never closed this handle).
with open("outout1.txt", "w", encoding='utf-8') as f1:
    f1.writelines(w + "\n" for w in result)
Python 正则表达式:把一篇文章处理为一句话一行
最新推荐文章于 2023-04-11 03:24:59 发布