# -*- coding: utf-8 -*-
from itertools import chain
from pypinyin import pinyin, Style
import os
def to_pinyin(s):
    """Convert Chinese text into one concatenated pinyin string.

    Tones are rendered in pypinyin's ``Style.TONE3`` form, as shown in the
    examples below.

    :param s: text to convert, either a string or a list of strings
    :type s: str or list
    :return: the pinyin syllables joined together without separators

    >>> to_pinyin('你好吗')
    'ni3hao3ma'
    >>> to_pinyin(['你好', '吗'])
    'ni3hao3ma'
    """
    # pinyin() returns a list of lists of syllable strings; flatten, then join.
    syllable_groups = pinyin(s, style=Style.TONE3)
    return ''.join(
        syllable for group in syllable_groups for syllable in group
    )
def sort_py(path, out_path):
    """Deduplicate the lines of a text file and write them sorted by ending.

    Lines are read from ``path`` (UTF-8), trailing newlines are removed,
    blank lines and repeated lines are dropped (the first occurrence wins),
    and the remaining lines are sorted by their last character. The result
    is written to ``out_path`` (UTF-8), one line per entry.

    :param path: path of the input text file
    :param out_path: path of the output text file (overwritten if it exists)
    :return: None
    """
    # Read the data to process.
    with open(path, "r", encoding='utf-8') as f:
        raw_lines = f.readlines()

    seen = set()  # O(1) membership test instead of scanning the result list
    unique_lines = []
    for raw in raw_lines:
        line = raw.rstrip('\n')
        # Blank lines must be skipped: they have no last character, so the
        # sort key below would raise IndexError on them.
        if not line or line in seen:
            continue
        seen.add(line)
        unique_lines.append(line)

    # NOTE(review): the key is the raw last character (code-point order), not
    # its pinyin. The original lambda merely named its parameter `to_pinyin`,
    # shadowing the helper of that name - confirm whether pinyin ordering of
    # the last character was actually intended.
    ordered = sorted(unique_lines, key=lambda line: line[-1])

    # Write out the processed data.
    with open(out_path, "w", encoding='utf-8') as f:
        f.writelines("{}\n".format(line) for line in ordered)
# Walk the data directory and write a sorted "<name>_test.txt" copy next to
# every ".txt" file found.
for root, dirs, files in os.walk('I:\\REMOVE\\test_20211119'):
    for file in files:
        print(file)
        if file.endswith('.txt'):
            # os.walk descends into subdirectories, so paths must be built
            # from `root` (the directory currently being visited) rather
            # than from the top-level directory.
            file_first = os.path.join(root, file)
            # Remove only the trailing ".txt" so that a name containing
            # other dots (e.g. "a.b.txt") keeps its full stem.
            file_second = os.path.join(root, file[:-len('.txt')] + '_test.txt')
            sort_py(file_first, file_second)
# Tag: pypinyin
# Latest recommended article published on 2024-06-16 10:55:10