python实现将汉字转换成汉语拼音的库_python_脚本之家 -
http://www.jb51.net/article/65496.htm
python实现中文转拼音-keyxl-ChinaUnix博客 -
http://blog.chinaunix.net/uid-26638338-id-3830276.html
中文拼音五笔转换带声调 - 在线工具 -
http://tool.lu/py5bconvert/
pinyin4py 1.0.dev : Python Package Index -
https://pypi.python.org/pypi/pinyin4py
汉字编码表 - 下载频道 - CSDN.NET -
http://download.csdn.net/download/slowwind9999/291213
#!/usr/bin/python
#coding:utf-8
#2015-11-04 21:23:17.230000
"""
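Adapted from: python实现将汉字转换成汉语拼音的库_python_脚本之家 - http://www.jb51.net/article/65496.htm
The dictionary table was downloaded from here (the file must be converted to UTF-8): 汉字编码表 - 下载频道 - CSDN.NET - http://download.csdn.net/detail/slowwind9999/291213
Results can be checked with this online tool: 中文拼音五笔转换带声调 - 在线工具 - http://tool.lu/py5bconvert/
To convert Chinese characters to some other encoding, add a function modelled on hanzi2pinyin or hanzi2wubi, add one more dictionary attribute, and load its data in load_word.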
"""
import os.path
import sys

if sys.version_info[0] == 2:
    # Python 2 only: make UTF-8 the implicit str<->unicode codec so byte
    # strings and unicode strings can be mixed (e.g. joined) without
    # UnicodeDecodeError.  ``reload`` restores ``setdefaultencoding``, which
    # site.py removes at startup.  Neither name exists on Python 3, where the
    # hack is unnecessary.
    reload(sys)  # noqa: F821
    sys.setdefaultencoding('utf8')

__version__ = '0.9'
# Export the class this module actually defines; the previous value
# ("PinYin") named nothing in this file, so ``from module import *`` failed.
__all__ = ["Hanzi2code"]
class Hanzi2code(object):
    """Convert Chinese characters to pinyin or Wubi codes via a lookup table.

    The table is read from a UTF-8 text file when the instance is created.
    After the leading header lines, every line is split on whitespace:
    field 0 is the character, field 1 its pinyin and field 2 its Wubi code.

    Attributes:
        word_dict: maps a character to its pinyin code.
        wubi_dict: maps a character to its Wubi code.
        dict_file: path of the table file that was (or will be) loaded.
    """

    # Number of leading lines of the table file that are not data.
    HEADER_LINES = 6

    def __init__(self, dict_file='code.txt'):
        """Create a converter and immediately load ``dict_file`` (UTF-8).

        Raises:
            IOError: if ``dict_file`` does not exist.
        """
        self.word_dict = {}
        self.wubi_dict = {}
        self.dict_file = dict_file
        self.load_word()

    def load_word(self):
        """Fill ``word_dict`` and ``wubi_dict`` from ``self.dict_file``.

        Lines with fewer than three fields are reported with ``err....`` on
        stdout and loading continues; a line that has a pinyin field but no
        Wubi field still contributes its pinyin entry.

        Raises:
            IOError: if ``self.dict_file`` does not exist.
        """
        if not os.path.exists(self.dict_file):
            raise IOError("NotFoundFile")
        # Read bytes and decode explicitly so the result does not depend on
        # the platform default encoding or on the Python major version.
        with open(self.dict_file, "rb") as f_obj:
            raw_lines = f_obj.readlines()[self.HEADER_LINES:]
        for raw_line in raw_lines:
            try:
                fields = raw_line.decode("utf-8").split()
            except UnicodeDecodeError:
                # Best effort: an undecodable line is reported and skipped.
                print('err....')
                continue
            if len(fields) >= 2:
                self.word_dict[fields[0]] = fields[1]
            if len(fields) >= 3:
                self.wubi_dict[fields[0]] = fields[2]
            else:
                print('err....')

    def hanzi2pinyin_split(self, string="", split=""):
        """Return the pinyin of ``string``, optionally joined by ``split``.

        Returns:
            The list produced by ``hanzi2pinyin`` when ``split`` is the empty
            string, otherwise a single string with the codes joined by
            ``split``.
        """
        result = self.hanzi2pinyin(string=string)
        if split == "":
            return result
        return split.join(result)

    def hanzi2code(self, string='', dic=None):
        """Translate ``string`` character by character using ``dic``.

        Args:
            string: text to convert; a byte string is decoded as UTF-8 first.
            dic: mapping from a single character to its code. ``None`` means
                an empty mapping, i.e. every character passes through.

        Returns:
            A list with one entry per character: the lower-cased first
            whitespace-separated token of the mapped code, or of the
            character itself when it has no mapping. Whitespace characters
            are returned unchanged.
        """
        if dic is None:
            dic = {}
        if isinstance(string, bytes):
            string = string.decode("utf-8")
        result = []
        for char in string:
            tokens = dic.get(char, char).split()
            # A whitespace character (or an empty code) splits to an empty
            # list; keep the character instead of indexing into nothing.
            result.append(tokens[0].lower() if tokens else char)
        return result

    def hanzi2wubi(self, string=''):
        """Return the list of Wubi codes for the characters of ``string``."""
        return self.hanzi2code(string, self.wubi_dict)

    def hanzi2pinyin(self, string=''):
        """Return the list of pinyin codes for the characters of ``string``."""
        return self.hanzi2code(string, self.word_dict)
if __name__ == "__main__":
    # Manual smoke run: convert one sample sentence with the default table
    # file and show the pinyin list, the joined pinyin and the Wubi list.
    converter = Hanzi2code()
    sample = "钓鱼岛是中国的"
    pinyin_list = converter.hanzi2pinyin(string=sample)
    pinyin_joined = converter.hanzi2pinyin_split(string=sample, split="-")
    wubi_list = converter.hanzi2wubi(string=sample)
    print("in: %s" % sample)
    print("out: %s" % str(pinyin_list))
    print("out: %s" % pinyin_joined)
    print("out: %s" % str(wubi_list))