将GBK汉字转化为拼音的Python小程序

#!/usr/bin/python
# -*- coding: cp936 -*-

#################################
#   Written by caocao           #
#   caocao@eastday.com          #
#   http://nethermit.yeah.net   #
#################################

import sys
import re
import string

class CConvert:
    """Convert GBK-encoded Chinese text to PinYin using a lookup table.

    The table is read once from ``convert.txt`` (in the current working
    directory) and kept verbatim in ``self.data``.  Lookups match lines that
    start with a two-byte GBK character immediately followed by its
    alphanumeric PinYin.

    All text is handled as GBK *byte strings*: ``str`` on Python 2,
    ``bytes`` on Python 3.
    """

    def __init__(self):
        """Load the lookup table into ``self.data``.

        If ``convert.txt`` cannot be opened, a message is printed and the
        process exits with status 1.
        """
        try:
            # Binary mode: the table holds raw GBK bytes and must not be
            # decoded or newline-translated.
            fp = open("convert.txt", "rb")
        except IOError:
            sys.stdout.write(
                "Can't load data from convert.txt\n"
                "Please make sure this file exists.\n"
            )
            sys.exit(1)
        # Close the handle even if read() fails.
        with fp:
            self.data = fp.read()

    def convert(self, strIn):
        """Convert a GBK byte string to PinYin.

        Every two-byte GBK character is replaced by its PinYin (or left as
        is when the table has no entry); every other byte is copied through.
        Each item is followed by one space, and a GBK character and its
        PinYin are centred to the same width so the two outputs line up.

        Args:
            strIn: GBK-encoded byte string.

        Returns:
            A two-item list ``[pinyin_line, original_line]``.
        """
        space = b" "
        keys, values = [], []
        length = len(strIn)
        i = 0
        while i < length:
            lead = strIn[i:i + 1]
            trail = strIn[i + 1:i + 2]  # empty when ``lead`` is the last byte
            # GBK double-byte range: lead 0x81-0xFE, trail 0x40-0xFE except 0x7F.
            if (trail
                    and 0x81 <= ord(lead) <= 0xFE
                    and 0x40 <= ord(trail) <= 0xFE
                    and ord(trail) != 0x7F):
                pair = lead + trail
                pinyin = self.getIndex(pair)
                # Never narrower than the two-byte character itself.
                width = max(len(pinyin), 2)
                keys.append(pair.center(width) + space)
                values.append(pinyin.center(width) + space)
                i += 2
            else:
                keys.append(lead + space)
                values.append(lead + space)
                i += 1
        return [b"".join(values), b"".join(keys)]

    def getIndex(self, strIn):
        """Look up the PinYin of a single two-byte GBK character.

        Args:
            strIn: the two-byte GBK character.

        Returns:
            The PinYin found in the table, or ``strIn`` unchanged when the
            table has no line starting with it.
        """
        # GBK trail bytes may be regex metacharacters ('[', '\\', '|', ...),
        # so the character must be escaped before it goes into the pattern.
        pattern = b"^" + re.escape(strIn) + b"([0-9a-zA-Z]+)"
        match = re.search(pattern, self.data, re.M)
        if match is None:
            return strIn
        return match.group(1)
  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 4
    评论
评论 4
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值