Python decode GBK:使用 Python 实现 GBK 转 Unicode 码查询表

#!/usr/bin/python

# encoding: utf-8

import os

import datetime

import time

# Input text file; each useful line has the form "<hex>:<hex>".
SRC = "gbkuni30.txt"

# C header file generated by this script.
DST = "gbkuni30_gen1.h"

# Identifier of the C array written into DST.
ARRAY = "gbkuni30"

# Lookup table filled from SRC: the second hex field of a line is the index,
# the first hex field is the stored value.  Slots never assigned stay 0.
# The table has 0x10000 entries so that every 16-bit index, 0xFFFF included,
# is addressable (the previous range(0, 65535) stopped one slot short).
buffer = [0x0] * 0x10000

# Largest index written into ``buffer`` so far.
max_num = 0

# Load the mapping from SRC.  Every line that splits on ':' into exactly two
# fields is parsed as two hexadecimal numbers; the first is stored in
# ``buffer`` at the index given by the second.  Other lines are skipped.
#
# The ``with`` block closes the input file even when a line fails to parse.
# Errors (missing file, invalid hex, index outside ``buffer``) still
# propagate unchanged, exactly as the former ``except: raise`` did.
with open(SRC, 'r') as src_file:
    for line in src_file:
        fields = line.strip().split(':')
        if len(fields) != 2:
            continue

        x1 = int(fields[0], 16)  # value to store
        x2 = int(fields[1], 16)  # index into buffer

        buffer[x2] = x1

        # Track the highest index used.
        if x2 > max_num:
            max_num = x2

print("max num %d %x len: %d" % (max_num, max_num, len(buffer)))

# Write the generated C header to DST: banner comment, include guard, the
# array definition, then the closing guard.  The ``with`` block guarantees
# the output file is flushed and closed even if a write fails part-way.
with open(DST, "w") as out_file:
    banner = [
        "/**********************************************************************************/\n",
        "/* GBK(GB18030) to UNICODE table, powered by Late Lee */\n",
        "/* http://www.latelee.org */\n",
        "/* %s */\n" % (datetime.datetime.now()),
        "/* The source file comes from: */\n",
        "/* http://icu-project.org/repos/icu/data/trunk/charset/source/gb18030/gbkuni30.txt*/\n",
        "/**********************************************************************************/\n",
        "#ifndef __GBK2UNICODE__H\n",
        "#define __GBK2UNICODE__H\n\n",
        "static unsigned short %s[] = \n{\n" % (ARRAY),
    ]
    out_file.write("".join(banner))

    # Only buffer[0x8140 .. max_num] is emitted, so element 0 of the C array
    # corresponds to buffer[0x8140].
    count = 0
    for code in range(0x8140, max_num + 1):
        out_file.write("0x%04x, " % (buffer[code]))
        count += 1

        # After every 10 values, end the row with a zero-based row number.
        # Floor division keeps the row number an int on Python 3.
        if count % 10 == 0:
            out_file.write(" // line num %d \n" % (count // 10 - 1))

    trailer = "\n"
    trailer += "};\n\n"
    trailer += "#endif //__GBK2UNICODE__H\n"
    out_file.write(trailer)

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值