python字符串编码测试

最新推荐文章于 2020-12-22 10:07:37 发布

shareinfo2018

最新推荐文章于 2020-12-22 10:07:37 发布

阅读量317

点赞数

分类专栏： script

本文链接：https://blog.csdn.net/chenyefei/article/details/77873339

版权

script 专栏收录该内容

17 篇文章 0 订阅

订阅专栏

#encoding=utf8
import chardet

def str_detect(str):
    """Print a diagnostic dump of ``str`` followed by an empty line.

    In order, this prints:
      1. the ``ord()`` of every element of ``str`` as lowercase hex
         (at least two digits each), joined with ``:``;
      2. the result of ``chardet.detect(str)``;
      3. ``str`` itself, but only when the detected encoding is exactly
         ``"utf-8"``.

    If any of those steps raises, ``type(str)`` is printed instead and the
    remaining steps are skipped. The function itself returns ``None``.

    The parameter name shadows the ``str`` builtin; it is kept unchanged so
    existing callers are not affected.
    """
    try:
        print(":".join("{:02x}".format(ord(c)) for c in str))
        t = chardet.detect(str)
        print(t)
        if t['encoding'] == "utf-8":
            print(str)
    except Exception:
        # Deliberate best-effort fallback: when the value cannot be dumped or
        # detected, report its type. ``Exception`` (rather than a bare
        # ``except``) lets KeyboardInterrupt/SystemExit propagate.
        print(type(str))
    # Empty line separating this dump from the next one.
    print("")

def enc(str, enc):
    """Encode ``str`` with the codec named ``enc`` and dump the result.

    On success the encoded value is passed to ``str_detect``. If the encode
    call raises ``UnicodeError`` (the text cannot be converted) or
    ``LookupError`` (the codec name is unknown), ``ERR:encode`` is printed
    instead and nothing is dumped.

    Parameter names shadow the ``str`` builtin and this function's own name;
    they are kept unchanged for compatibility with existing callers.
    """
    try:
        s = str.encode(enc)
    except (UnicodeError, LookupError):
        print("ERR:encode")
    else:
        # Kept outside the ``try`` so only a failing encode is reported as
        # an encode error.
        str_detect(s)
     
def dec(str, enc):
    """Decode ``str`` with the codec named ``enc`` and dump the result.

    On success the decoded value is passed to ``str_detect``. If the decode
    call raises ``UnicodeError`` (the data is not valid for that codec, or an
    implicit conversion of the input fails) or ``LookupError`` (the codec
    name is unknown), ``ERR:decode`` is printed instead and nothing is dumped.

    The parameter name ``str`` shadows the builtin; it is kept unchanged for
    compatibility with existing callers.
    """
    try:
        s = str.decode(enc)
    except (UnicodeError, LookupError):
        print("ERR:decode")
    else:
        # Kept outside the ``try`` so only a failing decode is reported as
        # a decode error.
        str_detect(s)

# Banner marking the start of this script's output.
print("=============================")

# 1) Dump a plain (un-prefixed) string literal.
cn = "中文"
str_detect(cn)

# 2) Dump a unicode literal, then try encoding it with several codecs.
cn1 = u"中文1"
str_detect(cn1)

for _codec in ('utf-8', 'utf-16', 'gb2312', "ISO-8859-1"):
    enc(cn1, _codec)


# 3) Dump another plain literal, then try decoding.
cn2 = "中文2"
str_detect(cn2)

dec(cn2, 'utf-8')
# NOTE(review): the remaining decode attempts are made on cn1 (the unicode
# literal), not on cn2 -- possibly intentional for the demo; confirm.
for _codec in ('utf-16', 'gb2312', "ISO-8859-1"):
    dec(cn1, _codec)

# 4) Round trip: decode from utf-8, re-encode as utf-16, decode it back,
#    dumping the value after every step.
cn3 = "中文3"
dec_str = cn3.decode('utf-8')
str_detect(dec_str)
enc_str = dec_str.encode('utf-16')
str_detect(enc_str)
end_str = enc_str.decode('utf-16')
str_detect(end_str)

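# decode: a byte str is decoded, according to the encoding its bytes are in, into a unicode string.
# encode: a unicode string is encoded into a byte str using the requested encoding.
# Note: results may differ from system to system.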

结果：

=============================
e4:b8:ad:e6:96:87
{'confidence': 0.7525, 'language': '', 'encoding': 'utf-8'}
中文

4e2d:6587:31
<type 'unicode'>

e4:b8:ad:e6:96:87:31
{'confidence': 0.7525, 'language': '', 'encoding': 'utf-8'}
中文1

ff:fe:2d:4e:87:65:31:00
{'confidence': 1.0, 'language': '', 'encoding': 'UTF-16'}

d6:d0:ce:c4:31
{'confidence': 0.682639754276994, 'language': 'Russian', 'encoding': 'KOI8-R'}

ERR:encode
e4:b8:ad:e6:96:87:32
{'confidence': 0.7525, 'language': '', 'encoding': 'utf-8'}
中文2

4e2d:6587:32
<type 'unicode'>

ERR:decode
ERR:decode
ERR:decode
4e2d:6587:33
<type 'unicode'>

ff:fe:2d:4e:87:65:33:00
{'confidence': 1.0, 'language': '', 'encoding': 'UTF-16'}

4e2d:6587:33
<type 'unicode'>

shareinfo2018

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
python字符串编码测试

#encoding=utf8import chardetdef str_detect(str): try: print ":".join("{:02x}".format(ord(c)) for c in str) t = chardet.detect(str) print t if t['encoding']=="utf
复制链接

扫一扫