python 字符编码处理_Python 处理各种编码的字符串

# file: Unicode2.py

# -*- coding: utf-8 -*-

import chilkat

# Demonstrates Chilkat's CkString: one string object that can import,
# export, measure and save its text in many character encodings.

# The CkString object can handle any character encoding.
s1 = chilkat.CkString()

# appendEnc appends text that is supplied in the named encoding.
s1.appendEnc('èéêëabc', 'utf-8')

# When working with several encodings, naming each variable after its
# encoding makes it obvious how the data should be interpreted.
strAnsi = s1.getAnsi()
strUtf8 = s1.getUtf8()

# NOTE(review): the two expected values below are byte counts. They assume
# the getters return byte strings (as with Python 2 `str`) and that the ANSI
# code page is a single-byte Western one -- confirm for your platform and
# Python version.

# Prints "7"
print(len(strAnsi))

# Prints "11" (four 2-byte accented letters + three 1-byte ASCII letters)
print(len(strUtf8))

# getNumChars returns the number of characters, independent of encoding.
print('Num Chars: ' + str(s1.getNumChars()))

# utf-8 does not use a constant number of bytes per character:
# a single character is encoded in 1 to 4 bytes (RFC 3629).
print('utf-8: ' + str(s1.getSizeUtf8()))

# ANSI is typically 1 byte per character, but for some languages, such as
# Japanese, "ANSI" maps to a multi-byte encoding (Shift_JIS for Japanese).
print('ANSI: ' + str(s1.getSizeAnsi()))

# Let's create an English/Japanese string.
s2 = chilkat.CkString()
s2.appendEnc('abc愛知県新城市の', 'utf-8')

# We can get the string in any multibyte encoding.
print('s2 num chars = ' + str(s2.getNumChars()))

strShiftJIS = s2.getEnc('shift_JIS')
print('Shift-JIS num bytes = ' + str(len(strShiftJIS)))

strIso2022JP = s2.getEnc('iso-2022-jp')
print('iso-2022-jp num bytes = ' + str(len(strIso2022JP)))

strEucJp = s2.getEnc('euc-jp')
print('euc-jp num bytes = ' + str(len(strEucJp)))

# We can save the string in any encoding. The order of the files matches the
# original script.
# NOTE(review): saveToFile is assumed to return a false value on failure --
# confirm against the Chilkat reference; the original ignored the result.
for outPath, outCharset in (
    ('out_shift_jis.txt', 'shift_JIS'),
    ('out_iso_2022_jp.txt', 'iso-2022-jp'),
    ('out_utf8.txt', 'utf-8'),
    ('out_euc_jp.txt', 'euc-jp'),
):
    if not s2.saveToFile(outPath, outCharset):
        print('Failed to save ' + outPath)

# You may mix any number of languages in a utf-8 string because utf-8 can
# encode characters in all languages (utf-8 is the multi-byte encoding of
# Unicode).
#
# An ANSI string can generally hold us-ascii + the native language.
# For example, Shift_JIS can represent us-ascii characters in addition to
# Japanese characters.

# For example, this is OK: us-ascii text joined with Shift_JIS data.
strShiftJis = 'abc123' + s2.getEnc('shift_JIS')

# This is not OK: the Greek literal is not Shift_JIS data, so the result
# would mix two encodings in one string. Kept only as a counter-example.
strShiftJis2 = 'ςστυφ' + s2.getEnc('shift_JIS')

print("Done!")

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值