# file: Unicode2.py
# -*- coding: utf-8 -*-
import chilkat

# Demonstrates how a Chilkat CkString holds text independently of any one
# character encoding and can emit or save it in several encodings.
#
# Every print call passes a single parenthesized expression so the output is
# unchanged on Python 2 while the file also parses on Python 3.
# NOTE(review): the byte counts quoted below assume the getters return
# Python 2 byte strings; under Python 3 len() may count characters
# instead -- confirm against the installed chilkat binding.

# The CkString object can handle any character encoding.
s1 = chilkat.CkString()

# The appendEnc method allows us to append a string in any encoding.
s1.appendEnc('èéêëabc', 'utf-8')

# If you're working with different encodings, you may wish
# to name your string variables to reflect the encoding.
strAnsi = s1.getAnsi()
strUtf8 = s1.getUtf8()

# Prints "7"
print(len(strAnsi))

# Prints "11"
print(len(strUtf8))

# getNumChars returns the number of characters
print('Num Chars: ' + str(s1.getNumChars()))

# utf-8 chars do not have a constant number of bytes/char.
# A single utf-8 char is represented in 1 to 4 bytes (RFC 3629).
print('utf-8: ' + str(s1.getSizeUtf8()))

# ANSI is typically 1 byte/char, but for some languages
# such as Japanese, ANSI equates to a character encoding that may
# not be 1 byte/char. (Shift_JIS is the ANSI encoding for Japanese)
print('ANSI: ' + str(s1.getSizeAnsi()))

# Let's create an English/Japanese string.
s2 = chilkat.CkString()
s2.appendEnc('abc愛知県新城市の', 'utf-8')

# We can get the string in any multibyte encoding.
print('s2 num chars = ' + str(s2.getNumChars()))

strShiftJIS = s2.getEnc('shift_JIS')
print('Shift-JIS num bytes = ' + str(len(strShiftJIS)))

strIso2022JP = s2.getEnc('iso-2022-jp')
print('iso-2022-jp num bytes = ' + str(len(strIso2022JP)))

strEucJp = s2.getEnc('euc-jp')
print('euc-jp num bytes = ' + str(len(strEucJp)))

# We can save the string in any encoding.
# saveToFile's result was previously discarded; report a failed write
# instead of silently continuing.
_outputs = (
    ('out_shift_jis.txt', 'shift_JIS'),
    ('out_iso_2022_jp.txt', 'iso-2022-jp'),
    ('out_utf8.txt', 'utf-8'),
    ('out_euc_jp.txt', 'euc-jp'),
)
for _path, _charset in _outputs:
    if not s2.saveToFile(_path, _charset):
        print('Failed to save ' + _path + ' as ' + _charset)

# You may mix any number of languages in a utf-8 string
# because utf-8 can encode characters in all languages.
# (utf-8 is the multi-byte encoding of Unicode)
#
# An ANSI string can generally hold us-ascii + the native language.
# For example, Shift_JIS can represent us-ascii characters
# in addition to Japanese characters.

# For example, this is OK: the us-ascii prefix is valid Shift_JIS as well.
strShiftJis = 'abc123' + strShiftJIS

# This is not OK: the Greek literal is stored in this file's utf-8 encoding,
# so the result mixes two encodings in one string. It is kept only as a
# demonstration of what to avoid.
strShiftJis2 = 'ςστυφ' + strShiftJIS

print("Done!")