如何使用文本字符串
Unicode
import unicodedata
def unicode_test (value) :
name = unicodedata.name(value)
value2 = unicodedata.lookup(name)
print "name %s" %name
print "value %s" %value
print "value2 %s" %value2
unicode_test("\u00a2" )
unicode_test("\u20ac" )
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-42-9a9ee43694fd> in <module>()
9 print "value2 %s" %value2
10
---> 11 unicode_test("\u00a2")
12 unicode_test("\u20ac")
<ipython-input-42-9a9ee43694fd> in unicode_test(value)
3 # only for Python 3.*
4 def unicode_test(value):
----> 5 name = unicodedata.name(value)
6 value2 = unicodedata.lookup(name)
7 print "name %s" %name
TypeError: name() argument 1 must be unicode, not str
snowman = "\u2603"
print len(snowman)
ds = snowman.encode("utf-8" )
print ds
ds = snowman.encode("ascii" ,"ignore" )
print ds
ds = snowman.encode("ascii" ,"replace" )
print ds
ds = snowman.encode("ascii" ,"backslashreplace" )
print ds
ds = snowman.encode("ascii" ,"xmlcharrefreplace" )
print ds
place = "caf\u00e9"
print place
print type(place)
place_bytes = place.decode("utf-8" )
print place_bytes
place2 = place_bytes.decode("utf-8" )
print place2
place3 = place_bytes.decode("ascii" )
print place3
place4 = place_bytes.decode("latin-1" )
print place4
place5 = place_bytes.decode("windows-1252" )
print place5
6
\u2603
\u2603
\u2603
\u2603
caf\u00e9
<type 'str'>
caf\u00e9
caf\u00e9
caf\u00e9
caf\u00e9
caf\u00e9
格式化
print "format int."
int_num = 123
print "%s" %int_num
print "%d" %int_num
print "%x" %int_num
print "%o" %int_num
print "\nformat float."
float_num = 1.23
print "%s" %float_num
print "%f" %float_num
print "%e" %float_num
print "%g" %float_num
print "\nformat interpolation."
name = "Allen Moore"
job = "System Designer"
weight = "128"
print "My job is %s" %job
print "My name is %s. His weight is %s" %(name, weight)
print "\nformat style."
int_num = 123
float_num = 1.23
str_str = "str"
print "%d %f %s" %(int_num, float_num, str_str)
print "%10d %10f %10s" %(int_num, float_num, str_str)
print "%-10d %-10f %-10s" %(int_num, float_num, str_str)
print "%10.4d %10.4f %10.4s" %(int_num, float_num, str_str)
print "%.4d %.4f %.4s" %(int_num, float_num, str_str)
print "%*.*d %*.*f %*.*s" %(10 , 4 , int_num, 10 , 4 , float_num, 10 , 4 , str_str)
format int.
123
123
7b
173
format float.
1.23
1.230000
1.230000e+00
1.23
format interpolation.
My job is System Designer
My name is Allen Moore. His weight is 128
format style.
123 1.230000 str
123 1.230000 str
123 1.230000 str
0123 1.2300 str
0123 1.2300 str
0123 1.2300 str
int_num = 123
float_num = 1.23
str_str = "str"
print "{} {} {}" .format(int_num, float_num, str_str)
print "{2} {0} {1}" .format(int_num, float_num, str_str)
print "{int_num} {float_num} {str_str}" .format(int_num=123 , float_num=1.23 , str_str="str" )
one = {'int_num' :123 , 'float_num' :1.23 , 'str_str' :"str" }
print "{0[int_num]} {0[float_num]} {0[str_str]}{1}" .format(one,' other' )
print "{0:d} {1:f} {2:s}" .format(int_num, float_num, str_str)
print "\nHere is format style"
print "{int_num:d} {float_num:f} {str_str:s}" .format(int_num=123 , float_num=1.23 , str_str="str" )
print "{0:10d} {1:10f} {2:10s}" .format(int_num, float_num, str_str)
print "{0:>10d} {1:>10f} {2:>10s}" .format(int_num, float_num, str_str)
print "{0:<10d} {1:<10f} {2:<10s}" .format(int_num, float_num, str_str)
print "{0:^10d} {1:^10f} {2:^10s}" .format(int_num, float_num, str_str)
print "{0:>10d} {1:>10.4f} {2:10.2s}" .format(int_num, float_num, str_str)
print "{0:!^25s}" .format("Allen Moore" )
123 1.23 str
str 123 1.23
123 1.23 str
123 1.23 str other
123 1.230000 str
Here is format style
123 1.230000 str
123 1.230000 str
123 1.230000 str
123 1.230000 str
123 1.230000 str
123 1.2300 st
!!!!!!!Allen Moore!!!!!!!
使用正则表达式匹配
import re
source = "Allen Moore"
result = re.match("^All" , source)
if result:
print result.group()
result = re.match("Moo" , source)
if result:
print result.group()
result = re.match(".*Moo" , source)
if result:
print result.group()
print "\n"
result = re.search("Moo" , source)
if result:
print result.group()
print "\n"
result = re.findall("o" , source)
print result
print "\n"
result = re.split("e" , source)
print result
print "\n"
result = re.sub("e" ,"?" , source)
print result
print "\n"
import string
printable = string.printable
print len(printable)
print printable[0 :50 ]
print printable[50 :]
print re.findall('\d' ,printable)
print re.findall('\w' ,printable)
print re.findall('\s' ,printable)
x = 'abc' + '-/*' + '\u00ea' + '\u0115'
print re.findall('\w' , x)
print "\n"
src = '''Hello Allen Moore, Welcome to Python World.'''
print re.findall('Welcome' , src)
print re.findall('Welcome|Python' , src)
print re.findall('Hello' , src)
print re.findall('^Hello' , src)
print re.findall('World.$' , src)
print re.findall('World\.$' , src)
print re.findall('[oW]or' , src)
print re.findall('[o]+' , src)
print re.findall('Allen (?=Moore)' , src)
print re.findall('(?<=Allen) Moore' , src)
print re.findall('\bAllen' , src)
print re.findall(r'\bAllen' , src)
print "\n"
result = re.search(r'(. Allen\b).*(\bPython)' , src)
print result.group()
print result.groups()
result = re.search(r'(?P<Name>. Allen\b).*(?P<Lang>\bPython)' ,src)
print result.group()
print result.groups()
print result.group('Name' )
print result.group('Lang' )
All
Allen Moo
Moo
['o', 'o']
['All', 'n Moor', '']
All?n Moor?
100
0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMN
OPQRSTUVWXYZ!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~
['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '_']
[' ', '\t', '\n', '\r', '\x0b', '\x0c']
['a', 'b', 'c', 'u', '0', '0', 'e', 'a', 'u', '0', '1', '1', '5']
['Welcome']
['Welcome', 'Python']
['Hello']
['Hello']
['World.']
['World.']
['oor', 'Wor']
['o', 'oo', 'o', 'o', 'o', 'o']
['Allen ']
[' Moore']
[]
['Allen']
o Allen Moore, Welcome to Python
('o Allen', 'Python')
o Allen Moore, Welcome to Python
('o Allen', 'Python')
o Allen
Python