【脚本语言系列】关于Python操作数据文本字符串，你需要知道的事

最新推荐文章于 2024-06-16 11:15:40 发布

Allen Moore

最新推荐文章于 2024-06-16 11:15:40 发布

阅读量469

点赞数

分类专栏：脚本语言 文章标签：脚本语言 python 文本字符串

本文链接：https://blog.csdn.net/MosesAaron/article/details/73740853

版权

脚本语言专栏收录该内容

129 篇文章 1 订阅

订阅专栏

如何使用文本字符串

Unicode

# -*- coding:utf-8 -*-
import unicodedata
# only for Python 3.*
def unicode_test(value):
    """Print a character's Unicode name and the result of looking it back up.

    Args:
        value: A single character given as a text (unicode) string.

    Raises:
        TypeError: If ``value`` is not a text string (for example a Python 2
            byte ``str``).
        ValueError: If the character has no name in the Unicode database.
    """
    name = unicodedata.name(value)
    # Round-trip: the character found by name should equal the input.
    value2 = unicodedata.lookup(name)
    # A single parenthesised argument prints identically as a Python 2 print
    # statement and as the Python 3 print() function.
    print("name %s" % name)
    print("value %s" % value)
    print("value2 %s" % value2)

# NOTE: on Python 2 these un-prefixed literals are byte strings, which
# unicodedata.name() rejects with the TypeError recorded below. Python 3
# treats them as text; a u"..." prefix is what Python 2 needs here.
unicode_test("\u00a2")
unicode_test("\u20ac")

---------------------------------------------------------------------------

TypeError                                 Traceback (most recent call last)

<ipython-input-42-9a9ee43694fd> in <module>()
      9     print "value2 %s" %value2
     10 
---> 11 unicode_test("\u00a2")
     12 unicode_test("\u20ac")


<ipython-input-42-9a9ee43694fd> in unicode_test(value)
      3 # only for Python 3.*
      4 def unicode_test(value):
----> 5     name = unicodedata.name(value)
      6     value2 = unicodedata.lookup(name)
      7     print "name %s" %name


TypeError: name() argument 1 must be unicode, not str

# -*- coding:utf-8 -*-
# only for Python 3.*
# (Python 2 still runs this script, but there the un-prefixed literals are
# byte strings holding a literal backslash escape, hence a length of 6.)

# encode text to bytes
snowman = "\u2603"
print(len(snowman))
ds = snowman.encode("utf-8")
print(ds)
# "ignore" silently drops characters the codec cannot represent
ds = snowman.encode("ascii", "ignore")
print(ds)
# "replace" substitutes a question mark
ds = snowman.encode("ascii", "replace")
print(ds)
# "backslashreplace" emits a Python-style escape sequence
ds = snowman.encode("ascii", "backslashreplace")
print(ds)
# "xmlcharrefreplace" emits a numeric XML character reference
ds = snowman.encode("ascii", "xmlcharrefreplace")
print(ds)

# decode bytes back to text
place = "caf\u00e9"
print(place)
print(type(place))
# Text has to be *encoded* to obtain bytes; a Python 3 str has no decode().
place_bytes = place.encode("utf-8")
print(place_bytes)
place2 = place_bytes.decode("utf-8")
print(place2)
try:
    place3 = place_bytes.decode("ascii")
except UnicodeDecodeError as err:
    # The UTF-8 bytes of the accented letter are outside the ASCII range;
    # report the failure instead of aborting the rest of the demo.
    place3 = None
    print(err)
else:
    print(place3)
# Single-byte codecs accept any byte, so these "succeed" but show mojibake
# when the bytes are really UTF-8.
place4 = place_bytes.decode("latin-1")
print(place4)
place5 = place_bytes.decode("windows-1252")
print(place5)

6
\u2603
\u2603
\u2603
\u2603
caf\u00e9
<type 'str'>
caf\u00e9
caf\u00e9
caf\u00e9
caf\u00e9
caf\u00e9

格式化

旧式格式化

# -*- coding:utf-8 -*-
# Old-style (printf-like) "%" formatting demo. Every print(...) receives a
# single argument, so the script prints the same on Python 2 and Python 3.

# format int as str, decimal, hexadecimal, octal
print("format int.")
int_num = 123
print("%s" % int_num)
print("%d" % int_num)
print("%x" % int_num)
print("%o" % int_num)

# format float as str, fixed-point, exponent, general
print("\nformat float.")
float_num = 1.23
print("%s" % float_num)
print("%f" % float_num)
print("%e" % float_num)
print("%g" % float_num)

# interpolation: one value directly, several values through a tuple
print("\nformat interpolation.")
name = "Allen Moore"
job = "System Designer"
weight = "128"
print("My job is %s" % job)
print("My name is %s. His weight is %s" % (name, weight))

# format style: minimum width, alignment and precision
print("\nformat style.")
int_num = 123
float_num = 1.23
str_str = "str"
print("%d %f %s" % (int_num, float_num, str_str))
# width 10, right-aligned (the default)
print("%10d %10f %10s" % (int_num, float_num, str_str))
# width 10, left-aligned
print("%-10d %-10f %-10s" % (int_num, float_num, str_str))
# precision 4: zero-pads ints, fixes float decimals, truncates strings
print("%10.4d %10.4f %10.4s" % (int_num, float_num, str_str))
print("%.4d %.4f %.4s" % (int_num, float_num, str_str))
# "*" reads width and precision from the argument tuple
print("%*.*d %*.*f %*.*s" % (10, 4, int_num, 10, 4, float_num, 10, 4, str_str))

format int.
123
123
7b
173

format float.
1.23
1.230000
1.230000e+00
1.23

format interpolation.
My job is System Designer
My name is Allen Moore. His weight is 128

format style.
123 1.230000 str
       123   1.230000        str
123        1.230000   str       
      0123     1.2300        str
0123 1.2300 str
      0123     1.2300        str

新式格式化

# -*- coding:utf-8 -*-
# New-style str.format() demo. Every print(...) receives a single argument,
# so the script prints the same on Python 2 and Python 3.
int_num = 123
float_num = 1.23
str_str = "str"
# positional, in order
print("{} {} {}".format(int_num, float_num, str_str))
# positional, by explicit index
print("{2} {0} {1}".format(int_num, float_num, str_str))
# keyword arguments
print("{int_num} {float_num} {str_str}".format(int_num=123, float_num=1.23, str_str="str"))
# item access into a dict argument, mixed with a second positional argument
one = {'int_num': 123, 'float_num': 1.23, 'str_str': "str"}
print("{0[int_num]} {0[float_num]} {0[str_str]}{1}".format(one, ' other'))
# explicit type codes after the colon
print("{0:d} {1:f} {2:s}".format(int_num, float_num, str_str))
# format style
print("\nHere is format style")
print("{int_num:d} {float_num:f} {str_str:s}".format(int_num=123, float_num=1.23, str_str="str"))
# width 10: numbers right-align by default, strings left-align
print("{0:10d} {1:10f} {2:10s}".format(int_num, float_num, str_str))
# ">" right-align, "<" left-align, "^" centre
print("{0:>10d} {1:>10f} {2:>10s}".format(int_num, float_num, str_str))
print("{0:<10d} {1:<10f} {2:<10s}".format(int_num, float_num, str_str))
print("{0:^10d} {1:^10f} {2:^10s}".format(int_num, float_num, str_str))
# precision: decimals for floats, maximum length for strings
print("{0:>10d} {1:>10.4f} {2:10.2s}".format(int_num, float_num, str_str))
# a fill character placed before the alignment flag
print("{0:!^25s}".format("Allen Moore"))

123 1.23 str
str 123 1.23
123 1.23 str
123 1.23 str other
123 1.230000 str

Here is format style
123 1.230000 str
       123   1.230000 str       
       123   1.230000        str
123        1.230000   str       
   123      1.230000     str    
       123     1.2300 st        
!!!!!!!Allen Moore!!!!!!!

使用正则表达式匹配

# -*- coding:utf-8 -*-
# Regular-expression demo. Every print(...) receives a single argument, so
# the script runs on both Python 2 and Python 3.
import re
import string

# match
# re.match only matches at the beginning of the string
source = "Allen Moore"
result = re.match("^All", source)
if result:
    print(result.group())
# "Moo" is not at the start, so this match is None and nothing is printed
result = re.match("Moo", source)
if result:
    print(result.group())
# a leading ".*" lets match reach text at any position
result = re.match(".*Moo", source)
if result:
    print(result.group())

# search
# search returns the first occurrence anywhere in the string
print("\n")
result = re.search("Moo", source)
if result:
    print(result.group())

# findall
# findall returns every non-overlapping occurrence
print("\n")
result = re.findall("o", source)
print(result)

# split
# split cuts the string at every occurrence
print("\n")
result = re.split("e", source)
print(result)

# sub
# sub replaces every occurrence
print("\n")
result = re.sub("e", "?", source)
print(result)

# pattern: special character classes
print("\n")
printable = string.printable
print(len(printable))
print(printable[0:50])
print(printable[50:])
# Raw strings keep the backslash for the regex engine; the pattern values are
# the same as the un-prefixed originals.
print(re.findall(r'\d', printable))
print(re.findall(r'\w', printable))
print(re.findall(r'\s', printable))

# On Python 2 the two escapes below stay literal backslash-u text, so \w picks
# up their letters and digits; on Python 3 they are two accented letters.
x = 'abc' + '-/*' + '\u00ea' + '\u0115'
print(re.findall(r'\w', x))

# pattern: literals, alternation, anchors, sets, look-around
print("\n")
src = '''Hello Allen Moore, Welcome to Python World.'''
print(re.findall('Welcome', src))
print(re.findall('Welcome|Python', src))
print(re.findall('Hello', src))
print(re.findall('^Hello', src))
# an unescaped "." matches any character; the escaped one only a literal dot
print(re.findall('World.$', src))
print(re.findall(r'World\.$', src))
print(re.findall('[oW]or', src))
print(re.findall('[o]+', src))
print(re.findall('Allen (?=Moore)', src))
print(re.findall('(?<=Allen) Moore', src))
# Deliberately NOT raw: Python turns '\b' into a backspace character, so this
# pattern finds nothing. The raw string on the next line is a word boundary.
print(re.findall('\bAllen', src))
print(re.findall(r'\bAllen', src))

# pattern: positional and named groups
print("\n")
result = re.search(r'(. Allen\b).*(\bPython)', src)
print(result.group())
print(result.groups())
result = re.search(r'(?P<Name>. Allen\b).*(?P<Lang>\bPython)', src)
print(result.group())
print(result.groups())
print(result.group('Name'))
print(result.group('Lang'))

All
Allen Moo


Moo


['o', 'o']


['All', 'n Moor', '']


All?n Moor?


100
0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMN
OPQRSTUVWXYZ!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~  

['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '_']
[' ', '\t', '\n', '\r', '\x0b', '\x0c']
['a', 'b', 'c', 'u', '0', '0', 'e', 'a', 'u', '0', '1', '1', '5']


['Welcome']
['Welcome', 'Python']
['Hello']
['Hello']
['World.']
['World.']
['oor', 'Wor']
['o', 'oo', 'o', 'o', 'o', 'o']
['Allen ']
[' Moore']
[]
['Allen']


o Allen Moore, Welcome to Python
('o Allen', 'Python')
o Allen Moore, Welcome to Python
('o Allen', 'Python')
o Allen
Python