python笔记 str方法

最新推荐文章于 2024-07-02 08:29:45 发布

weixin_41948344

最新推荐文章于 2024-07-02 08:29:45 发布

阅读量585

点赞数

分类专栏： python note 文章标签： python

本文链接：https://blog.csdn.net/weixin_41948344/article/details/81506402

版权

python note 专栏收录该内容

11 篇文章 0 订阅

订阅专栏

参考：https://www.cnblogs.com/f-ck-need-u/archive/2018/06/03/9127699.html

http://python.jobbole.com/85122/

1.大小写转换

>>> print('ab XY'.lower())#小写
ab xy
>>> print('ab XY'.upper())#大写
AB XY

>>> print('ab XY'.title())#标题， 'ab XY'，'ab_xy'，'ab()XY'都属两个单词
Ab Xy
>>> print('ab_xy'.title())
Ab_Xy

>>> print("ab'xy".title())
Ab'Xy

>>> print('ab XY'.capitalize())#仅首单词首字母大写
Ab xy
>>> print('ab_xy'.capitalize())
Ab_xy
>>> print('AB_Xy'.capitalize())
Ab_xy

>>> print('ab XY'.swapcase())
AB xy

2.isXXXX判断

#str.isalnum()
#字母和数字的任意组合，支持大小写字母、中文，当str.isalpha(),str.isdecimal(),str.isdigit(),str.isnumeric()中任意一个为真，str.isalnum()为真

#str.isalpha():Unicode 字符数据库中作为 “Letter”（这些字符一般具有 “Lm”, “Lt”, “Lu”, “Ll”, or “Lo” 等标识，不同于 Alphabetic）的，均为真。
'abc'.isalpha()#True
'ab c'.isalpha()#False,包含space
'中文'.isalpha()#True

str.isdecimal(); str.isdigit(); str.isnumeric()
#三个方法的区别在于对 Unicode 通用标识的真值判断范围不同：

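#isdecimal: Nd（Numeric_Type=Decimal）
#isdigit: Numeric_Type=Digit 或 Decimal（Nd 以及部分 No，例如上标数字 ²）
#isnumeric: Numeric_Type=Digit、Decimal 或 Numeric（Nd、Nl、No，以及“十”等汉字数字）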

num = '\u2155'
print(num)
# ⅕
num.isdecimal(), num.isdigit(), num.isnumeric()
# (False, False, True)

num = '\u00B2'
print(num)
# ²
num.isdecimal(), num.isdigit(), num.isnumeric()
# (False, True, True)

num = "1" #unicode
num.isdecimal(), num.isdigit(), num.isnumeric()
# (True, True, True)

num = "Ⅶ"
num.isdecimal(), num.isdigit(), num.isnumeric()
# (False, False, True)

num = "十"
num.isdecimal(), num.isdigit(), num.isnumeric()
# (False, False, True)

num = b"1" # byte
num.isdigit() # True
num.isdecimal() # AttributeError 'bytes' object has no attribute 'isdecimal'
num.isnumeric() # AttributeError 'bytes' object has no attribute 'isnumeric'

>>> print('a34'.islower())
True

>>> print('AB'.isupper())
True
>>> print('Aa'.isupper())
False

>>> print('Aa Bc'.istitle())
True
>>> print('Aa_Bc'.istitle())
True
>>> print('Aa bc'.istitle())
False
>>> print('Aa_bc'.istitle())
False
# 下面的返回False，因为非首字母C不是小写
>>> print('Aa BC'.istitle())
False

str.isspace()#判断是否为空白，空字符('')不为空白
>>> print(''.isspace())
False
>>> print(' '.isspace())
True
>>> print(' \t'.isspace())
True
>>> print(' \n'.isspace())
True
>>> print('ab xy'.isspace())
False

str.isprintable()#判断是否为可打印字符
>>> print('\n'.isprintable())
False
>>> print('\t'.isprintable())
False
>>> print(''.isprintable())
True
>>> print('ab'.isprintable())
True
>>> print(' '.isprintable())
True

str.isidentifier()#判断是否满足标识符定义规则(只能是字母或下划线开头、不能包含除数字、字母和下划线以外的任意字符)
>>> print('ab'.isidentifier())
True
>>> print('2ab'.isidentifier())
False
>>> print('ab2'.isidentifier())
True
>>> print('_ab2'.isidentifier())
True
>>> print('ab xy'.isidentifier())
False

3.填充

str.center(width[,fillchar])
#将字符串居中，左右两边使用fillchar进行填充，使得整个字符串的长度为width。fillchar默认为空格。如果width小于字符串的长度，则无法填充直接返回字符串本身(不会创建新字符串对象)。
#默认fillchar为space
>>> print('ab'.center(4))
 ab  #length==4，即' ab '
>>> print('ab'.center(5))
  ab  #length==5，即'  ab '
>>> print('ab'.center(5,'-'))
--ab-
>>> print('ab'.center(4,'-'))
-ab-
>>> print('ab'.center(1,'-'))
ab #width<length,return str

str.ljust(width[,fillchar])#使用fillchar填充在字符串S的右边，使得整体长度为width
str.rjust(width[,fillchar])#使用fillchar填充在字符串S的左边，使得整体长度为width
#默认fillchar为space
>>> print('ab'.ljust(4,'-'))
ab--
>>> print('ab'.rjust(4,'-'))
--ab

str.zfill(width)
#用0填充在字符串S的左边使其长度为width。如果S前有正负号+/-，则0填充在这两个符号的后面，且符号也算入长度
>>> print('ab'.zfill(5))
000ab
>>> print('-ab'.zfill(5))
-00ab
>>> print('+ab'.zfill(5))
+00ab
>>> print('-10'.zfill(5))
-0010
>>> print('10'.zfill(5))
00010
>>> print('+10'.zfill(5))
+0010

4.子串搜索

str.count(sub[,start[,end]])
#返回字符串S中子串sub出现的次数，可以指定从哪里开始计算(start)以及计算到哪里结束(end)，索引从0开始计算，不包括end边界。
>>> print('xyabxyxy'.count('xy'))
3
>>> print('xyabxyxy'.count('xy',1))
2 # 次数2，因为从index=1算起，即从'y'开始查找，查找的范围为'yabxyxy'
>>> print('xyabxyxy'.count('xy',1,7))
1 # 次数1，因为不包括end，所以查找的范围为'yabxyx'
>>> print('xyabxyxy'.count('xy',1,8))
2 # 次数2，因为查找的范围为'yabxyxy'

str.endswith(suffix[,start[,end]])
str.startswith(prefix[,start[,end]])
#endswith()检查字符串S是否以suffix(后缀)结尾，返回布尔值的True和False。suffix可以是一个元组(tuple)。可以指定起始start和结尾end的搜索边界。
#同理startswith()用来判断字符串S是否是以prefix(前缀)开头
#不包含end
suffix是普通的字符串时。
>>> print('abcxyz'.endswith('xyz'))
True
>>> print('abcxyz'.endswith('xyz',4))
False # False，因为搜索范围为'yz'
>>> print('abcxyz'.endswith('xyz',0,5))
False # False，因为搜索范围为'abcxy'
>>> print('abcxyz'.endswith('xyz',0,6))
True

suffix是元组(tuple)时，只要tuple中任意一个元素满足endswith的条件，就返回True。
>>> print('abcxyz'.endswith(('ab','xyz')))
True # tuple中的'xyz'满足条件
>>> print('abcxyz'.endswith(('ab','xy')))
False # tuple中'ab'和'xy'都不满足条件
>>> print('abcxyz'.endswith(('ab','xy','z')))
True # tuple中的'z'满足条件

str.find(sub[,start[,end]])
str.rfind(sub[,start[,end]])
str.index(sub[,start[,end]])
str.rindex(sub[,start[,end]])
# find()搜索字符串S中是否包含子串sub，如果包含，则返回sub的索引位置，否则返回"-1"。可以指定起始start和结束end的搜索位置。
# index()和find()一样，唯一不同点在于当找不到子串时，抛出ValueError错误。
# rfind()则是返回搜索到的最右边子串的位置，如果只搜索到一个或没有搜索到子串，则和find()是等价的。
# 同理rindex()

>>> print('asdfgh'.find('df'))
2
>>> print('asdfgh'.rfind('df'))
2 # find和rfind都是找到的返回子串索引值
>>> print('asdfgh'.find('u'))
-1
>>> print('asdfgh'.find('df',1,3))
-1 # 'sd',not include 'f'
>>> print('asdfgh'.index('z'))
Traceback (most recent call last):
File "<pyshell#97>", line 1, in <module>
print('asdfgh'.index('z'))
ValueError: substring not found

5.替换

str.replace(old,new[,count])
#将字符串中的子串old替换为new字符串，如果给定count，则表示只替换前count个old子串。如果S中搜索不到子串old，则无法替换，直接返回字符串S(不创建新字符串对象)

str.expandtabs(tabsize=8)
#将字符串S中的\t替换为一定数量的空格。默认tabsize=8。
#注意，expandtabs(8)不是将\t直接替换为8个空格。例如'xyz\tab'.expandtabs()会将\t替换为5个空格，因为"xyz"占用了3个字符位。
#另外，它不会替换换行符(\n或\r)
>>> '01\t012\t0123\t01234'.expandtabs(4)
'01  012 0123    01234'
'123|123|123|123|123|1
>>> '01\t012\t0123\t01234'.expandtabs(8)
'01      012     0123    01234'
'1234567|1234567|1234567|1234567|
>>> '01\t012\t0123\t01234'.expandtabs(7)
'01     012    0123   01234'
'123456|123456|123456|123456|
>>> print('012\t0123\n01234'.expandtabs(7))
'012    0123
'01234
'123456|123456|

str.translate(table)
static str.maketrans(x[,y[,z]])
#str.maketrans()生成一个字符一一映射的table，然后使用translate(table)对字符串S中的每个字符进行映射。
>>> in_str = 'abcdefghijklmnopqrstuvwxyz'
>>> out_str = '0123456789`-=[];,./_+!@#$%'
>>> map_table = str.maketrans(in_str, out_str)#生成映射表
>>> map_table
{97: 48, 98: 49, 99: 50, 100: 51, 101: 52, 102: 53, 103: 54, 104: 55, 105: 56, 106: 57, 107: 96, 108: 45, 109: 61, 110: 91, 111: 93, 112: 59, 113: 44, 114: 46, 115: 47, 116: 95, 117: 43, 118: 33, 119: 64, 120: 35, 121: 36, 122: 37}
>>> print('this is dudebla\'s note')
this is dudebla's note
>>> print('this is dudebla\'s note'.translate(map_table))#translate()进行映射
_78/ 8/ 3+341-0'/ []_4
#注意，maketrans(x[, y[, z]])中的x和y都是字符串，且长度必须相等。
#如果maketrans(x[, y[, z]])给定了第三个参数z，则这个参数字符串中的每个字符都会被映射为None。

6.分割

str.partition(sep)
str.rpartition(sep)
#搜索字符串S中的子串sep，并从sep处对S进行分割，最后返回一个包含3元素的元组：sep左边的部分是元组的第一个元素，sep自身是元组的第二个元素，sep右边是元组的第三个元素。
#partition(sep)从左边第一个sep进行分割，rpartition(sep)从右边第一个sep进行分割。
#如果搜索不到sep，则返回的3元素元组中，有两个元素为空。partition()是后两个元素为空，rpartition()是前两个元素为空。

str.split(sep=None,maxsplit=-1)
str.rsplit(sep=None,maxsplit=-1)
str.splitlines(keepends=False)
#都是用来分割字符串，(删除sep，即不包含sep),并生成一个列表。
#split()根据sep对S进行分割，maxsplit用于指定分割次数，如果不指定maxsplit或者给定值为"-1"，则会从左向右搜索并且每遇到sep一次就分割直到搜索完字符串。如果不指定sep或者指定为None，则改变分割算法：以空白字符为分隔符，连续的空白视为一个分隔符，且忽略首尾的空白。
#rsplit()和split()是一样的，只不过是从右边向左边搜索
>>> print('1,2,3'.split(','))
['1', '2', '3']
>>> print('1,2,,3'.split(','))
['1', '2', '', '3']
>>> print('1 2 3'.split())
['1', '2', '3']
>>> print('  1  2  3  '.split())
['1', '2', '3']
>>> print('  1  2  3  '.split(' '))
['', '', '1', '', '2', '', '3', '', '']
>>> print('  1  2  3  \n'.split())
['1', '2', '3']
>>> print('  1  2  3  \n'.split(' '))
['', '', '1', '', '2', '', '3', '', '\n']
>>> print('  1  2  3  \n'.split('\t'))
['  1  2  3  \n']
>>> print('  1  2  3  \n'.split('\n'))
['  1  2  3  ', '']
>>> print(''.split('\n'))
['']

splitlines()可以识别各种换行符，常见的是\n、\r、\r\n。如果指定keepends为True，则保留所有的换行符。
>>> 'ab c\n\nde fg\rkl\r\n'.splitlines()
['ab c', '', 'de fg', 'kl']
>>> 'ab c\n\nde fg\rkl\r\n'.splitlines(keepends=True)
['ab c\n', '\n', 'de fg\r', 'kl\r\n']

将split()和splitlines()相比较一下：
#### split()
>>> ''.split('\n')
[''] # 因为没换行符可分割
>>> 'One line\n'.split('\n')
['One line', '']
#### splitlines()
>>> "".splitlines()
[] # 空字符串返回空列表
>>> 'Two lines\n'.splitlines()
['Two lines']

7.join

str.join(iterable)
#将可迭代对象(iterable)中的字符串使用S连接起来。注意，iterable中必须全部是字符串类型，否则报错。

#相对多次 str += 'other_str' ，用列表存储将要拼接的'other_str' ，再使用 join 效率更高

#字符串
>>> L='python'
>>> '_'.join(L)
'p_y_t_h_o_n'
#元组
>>> L1=('1','2','3')
>>> '_'.join(L1)
'1_2_3'
#集合。注意，集合无序。
>>> L2={'p','y','t','h','o','n'}
>>> '_'.join(L2)
'n_o_p_h_y_t'
#列表
>>> L2=['py','th','o','n']
>>> '_'.join(L2)
'py_th_o_n'
#字典
>>> L3={'name':"malongshuai",'gender':'male','from':'China','age':18}
>>> '_'.join(L3)
'name_gender_from_age'
#iterable参与迭代的部分必须是字符串类型，不能包含数字或其他类型。
>>> L1=(1,2,3)
>>> '_'.join(L1)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
TypeError: sequence item 0: expected str instance, int found
#以下两种也不能join。
>>> L1=('ab',2)
>>> L2=('AB',{'a','cd'})

8.修剪：strip、lstrip和rstrip

分别是移除左右两边、左边、右边的字符chars。如果不指定chars或者指定为None，则默认移除空白(空格、制表符、换行符)。

唯一需要注意的是，chars可以是多个字符序列。在移除时，只要是这个序列中的字符，都会被移除。

例如：

1.移除单个字符或空白。

>>> '   spacious   '.lstrip()
'spacious   '
　
>>> '   spacious   '.rstrip()
'   spacious'
　
>>> 'spacious   '.lstrip('s')
'pacious   '
　
>>> 'spacious'.rstrip('s')
'spaciou'

2.移除字符序列中的字符。

>>> print('www.example.com'.lstrip('cmowz.'))
example.com
>>> print('wwwz.example.com'.lstrip('cmowz.'))
example.com
>>> print('wwaw.example.com'.lstrip('cmowz.'))
aw.example.com
>>> print('www.example.com'.strip('cmowz.'))
example

由于www.example.com的前4个字符都是字符序列cmowz.中的字符，所以都被移除，而第五个字符e不在字符序列中，所以修剪到此结束。同理wwwz.example.com。

wwaw.example.com中第3个字符a不是字符序列中的字符，所以修剪到此结束。