字符串操作
by deamon(deamoncao@163.com)
python使用频度非常高的字符串相关操作。
去空格及特殊符号
In [24]:
# Demonstrates removing whitespace and specific characters from string ends.
s = ' hello, world!'

# With no argument, strip() removes leading and trailing whitespace.
print(s.strip())

# The argument of lstrip/rstrip/strip is a SET of characters, not a prefix or
# suffix: characters are dropped from the relevant end(s) until one that is
# not in the set is reached.
print(s.lstrip(' hello, '))  # stops at 'w'
print(s.rstrip('!'))
print(s.strip(' he'))
print(s.strip('llo, '))  # only the leading space goes; 'h' is not in the set
hello, world!
world!
hello, world
llo, world!
hello, world!
连接字符串
In [25]:
# Concatenate with the in-place addition operator.
sStr1 = 'strcat'
sStr2 = 'append'
sStr1 += sStr2
print(sStr1)

# Concatenate with operator.concat, the function form of `a + b` for sequences.
import operator

sStr1 = 'strcat'
sStr2 = 'append'
sStr3 = operator.concat(sStr1, sStr2)
print(sStr3)
strcatappend
strcatappend
查找字符
In [26]:
<em># < 0 为未找到</em>
sStr1 = 'strchr'
sStr2 = 'r'
# str.find returns the index of the first match, or -1 when there is none.
# (str.index would raise ValueError instead of returning a negative value.)
nPos = sStr1.find(sStr2)
print(nPos)
2
比较字符串
python2.x中使用cmp函数对字符串进行比较
python3.x中不再支持cmp函数,而是利用operator模块方法比较
包含的方法有:
lt(a, b) ———— 小于
le(a, b) ———— 小于等于
eq(a, b) ———— 等于
ne(a, b) ———— 不等于
ge(a, b) ———— 大于等于
gt(a, b) ———— 大于
In [27]:
import operator  # function forms of the comparison operators

sStr1 = 'strchr'
sStr2 = 'strch'

# Strings compare lexicographically; a string is greater than its own prefix.
print(operator.gt(sStr1, sStr2))  # greater than
print(operator.ge(sStr1, sStr2))  # greater than or equal
print(operator.eq(sStr1, sStr2))  # equal
print(operator.le(sStr1, sStr2))  # less than or equal
print(operator.lt(sStr1, sStr2))  # less than
True
True
False
False
False
字符串中的大小写转换
主要有如下方法:
upper ———— 转换为大写
lower ———— 转换为小写
title ———— 转换为标题(每个单词首字母大写)
capitalize ———— 首字母大写
swapcase ———— 大写变小写,小写变大写
In [28]:
sStr1 = 'JCstrlwr'
# upper() returns a new upper-cased string; use lower() for the opposite.
sStr1 = sStr1.upper()
print(sStr1)
JCSTRLWR
翻转字符串
In [29]:
sStr1 = 'abcdefg'
# A slice with step -1 walks the string backwards, producing a reversed copy.
sStr1 = sStr1[::-1]
print(sStr1)
gfedcba
查找字符串
find方法:
检测字符串内是否包含子串str
语法为:
str.find(sub[, start[, end]]) #sub为要查找的子串;start为查找起始位置,默认为0;end为查找终止位置,默认为字符串长度。若找到则返回子串第一次出现的起始索引,否则返回-1
In [33]:
sStr1 = 'today is a fine day'
sStr2 = 'is'

print(sStr1.find(sStr2))  # search the whole string
print(sStr1.find(sStr2, 3))  # start searching at index 3
print(sStr1.find(sStr2, 7, 10))  # search only sStr1[7:10]; no match gives -1
6
6
-1
分割字符串
In [31]:
sStr1 = 'ab,cde,fgh,ijk'
sStr2 = ','
# Keep everything after the first separator. If the separator were absent,
# find() would return -1, so the slice would start at 0 and keep the string.
sStr1 = sStr1[sStr1.find(sStr2) + 1:]
print(sStr1)

# Alternatively, split on the separator to get every field as a list.
s = 'ab,cde,fgh,ijk'
print(s.split(','))
cde,fgh,ijk
['ab', 'cde', 'fgh', 'ijk']
字符串序列连接
join方法:
语法为str.join(seq) #seq为元素序列
In [32]:
li = ['hello', 'world']
# Named `sep` rather than `str` so the builtin str type is not shadowed.
sep = '-'
# join() inserts the separator between consecutive elements of the sequence.
sep.join(li)
Out[32]:
'hello-world'
字符串内替换
replace方法:
把字符串中的旧串替换成新串
语法为:
str.replace(old,new[,max]) #old为旧串,new为新串,max可选,为替换次数
In [34]:
sStr1 = 'today is a find day'
# Strings are immutable: replace() returns a new string and leaves the
# original untouched, so the result must be assigned to be kept.
sStr1 = sStr1.replace('find', 'rainy')
print(sStr1)
# Output: today is a rainy day
判断字符串组成
主要有如下方法:
isdigit ———— 检测字符串是否只由数字组成
isalnum ———— 检测字符串是否只由数字和字母组成
isalpha ———— 检测字符串是否只由字母组成
islower ———— 检测字符串是否只含有小写字母
isupper ———— 检测字符串是否只含有大写字母
isspace ———— 检测字符串是否只含有空白字符(空格、制表符、换行符等)
istitle ———— 检测字符串是否是标题(每个单词首字母大写)
In [35]:
sStr1 = 'hello'
print(sStr1.islower())  # True: every cased character is lowercase
print(sStr1.isdigit())  # False: the string is not made up of digits
True
False
计算字符串中出现频次最多的字母
In [20]:
<em>#version 1</em>
import re
from collections import Counter


def get_max_value_v1(text):
    """Return the most frequent ASCII letter in *text*, ignoring case.

    Non-letter characters are ignored. When several letters share the highest
    count, the alphabetically smallest one is returned.

    Raises:
        ValueError: if *text* contains no ASCII letters.
    """
    # After lower() only a-z can remain, so the pattern needs no upper range.
    letters = re.findall(r'[a-z]', text.lower())
    if not letters:
        raise ValueError('text contains no ASCII letters')
    count = Counter(letters)  # e.g. Counter({'l': 3, 'o': 2, 'h': 1, ...})
    max_value = max(count.values())
    # min() picks the alphabetically first of the letters tied at the maximum.
    return min(k for k, v in count.items() if v == max_value)
In [21]:
<em>#version 2</em>
from collections import Counter


def get_max_value(text):
    """Return the most frequent letter in *text*, ignoring case.

    Characters for which str.isalpha() is false are ignored. When several
    letters share the highest count, the smallest one in sort order wins.

    Raises:
        ValueError: if *text* contains no alphabetic characters.
    """
    count = Counter(ch for ch in text.lower() if ch.isalpha())
    if not count:
        raise ValueError('text contains no letters')
    m = max(count.values())
    # min() picks the first in sort order among the letters tied at the maximum.
    return min(ch for ch, n in count.items() if n == m)
In [22]:
<em>#version 3</em>
import string


def get_max_value(text):
    """Return the most frequent ASCII letter in *text*, ignoring case.

    Each letter of string.ascii_lowercase is scored by how often it occurs in
    the lower-cased text. max() keeps the first item with the highest score,
    so ties go to the alphabetically first letter, and 'a' is returned when
    no ASCII letter occurs at all.
    """
    lowered = text.lower()
    return max(string.ascii_lowercase, key=lowered.count)
In [23]:
max(range(6), key=lambda x: x > 2)
# >>> 3
# The key maps the items to [False, False, False, True, True, True].
# True compares greater than False, and max() returns the first item whose
# key is the largest, so the result is 3.

max([3, 5, 2, 1, 4, 3, 0], key=lambda x: x)
# >>> 5
# The key is the item itself, so the largest value, 5, is returned.

max('ah', 'bf', key=lambda x: x[1])
# >>> 'ah'
# The key is the second character; 'h' is greater than 'f', so 'ah' wins.

max('ah', 'bf', key=lambda x: x[0])
# >>> 'bf'
# The key is the first character; 'b' is greater than 'a', so 'bf' wins.

text = 'Hello World'
max('abcdefghijklmnopqrstuvwxyz', key=text.count)
# >>> 'l'
# The key is how often each letter occurs in 'Hello World'; 'l' occurs most
# often, so 'l' is returned.
Out[23]:
'l'
Count occurrence of a character in a Python string
In [23]:
<em>#T h e M i s s i s s i p p i R i v e r</em>
<em>#[1, 1, 2, 2, 1, 5, 4, 4, 5, 4, 4, 5, 2, 2, 5, 2, 1, 5, 1, 2, 1]</em>
sentence = 'The Mississippi River'


def count_chars(s):
    """Return how often the most common character occurs in *s*.

    Case is ignored, and every character is counted, including spaces and
    punctuation. An empty string yields 0.
    """
    # Imported locally so the function does not rely on an earlier cell.
    from collections import Counter

    # One pass with Counter instead of calling s.count() for every character.
    return max(Counter(s.lower()).values(), default=0)


print(count_chars(sentence))
5