cat():字串拼接
import pandas as pd
# Sample data used by the string-method examples in this file:
# four string columns, three rows.
df = pd.DataFrame({
    'name': ['Rose', 'Joy', 'Mike'],
    'address': ['Landon', 'Newyork', 'Paris'],
    'height': ['mid:176cm_middle', 'low:169cm_lowest', 'high:180_highest'],
    'salary': ['2,300dollar', '3,200dollar', '5,800dollar'],
})
# Concatenate name and address element-wise; the separator evaluates to ':--'.
# (The keyword is `sep`, and the call must be closed.)
df['name'].str.cat(df['address'], sep=':' + '-' * 2)
contains():包含
# Boolean mask: True for every address that contains a capital 'L'.
df["address"].str.contains(pat="L")
startswith(),endswith(): 判断是否以指定子串开头/结尾,用法同上
count(): 统计每个字串中指定子串(正则模式)出现的次数,用法同上
get(): 得到指定位置的子串
# Split each height on ':' and keep the second piece (index 1) of every row.
df["height"].str.split(pat=":").str.get(i=1)
len(): 计算字串长度
upper,lower: 大小写转换
pad,center: 指定位置添加字符:
# pad/center only add fill characters when a value is shorter than the
# requested width, so the width must exceed the longest address to show anything.
df['address'].str.pad(10, side="left", fillchar='#')   # fill on the left, same as rjust
df['address'].str.pad(10, side="right", fillchar='#')  # fill on the right, same as ljust
df['address'].str.center(10, fillchar='#')             # fill on both sides
repeat:重复子串若干次
slice_replace:替换指定位置字符
replace:替换指定字符
# Replace the numeric part of each salary with '$'.
# - Raw string keeps the regex escapes (\d) intact.
# - The sample salaries use ',' as the thousands separator, so the pattern
#   matches digits, a comma, then digits.
# - regex=True is explicit because pandas >= 2.0 treats the pattern as a
#   literal substring by default.
df['salary'].str.replace(r"\d+,\d+", "$", regex=True)
split+expand:扩展数据列
# Split height on ':' and store the two pieces as new columns.
df[["height_1", "height_f"]] = df["height"].str.split(pat=":", expand=True)
split+join :分隔后以特定字符连接子串
# Break each height apart at ':' and glue the pieces back together with '###'.
df["height"].str.split(pat=":").str.join(sep="#" * 3)
strip,lstrip,rstrip:去除首尾的空格、回车符等空白字符
findall:用正则匹配字符串
# Collect every run of ASCII letters found in each height string.
df["height"].str.findall(pat="[a-zA-Z]+")
extract,extractall:用正则抽取匹配的字串.注意加上括号
# First run of letters in each row; the capture group decides what is returned.
df["height"].str.extract(pat="([a-zA-Z]+)")
# Every run of letters in each row; the result carries a compound (multi-level) index.
df["height"].str.extractall(pat="([a-zA-Z]+)")
# Two capture groups give two columns: the first run of letters and the next run after it.
df["height"].str.extract(pat="([a-zA-Z]+).*?([a-zA-Z]+)", expand=True)