# 1. String operations (字符串操作)
import pandas as pd
import numpy as np
# Sample Series: mixed-case strings plus one missing value (NaN).
s = pd.Series(["A", "b", "B", "gaer", "AGER", np.nan])
s  # bare expression: only displays the Series in an interactive session

# The .str accessor applies a string method to every element; the NaN
# entry is passed through as missing rather than raising.
s.str.lower()  # every letter lower-cased
s.str.upper()  # every letter upper-cased
s.str.len()  # length of each element
# Labels carrying stray leading and/or trailing spaces.
index = pd.Index([" tang", " yu ", "di"])
index  # values: [' tang', ' yu ', 'di']

# An Index exposes the same .str accessor as a Series.
index.str.strip()  # trims both ends   -> ['tang', 'yu', 'di']
index.str.rstrip()  # trims the right end only -> [' tang', ' yu', 'di']
# 3x2 frame of standard-normal samples whose column labels contain a space.
df = pd.DataFrame(np.random.randn(3, 2), columns=["A a", "B b"], index=range(3))

# Remove the spaces from the column labels. regex=False pins a literal
# match, so the result does not depend on the installed pandas version's
# default for `regex` (it changed from True to False in pandas 2.0).
df.columns = df.columns.str.replace(" ", "", regex=False)
# Underscore-delimited strings used to demonstrate splitting.
s = pd.Series(["a_b_C", "c_d_e", "f_g_h"])
# 0    a_b_C
# 1    c_d_e
# 2    f_g_h
# dtype: object

# Plain split: each element becomes a list of its pieces.
s.str.split("_")
# 0    [a, b, C]
# 1    [c, d, e]
# 2    [f, g, h]
# dtype: object

# expand=True spreads the pieces across the columns of a DataFrame.
s.str.split("_", expand=True)
#    0  1  2
# 0  a  b  C
# 1  c  d  e
# 2  f  g  h

# n=1 limits the work to a single split, so the remainder stays joined.
s.str.split("_", n=1, expand=True)
#    0    1
# 0  a  b_C
# 1  c  d_e
# 2  f  g_h
# Strings that all start with "A"; only some continue with "g".
s = pd.Series(["A", "Aas", "Afgew", "Ager", "Agre", "Ager"])
# 0        A
# 1      Aas
# 2    Afgew
# 3     Ager
# 4     Agre
# 5     Ager
# dtype: object

# Element-wise substring test: yields a boolean (True/False) Series.
s.str.contains("Ag")
# 0    False
# 1    False
# 2    False
# 3     True
# 4     True
# 5     True
# dtype: bool
# Pipe-separated tags, one or two per element.
s = pd.Series(["a", "a|b", "a|c"])
# 0      a
# 1    a|b
# 2    a|c
# dtype: object

# Split each element on "|" and one-hot encode the resulting tags:
# one column per distinct tag, 1 where the element contains it, else 0.
s.str.get_dummies(sep="|")
#    a  b  c
# 0  1  0  0
# 1  1  1  0
# 2  1  0  1