pandas 相关函数

import pandas as pd
import numpy as np

np.array([1,2,3])
array([1, 2, 3])

#常见函数

np.arange(10)
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
np.linspace(2,3,num=5, endpoint=False)
array([2. , 2.2, 2.4, 2.6, 2.8])
np.random.randn(4,4)
array([[ 0.31090058,  1.36645632,  0.86897091,  0.09761096],
       [ 0.35996995,  1.0850638 ,  0.68081139, -1.54973962],
       [ 1.09726152,  0.85757526,  1.00096873,  1.19056036],
       [-0.71615881,  0.25503605,  0.23917234,  0.53867985]])

numpy.random.randint(low, high=None, size=None, dtype='l')

np.random.randint(0,4,size=(2,4))
array([[1, 0, 2, 2],
       [2, 3, 3, 1]])
np.zeros(4)
array([0., 0., 0., 0.])
np.zeros(4).reshape(2,2)
array([[0., 0.],
       [0., 0.]])
df = pd.DataFrame({
    '国家':['中国', '美国', '日本'],
    '地区':['亚洲', '北美','亚洲'],
    '人口':['13.97', '3.28','1.26'],
    'GDP':[14.34,21.43,5.08]
})
df
   国家  地区     人口    GDP
0  中国  亚洲  13.97  14.34
1  美国  北美   3.28  21.43
2  日本  亚洲   1.26   5.08
s = pd.Series(np.random.randn(5),index = ['a','b','c','d','e'])
s
a   -0.610498
b   -1.111079
c    1.495161
d    1.233846
e    0.854973
dtype: float64

*df = pd.DataFrame(data, index, columns=None)

*利用由 Series 组成的字典创建 DataFrame

d = {
    'x':pd.Series([1,2,3],index=['a','b','c']),
    'y':pd.Series([1,2,3,4],index=['a','b','c','d'])
}
pd.DataFrame(d)
     x  y
a  1.0  1
b  2.0  2
c  3.0  3
d  NaN  4

建立索引

data = 'https://www.gairuo.com/file/data/dataset/team.xlsx'
df = pd.read_excel(data,index_col='name')
df
teamQ1Q2Q3Q4
name
LiverE89212464
ArryC36373757
AckA57601884
EorgeC93967178
OahD65496186
..................
GabrielC48598774
Austin7C21313043
Lincoln4C9893120
EliE11745891
BenE21434174

100 rows × 5 columns

df = pd.read_excel(data)
df
nameteamQ1Q2Q3Q4
0LiverE89212464
1ArryC36373757
2AckA57601884
3EorgeC93967178
4OahD65496186
.....................
95GabrielC48598774
96Austin7C21313043
97Lincoln4C9893120
98EliE11745891
99BenE21434174

100 rows × 6 columns

df.set_index('name')
teamQ1Q2Q3Q4
name
LiverE89212464
ArryC36373757
AckA57601884
EorgeC93967178
OahD65496186
..................
GabrielC48598774
Austin7C21313043
Lincoln4C9893120
EliE11745891
BenE21434174

100 rows × 5 columns

s = pd.Series([i for i in range(100)])
s
0      0
1      1
2      2
3      3
4      4
      ..
95    95
96    96
97    97
98    98
99    99
Length: 100, dtype: int64
df.set_index(s)
nameteamQ1Q2Q3Q4
0LiverE89212464
1ArryC36373757
2AckA57601884
3EorgeC93967178
4OahD65496186
.....................
95GabrielC48598774
96Austin7C21313043
97Lincoln4C9893120
98EliE11745891
99BenE21434174

100 rows × 6 columns

重置索引

df.reset_index()
indexnameteamQ1Q2Q3Q4
00LiverE89212464
11ArryC36373757
22AckA57601884
33EorgeC93967178
44OahD65496186
........................
9595GabrielC48598774
9696Austin7C21313043
9797Lincoln4C9893120
9898EliE11745891
9999BenE21434174

100 rows × 7 columns

df
nameteamQ1Q2Q3Q4
0LiverE89212464
1ArryC36373757
2AckA57601884
3EorgeC93967178
4OahD65496186
.....................
95GabrielC48598774
96Austin7C21313043
97Lincoln4C9893120
98EliE11745891
99BenE21434174

100 rows × 6 columns

df.index
RangeIndex(start=0, stop=100, step=1)
df.reset_index(drop=True)
nameteamQ1Q2Q3Q4
0LiverE89212464
1ArryC36373757
2AckA57601884
3EorgeC93967178
4OahD65496186
.....................
95GabrielC48598774
96Austin7C21313043
97Lincoln4C9893120
98EliE11745891
99BenE21434174

100 rows × 6 columns

df.index
RangeIndex(start=0, stop=100, step=1)
df.head()
nameteamQ1Q2Q3Q4
0LiverE89212464
1ArryC36373757
2AckA57601884
3EorgeC93967178
4OahD65496186

dataframe.reset_index(drop=True, inplace=True)

df.tail()
nameteamQ1Q2Q3Q4
95GabrielC48598774
96Austin7C21313043
97Lincoln4C9893120
98EliE11745891
99BenE21434174
df.columns
Index(['name', 'team', 'Q1', 'Q2', 'Q3', 'Q4'], dtype='object')
df.describe()
Q1Q2Q3Q4
count100.000000100.000000100.000000100.000000
mean49.20000052.55000052.67000052.780000
std29.96260329.84518126.54367727.818524
min1.0000001.0000001.0000002.000000
25%19.50000026.75000029.50000029.500000
50%51.50000049.50000055.00000053.000000
75%74.25000077.75000076.25000075.250000
max98.00000099.00000099.00000099.000000
df.mean()
E:\Anaconda\envs\py37\lib\site-packages\ipykernel_launcher.py:1: FutureWarning: Dropping of nuisance columns in DataFrame reductions (with 'numeric_only=None') is deprecated; in a future version this will raise TypeError.  Select only valid columns before calling the reduction.
  """Entry point for launching an IPython kernel.





Q1    49.20
Q2    52.55
Q3    52.67
Q4    52.78
dtype: float64
df.mean(1)
E:\Anaconda\envs\py37\lib\site-packages\ipykernel_launcher.py:1: FutureWarning: Dropping of nuisance columns in DataFrame reductions (with 'numeric_only=None') is deprecated; in a future version this will raise TypeError.  Select only valid columns before calling the reduction.
  """Entry point for launching an IPython kernel.





0     49.50
1     41.75
2     54.75
3     84.50
4     65.25
      ...  
95    67.00
96    31.25
97    53.00
98    58.50
99    44.75
Length: 100, dtype: float64
df.value_counts()
name     team  Q1  Q2  Q3  Q4
Aaron    A     96  75  55  8     1
Liam     B     2   80  24  25    1
Matthew  C     44  33  41  98    1
Mason    D     80  96  26  49    1
Luke6    D     15  97  95  99    1
                                ..
Elliott  B     9   31  33  60    1
Elliot   C     15  17  76  22    1
Elijah   B     97  89  15  46    1
Eli      E     11  74  58  91    1
Zachary  E     12  71  85  93    1
Length: 100, dtype: int64
df
nameteamQ1Q2Q3Q4
0LiverE89212464
1ArryC36373757
2AckA57601884
3EorgeC93967178
4OahD65496186
.....................
95GabrielC48598774
96Austin7C21313043
97Lincoln4C9893120
98EliE11745891
99BenE21434174

100 rows × 6 columns

df['team'].value_counts()
C    22
B    22
E    20
D    19
A    17
Name: team, dtype: int64
df.groupby(by = ['team']).count()
nameQ1Q2Q3Q4
team
A1717171717
B2222222222
C2222222222
D1919191919
E2020202020

diff() : 计算当前数据与上一个数据的差值
shift() : 将数据整体向下平移(默认平移 1 位),空出的位置用 NaN 填充

df
nameteamQ1Q2Q3Q4
0LiverE89212464
1ArryC36373757
2AckA57601884
3EorgeC93967178
4OahD65496186
.....................
95GabrielC48598774
96Austin7C21313043
97Lincoln4C9893120
98EliE11745891
99BenE21434174

100 rows × 6 columns

df['Q1'].shift()
0      NaN
1     89.0
2     36.0
3     57.0
4     93.0
      ... 
95    20.0
96    48.0
97    21.0
98    98.0
99    11.0
Name: Q1, Length: 100, dtype: float64

value_counts(values,sort=True, ascending=False, normalize=False,bins=None,dropna=True)

truncate(before= , after= )

Pandas 高级操作

df.Q1>36
0      True
1     False
2      True
3      True
4      True
      ...  
95     True
96    False
97     True
98    False
99    False
Name: Q1, Length: 100, dtype: bool
df.loc[:,'Q1':'Q3']
Q1Q2Q3
0892124
1363737
2576018
3939671
4654961
............
95485987
96213130
9798931
98117458
99214341

100 rows × 3 columns

df.query()

df.query('Q1>Q2>90')
nameteamQ1Q2Q3Q4
97Lincoln4C9893120
df.query('(Q1<50)&(Q2>40)&(Q3>90)')
nameteamQ1Q2Q3Q4
56DavidB2147992
72Luke6D15979599

df.filter() 支持模糊匹配,正则表达式

df.filter(items=['Q1','Q2'])
Q1Q2
08921
13637
25760
39396
46549
.........
954859
962131
979893
981174
992143

100 rows × 2 columns

类型转换astype()

df.Q1.dtypes
dtype('int64')
df.Q1.astype('int32')
0     89
1     36
2     57
3     93
4     65
      ..
95    48
96    21
97    98
98    11
99    21
Name: Q1, Length: 100, dtype: int32

转为时间类型 to_datetime()

t = pd.Series(['20220810','20220811'])
t
0    20220810
1    20220811
dtype: object
pd.to_datetime(t)
0   2022-08-10
1   2022-08-11
dtype: datetime64[ns]

数据排序 - sort_index() / sort_values():axis=1 表示列,axis=0 表示行

df.sort_index(ascending=True)# axis, ascending, ignore_index, inplace, by=[]
nameteamQ1Q2Q3Q4
0LiverE89212464
1ArryC36373757
2AckA57601884
3EorgeC93967178
4OahD65496186
.....................
95GabrielC48598774
96Austin7C21313043
97Lincoln4C9893120
98EliE11745891
99BenE21434174

100 rows × 6 columns

添加修改

replace(a,b) # 将数据中的a 换成b
insert(列的具体位置,列名,列的值)# 插入新列,列的具体位置,列名,列的值

df.where()

df
nameteamQ1Q2Q3Q4
0LiverE89212464
1ArryC36373757
2AckA57601884
3EorgeC93967178
4OahD65496186
.....................
95GabrielC48598774
96Austin7C21313043
97Lincoln4C9893120
98EliE11745891
99BenE21434174

100 rows × 6 columns

df.iterrows()

for index, row in df.iterrows():
    print(index,row['name'],row.Q1)
0 Liver 89
1 Arry 36
2 Ack 57
3 Eorge 93
4 Oah 65
5 Harlie 24
6 Acob 61
7 Lfie 9
8 Reddie 64
9 Oscar 77
10 Leo 17
11 Logan 9
12 Archie 83
13 Theo 51
14 Thomas 80
15 James 48
16 Joshua 63
17 Henry 91
18 William 80
19 Max 97
20 Lucas 60
21 Ethan 79
22 Arthur 44
23 Mason 80
24 Isaac 74
25 Harrison 89
26 Teddy 71
27 Finley 62
28 Daniel 50
29 Riley 35
30 Edward 57
31 Joseph 67
32 Alexander 91
33 Adam 90
34 Reggie1 30
35 Samuel 9
36 Jaxon 88
37 Sebastian 1
38 Elijah 97
39 Harley 2
40 Toby 52
41 Arlo8 48
42 Dylan 86
43 Jude 8
44 Benjamin 15
45 Rory9 8
46 Tommy 29
47 Jake3 69
48 Louie 24
49 Carter7 57
50 Jenson 66
51 Hugo0 28
52 Bobby1 50
53 Frankie 18
54 Ollie3 10
55 Zachary 12
56 David 21
57 Albie1 79
58 Lewis 4
59 Luca 5
60 Ronnie 53
61 Jackson5 6
62 Matthew 44
63 Alex 14
64 Harvey2 43
65 Reuben 70
66 Jayden6 64
67 Caleb 64
68 Hunter3 38
69 Theodore3 43
70 Nathan 87
71 Blake 78
72 Luke6 15
73 Elliot 15
74 Roman 73
75 Stanley 69
76 Dexter 73
77 Michael 89
78 Elliott 9
79 Tyler 75
80 Ryan 92
81 Ellis 34
82 Finn 4
83 Albert0 85
84 Kai 66
85 Liam 2
86 Calum 14
87 Louis2 13
88 Aaron 96
89 Ezra 16
90 Leon 38
91 Connor 62
92 Grayson7 59
93 Jamie0 39
94 Aiden 20
95 Gabriel 48
96 Austin7 21
97 Lincoln4 98
98 Eli 11
99 Ben 21
for column in df:
    print(column)
name
team
Q1
Q2
Q3
Q4
for label, ser in df.items():
    print(label)
    print(ser[:3])
name
0    Liver
1     Arry
2      Ack
Name: name, dtype: object
team
0    E
1    C
2    A
Name: team, dtype: object
Q1
0    89
1    36
2    57
Name: Q1, dtype: int64
Q2
0    21
1    37
2    60
Name: Q2, dtype: int64
Q3
0    24
1    37
2    18
Name: Q3, dtype: int64
Q4
0    64
1    57
2    84
Name: Q4, dtype: int64

apply() 函数

df.name.apply(lambda x: x.lower())
0        liver
1         arry
2          ack
3        eorge
4          oah
        ...   
95     gabriel
96     austin7
97    lincoln4
98         eli
99         ben
Name: name, Length: 100, dtype: object
def mylen(x):
    """Return the number of characters in the string form of ``x``.

    The value is converted with ``str`` first, so non-string inputs
    (numbers, ``None``, ...) are measured by their textual representation.
    """
    text = str(x)
    return len(text)
df.applymap(mylen)
nameteamQ1Q2Q3Q4
0512222
1412222
2312222
3512222
4312222
.....................
95712222
96712222
97812212
98312222
99312222

100 rows × 6 columns

agg() # 聚合函数

# 每列最大值
df
nameteamQ1Q2Q3Q4
0LiverE89212464
1ArryC36373757
2AckA57601884
3EorgeC93967178
4OahD65496186
.....................
95GabrielC48598774
96Austin7C21313043
97Lincoln4C9893120
98EliE11745891
99BenE21434174

100 rows × 6 columns

df.agg({'Q1':['sum','min'],'Q2':['min','max']})
         Q1    Q2
sum  4920.0   NaN
min     1.0   1.0
max     NaN  99.0
df.Q1.max()
98
# 使用不同方法进行聚合
df.agg(a=('Q1','max'),
      b=('Q2','min'))
Q1Q2
a98.0NaN
bNaN1.0

Pandas 分组聚合

df.groupby(by=None,axis=0,level=None, as_index: bool =True, sort: bool=True,


group_keys:bool=True,observed: bool = False,


dropna: bool = True)

df.groupby('team').sum()
Q1Q2Q3Q4
team
A1066639875783
B975121812021136
C1056119410681127
D860119112411199
E96310138811033
df.groupby('team').count()
nameQ1Q2Q3Q4
team
A1717171717
B2222222222
C2222222222
D1919191919
E2020202020
df.team.value_counts()
C    22
B    22
E    20
D    19
A    17
Name: team, dtype: int64
df.groupby(lambda x:x>50).sum()
Q1Q2Q3Q4
False2832270525972687
True2088255026702591

数据合并与对比

    df.append(self, other, ignore_index=False, verify_integrity=False,sort=False)

pd.concat(objs:连接数据 ,axis=0, join='outer', ignore_index=False, keys=None, levels=None,names=None,sort=False,copy=True)

df1 = pd.DataFrame({'x':[1,2],'y':[3,4]})
df2 = pd.DataFrame({'x':[5,6],'y':[7,8]})
df1
xy
013
124
pd.concat([df1,df2])
xy
013
124
057
168
pd.concat([df1,df2],axis=1)
xyxy
01357
12468
z = pd.Series([9,9], name='z')
z
0    9
1    9
Name: z, dtype: int64
df.groupby(['team',df.mean(1)>=60]).count()
E:\Anaconda\envs\py37\lib\site-packages\ipykernel_launcher.py:1: FutureWarning: Dropping of nuisance columns in DataFrame reductions (with 'numeric_only=None') is deprecated; in a future version this will raise TypeError.  Select only valid columns before calling the reduction.
  """Entry point for launching an IPython kernel.
nameQ1Q2Q3Q4
team
AFalse1414141414
True33333
BFalse1414141414
True88888
CFalse1717171717
True55555
DFalse1010101010
True99999
EFalse1515151515
True55555
arrays = [[1,1,2,2],['A','B','A','B']]
index = pd.MultiIndex.from_arrays(arrays, names=('class','team'))
index
MultiIndex([(1, 'A'),
            (1, 'B'),
            (2, 'A'),
            (2, 'B')],
           names=['class', 'team'])
pd.DataFrame([{'Q1':60,'Q2':70}], index=index)
Q1Q2
classteam
1A6070
B6070
2A6070
B6070
insex_arrays = [[1,1,2,2],['男','女','男','女']]
columns_arrays = [['2019','2019','2020','2020'],['上半年','下半年','上半年','下半年']] 

数据重塑与透视

# 构造数据
dff = pd.DataFrame(
{
    'A':['a1','a1','a2','a3','a3','a3'],
    'B':['b1','b2','b3','b1','b2','b3'],
    'C':['c1','c2','c3','c4','c5','c6'],
    'D':['d1','d2','d3','d4','d5','d6']
})
dff
ABCD
0a1b1c1d1
1a1b2c2d2
2a2b3c3d3
3a3b1c4d4
4a3b2c5d5
5a3b3c6d6
dff.pivot(index='A',columns = 'B', values = 'C')
Bb1b2b3
A
a1c1c2NaN
a2NaNNaNc3
a3c4c5c6

pd.pivot_table(df,
index=[],
columns=[],
values=,
aggfunc=np.sum,
fill_value=0,
margins=True)

dff
ABCD
0a1b1c1d1
1a1b2c2d2
2a2b3c3d3
3a3b1c4d4
4a3b2c5d5
5a3b3c6d6
dff.D=np.arange(1,7)
dff
ABCD
0a1b1c11
1a1b2c22
2a2b3c33
3a3b1c44
4a3b2c55
5a3b3c66
dff.pivot_table(index='A',columns='B',values='D',fill_value=0)
Bb1b2b3
A
a1120
a2003
a3456

聚合高级操作

dff.pivot_table(index=['A','B'],columns=['C'],values='D',aggfunc=np.sum, fill_value=0, margins=True)
Cc1c2c3c4c5c6All
AB
a1b11000001
b20200002
a2b30030003
a3b10004004
b20000505
b30000066
All12345621
dff1 = pd.DataFrame({
    'A':['a1','a1','a2','a2'],
    'B':['b1','b2','b1','b2'],
    'C':[1,2,3,4],
    'D':[5,6,7,8],
    'E':[5,6,7,8]
})
dff1
ABCDE
0a1b1155
1a1b2266
2a2b1377
3a2b2488
dff1.set_index(['A','B'],inplace=True)
dff1
CDE
AB
a1b1155
b2266
a2b1377
b2488
dff1.stack()
A   B    
a1  b1  C    1
        D    5
        E    5
    b2  C    2
        D    6
        E    6
a2  b1  C    3
        D    7
        E    7
    b2  C    4
        D    8
        E    8
dtype: int64
s = dff1.stack()
s.unstack()
CDE
AB
a1b1155
b2266
a2b1377
b2488

交叉表

dff2 = pd.DataFrame(
{
    'A':['a1','a1','a2','a3','a3','a3'],
    'B':['b1','b2','b3','b1','b2','b3'],
    'C':[1,2,3,4,5,6]
})
dff2
ABC
0a1b11
1a1b22
2a2b33
3a3b14
4a3b25
5a3b36
pd.crosstab(dff2['A'],dff2['B'],normalize=True)
Bb1b2b3
A
a10.1666670.1666670.000000
a20.0000000.0000000.166667
a30.1666670.1666670.166667
dff2
ABC
0a1b11
1a1b22
2a2b33
3a3b14
4a3b25
5a3b36
dff2.melt(id_vars=['A','B'])
ABvariablevalue
0a1b1C1
1a1b2C2
2a2b3C3
3a3b1C4
4a3b2C5
5a3b3C6
dff2.melt()
variablevalue
0Aa1
1Aa1
2Aa2
3Aa3
4Aa3
5Aa3
6Bb1
7Bb2
8Bb3
9Bb1
10Bb2
11Bb3
12C1
13C2
14C3
15C4
16C5
17C6
dff2.melt(value_vars=['C','B'])
variablevalue
0C1
1C2
2C3
3C4
4C5
5C6
6Bb1
7Bb2
8Bb3
9Bb1
10Bb2
11Bb3
dff2.melt(id_vars=['A'],value_vars=['B'],var_name='B_lable',value_name='B_value')
AB_lableB_value
0a1Bb1
1a1Bb2
2a2Bb3
3a3Bb1
4a3Bb2
5a3Bb3

虚拟变量

pd.get_dummies(data,prefix=None,
prefix_sep='_',dummy_na=False,
columns=None, sparse=False,
drop_first=False, dtype=None)

# 因子化
data = ['b','b','a','c','b']
codes, unique = pd.factorize(data)
codes
unique
array(['b', 'a', 'c'], dtype=object)
c = pd.Series([[1,2,3],'foo',[],[3,4]])
c
0    [1, 2, 3]
1          foo
2           []
3       [3, 4]
dtype: object
c.explode()
0      1
0      2
0      3
1    foo
2    NaN
3      3
3      4
dtype: object
c1 = pd.Series({'A':[[1,2,3],'foo',[],[3,4]],'B':range(4)})
c1
A    [[1, 2, 3], foo, [], [3, 4]]
B                    (0, 1, 2, 3)
dtype: object
c1.explode()
A    [1, 2, 3]
A          foo
A           []
A       [3, 4]
B            0
B            1
B            2
B            3
dtype: object
c1.explode('A')
0    [1, 2, 3]
1          foo
2           []
3       [3, 4]
4            0
5            1
6            2
7            3
dtype: object

Pandas 数据清洗

da = pd.DataFrame({
    'A':['a1','a1','a2','a2'],
    'B':['b1','b2',None,'b2'],
    'C':[1,2,3,4],
    'D':[5,6,None,8],
    'E':[5,None,7,8]
}
)
da
ABCDE
0a1b115.05.0
1a1b226.0NaN
2a2None3NaN7.0
3a2b248.08.0
da.isna()
ABCDE
0FalseFalseFalseFalseFalse
1FalseFalseFalseFalseTrue
2FalseTrueFalseTrueFalse
3FalseFalseFalseFalseFalse
da.isnull()
ABCDE
0FalseFalseFalseFalseFalse
1FalseFalseFalseFalseTrue
2FalseTrueFalseTrueFalse
3FalseFalseFalseFalseFalse
da.isna().sum()
A    0
B    1
C    0
D    1
E    1
dtype: int64
"""
1 识别重复值
df.duplicated(subset=None,keep='first')
keep : first 除第一次出现外,其余重复值标记为True
       last  除最后一次出现外,其余重复值标记为True
       False 所有重复值标记为True
2 删除重复值
df.drop_duplicates(subset=None,
                   keep='first',
                   inplace=False,
                   ignore_index = False)
keep : first 保留第一次出现的重复值
       last  保留最后一次出现重复值
       False 删除所有的重复值
"""
"\n1 识别重复值\ndf.duplicated(subset=None,keep='first')\nkeep : first 除第一次出现外,其余重复值标记为True\n       last  除最后一次出现外,其余重复值标记为True\n       False 所有重复值标记为True\n2 删除重复值\ndf.drop_duplicates(subset=None,\n                   keep='first',\n                   inplace=False,\n                   ignore_index = False)\nkeep : first 保留第一次出现的重复值\n       last  保留最后一次出现重复值\n       False 删除所有的重复值\n"
s1 = pd.Series(['A','Boy','C',np.nan],dtype="string")
s1
0       A
1     Boy
2       C
3    <NA>
dtype: string
s1.str.lower()
0       a
1     boy
2       c
3    <NA>
dtype: string
df
nameteamQ1Q2Q3Q4
0LiverE89212464
1ArryC36373757
2AckA57601884
3EorgeC93967178
4OahD65496186
.....................
95GabrielC48598774
96Austin7C21313043
97Lincoln4C9893120
98EliE11745891
99BenE21434174

100 rows × 6 columns

df.Q1.astype(str).str
<pandas.core.strings.accessor.StringMethods at 0x1fdd73e7588>
s2 = pd.Series(['天_地_人','你_我_他',np.nan, '风_水_火'],dtype='string')
s2
0    天_地_人
1    你_我_他
2     <NA>
3    风_水_火
dtype: string
s2.str.split('_')
0    [天, 地, 人]
1    [你, 我, 他]
2         <NA>
3    [风, 水, 火]
dtype: object
s2.str.split('_').str[1]
0       地
1       我
2    <NA>
3       水
dtype: object
s2.str.split('_',expand=True)
      0     1     2
0     天     地     人
1     你     我     他
2  <NA>  <NA>  <NA>
3     风     水     火

pd.Series(['a','b','c']).repeat(2)
0    a
0    a
1    b
1    b
2    c
2    c
dtype: object
pd.Series(['a','b','c']).str.repeat(2)
0    aa
1    bb
2    cc
dtype: object
pd.Series(['a','b','c']).str.repeat([1,2,3])
0      a
1     bb
2    ccc
dtype: object
s3 = pd.Series(['x','y','z'],dtype='string')
s3
0    x
1    y
2    z
dtype: string
s3.str.cat()
'xyz'
s3.str.cat(sep=',')
'x,y,z'

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值