10 Minutes to pandas

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
s = pd.Series([1,3,5,np.nan,6,8])
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64
len(s)
6
s.describe()  # NaN is excluded from the statistics: count is 5 even though len(s) is 6
count    5.000000
mean     4.600000
std      2.701851
min      1.000000
25%      3.000000
50%      5.000000
75%      6.000000
max      8.000000
dtype: float64
dates = pd.date_range('20190101',periods=6)
dates
DatetimeIndex(['2019-01-01', '2019-01-02', '2019-01-03', '2019-01-04',
               '2019-01-05', '2019-01-06'],
              dtype='datetime64[ns]', freq='D')
df = pd.DataFrame(np.random.randn(6,4),index=dates,columns=list('ABCD'))  # randn draws samples from the standard normal distribution
df
                   A         B         C         D
2019-01-01  0.317575  0.330549  0.055375 -1.284453
2019-01-02 -0.272201 -0.770618  0.217658  1.349425
2019-01-03  0.842686 -0.973354  0.596166  0.215889
2019-01-04  0.252868 -0.778050 -0.324255 -1.378140
2019-01-05  0.281876  1.143140 -0.781625 -1.245069
2019-01-06 -0.468258 -1.682376  1.494058  1.262588
df.describe()
              A         B         C         D
count  6.000000  6.000000  6.000000  6.000000
mean   0.159091 -0.455118  0.209563 -0.179960
std    0.468160  1.014914  0.786212  1.293573
min   -0.468258 -1.682376 -0.781625 -1.378140
25%   -0.140934 -0.924528 -0.229347 -1.274607
50%    0.267372 -0.774334  0.136517 -0.514590
75%    0.308650  0.055257  0.501539  1.000913
max    0.842686  1.143140  1.494058  1.349425
df.shape
(6, 4)
df2 = pd.DataFrame({'A':1.,
                   'B':pd.Timestamp('20190101'),
                   'C':pd.Series(1,index=list(range(4)),dtype='float32'),
                   'D':np.array([3]*4,dtype='int32'),
                   'E':pd.Categorical(["test","train","test","train"]),
                   'F':'foo'})  # 'foo' is a conventional placeholder name used in examples, like bar and baz
df2
     A          B    C  D      E    F
0  1.0 2019-01-01  1.0  3   test  foo
1  1.0 2019-01-01  1.0  3  train  foo
2  1.0 2019-01-01  1.0  3   test  foo
3  1.0 2019-01-01  1.0  3  train  foo
df2.dtypes
A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object
df.head()
                   A         B         C         D
2019-01-01  0.317575  0.330549  0.055375 -1.284453
2019-01-02 -0.272201 -0.770618  0.217658  1.349425
2019-01-03  0.842686 -0.973354  0.596166  0.215889
2019-01-04  0.252868 -0.778050 -0.324255 -1.378140
2019-01-05  0.281876  1.143140 -0.781625 -1.245069
df.tail()
                   A         B         C         D
2019-01-02 -0.272201 -0.770618  0.217658  1.349425
2019-01-03  0.842686 -0.973354  0.596166  0.215889
2019-01-04  0.252868 -0.778050 -0.324255 -1.378140
2019-01-05  0.281876  1.143140 -0.781625 -1.245069
2019-01-06 -0.468258 -1.682376  1.494058  1.262588
df.index
DatetimeIndex(['2019-01-01', '2019-01-02', '2019-01-03', '2019-01-04',
               '2019-01-05', '2019-01-06'],
              dtype='datetime64[ns]', freq='D')
df.columns
Index(['A', 'B', 'C', 'D'], dtype='object')

df.values
array([[ 0.31757454,  0.33054893,  0.05537508, -1.28445319],
       [-0.27220143, -0.77061807,  0.21765843,  1.34942538],
       [ 0.84268621, -0.97335385,  0.59616646,  0.21588867],
       [ 0.25286828, -0.77804969, -0.32425479, -1.37813964],
       [ 0.28187609,  1.14314031, -0.78162546, -1.24506887],
       [-0.4682577 , -1.68237556,  1.49405812,  1.26258772]])

df.describe()  # statistics are computed per column
              A         B         C         D
count  6.000000  6.000000  6.000000  6.000000
mean   0.159091 -0.455118  0.209563 -0.179960
std    0.468160  1.014914  0.786212  1.293573
min   -0.468258 -1.682376 -0.781625 -1.378140
25%   -0.140934 -0.924528 -0.229347 -1.274607
50%    0.267372 -0.774334  0.136517 -0.514590
75%    0.308650  0.055257  0.501539  1.000913
max    0.842686  1.143140  1.494058  1.349425
df2.describe(include='all')
          A                    B    C    D      E    F
count   4.0                    4  4.0  4.0      4    4
unique  NaN                    1  NaN  NaN      2    1
top     NaN  2019-01-01 00:00:00  NaN  NaN  train  foo
freq    NaN                    4  NaN  NaN      2    4
first   NaN  2019-01-01 00:00:00  NaN  NaN    NaN  NaN
last    NaN  2019-01-01 00:00:00  NaN  NaN    NaN  NaN
mean    1.0                  NaN  1.0  3.0    NaN  NaN
std     0.0                  NaN  0.0  0.0    NaN  NaN
min     1.0                  NaN  1.0  3.0    NaN  NaN
25%     1.0                  NaN  1.0  3.0    NaN  NaN
50%     1.0                  NaN  1.0  3.0    NaN  NaN
75%     1.0                  NaN  1.0  3.0    NaN  NaN
max     1.0                  NaN  1.0  3.0    NaN  NaN
df.T
   2019-01-01  2019-01-02  2019-01-03  2019-01-04  2019-01-05  2019-01-06
A    0.317575   -0.272201    0.842686    0.252868    0.281876   -0.468258
B    0.330549   -0.770618   -0.973354   -0.778050    1.143140   -1.682376
C    0.055375    0.217658    0.596166   -0.324255   -0.781625    1.494058
D   -1.284453    1.349425    0.215889   -1.378140   -1.245069    1.262588
df.sort_index(axis=1,ascending=True)  # sort by column labels, ascending (axis=1 sorts columns, not rows)
                   A         B         C         D
2019-01-01  0.317575  0.330549  0.055375 -1.284453
2019-01-02 -0.272201 -0.770618  0.217658  1.349425
2019-01-03  0.842686 -0.973354  0.596166  0.215889
2019-01-04  0.252868 -0.778050 -0.324255 -1.378140
2019-01-05  0.281876  1.143140 -0.781625 -1.245069
2019-01-06 -0.468258 -1.682376  1.494058  1.262588
df.sort_values(by='B')  # sort rows by the values of column B, ascending
                   A         B         C         D
2019-01-06 -0.468258 -1.682376  1.494058  1.262588
2019-01-03  0.842686 -0.973354  0.596166  0.215889
2019-01-04  0.252868 -0.778050 -0.324255 -1.378140
2019-01-02 -0.272201 -0.770618  0.217658  1.349425
2019-01-01  0.317575  0.330549  0.055375 -1.284453
2019-01-05  0.281876  1.143140 -0.781625 -1.245069
df['A']  # select a single column; df.A is equivalent
2019-01-01    0.317575
2019-01-02   -0.272201
2019-01-03    0.842686
2019-01-04    0.252868
2019-01-05    0.281876
2019-01-06   -0.468258
Freq: D, Name: A, dtype: float64

df[0:3]  # slicing by position returns rows 0, 1, 2 only (end-exclusive)
                   A         B         C         D
2019-01-01  0.317575  0.330549  0.055375 -1.284453
2019-01-02 -0.272201 -0.770618  0.217658  1.349425
2019-01-03  0.842686 -0.973354  0.596166  0.215889
df['20190101':'20190103']  # slicing by label includes both endpoints
                   A         B         C         D
2019-01-01  0.317575  0.330549  0.055375 -1.284453
2019-01-02 -0.272201 -0.770618  0.217658  1.349425
2019-01-03  0.842686 -0.973354  0.596166  0.215889
df.loc[dates[0]]  # select a row by its label
A    0.317575
B    0.330549
C    0.055375
D   -1.284453
Name: 2019-01-01 00:00:00, dtype: float64

dates[0]
Timestamp('2019-01-01 00:00:00', freq='D')

df.loc[:,['A','B']]  # select columns A and B for all rows
                   A         B
2019-01-01  0.317575  0.330549
2019-01-02 -0.272201 -0.770618
2019-01-03  0.842686 -0.973354
2019-01-04  0.252868 -0.778050
2019-01-05  0.281876  1.143140
2019-01-06 -0.468258 -1.682376
df.loc['20190104',['A','B']].shape  # the result has reduced dimensions: a Series of length 2
(2,)

# loc and at can also be used to fetch a scalar value
df.loc[dates[0],'A']
0.3175745430028141

df.at[dates[0],'A']
0.3175745430028141

df.at[dates[0],'A']  # at accesses a scalar by label
df.iat[1,2]          # iat accesses a scalar by integer position
0.21765843113729494

df.iloc[3]
A    0.252868
B   -0.778050
C   -0.324255
D   -1.378140
Name: 2019-01-04 00:00:00, dtype: float64

df.iloc[3,1]
-0.7780496857268889

df.iloc[[1,2,4],[0,2]]
                   A         C
2019-01-02 -0.272201  0.217658
2019-01-03  0.842686  0.596166
2019-01-05  0.281876 -0.781625
df.iloc[1:3,:]
                   A         B         C         D
2019-01-02 -0.272201 -0.770618  0.217658  1.349425
2019-01-03  0.842686 -0.973354  0.596166  0.215889
df.iloc[1:3]
                   A         B         C         D
2019-01-02 -0.272201 -0.770618  0.217658  1.349425
2019-01-03  0.842686 -0.973354  0.596166  0.215889
df[df.A>0]
                   A         B         C         D
2019-01-01  0.317575  0.330549  0.055375 -1.284453
2019-01-03  0.842686 -0.973354  0.596166  0.215889
2019-01-04  0.252868 -0.778050 -0.324255 -1.378140
2019-01-05  0.281876  1.143140 -0.781625 -1.245069
df[df>0]
                   A         B         C         D
2019-01-01  0.317575  0.330549  0.055375       NaN
2019-01-02       NaN       NaN  0.217658  1.349425
2019-01-03  0.842686       NaN  0.596166  0.215889
2019-01-04  0.252868       NaN       NaN       NaN
2019-01-05  0.281876  1.143140       NaN       NaN
2019-01-06       NaN       NaN  1.494058  1.262588
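df[df > 0] keeps the shape of df and masks every entry that fails the condition with NaN; it is equivalent to df.where. A minimal sketch:

df.where(df > 0)      # same result as df[df > 0]
df.where(df > 0, 0)   # or fill the masked entries with 0 instead of NaN
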
df3 = df.copy()
df3['E']=['one','two','three','four','three','one']
df3
                   A         B         C         D      E
2019-01-01  0.317575  0.330549  0.055375 -1.284453    one
2019-01-02 -0.272201 -0.770618  0.217658  1.349425    two
2019-01-03  0.842686 -0.973354  0.596166  0.215889  three
2019-01-04  0.252868 -0.778050 -0.324255 -1.378140   four
2019-01-05  0.281876  1.143140 -0.781625 -1.245069  three
2019-01-06 -0.468258 -1.682376  1.494058  1.262588    one
df3[df3['E'].isin(['one','two'])]
                   A         B         C         D    E
2019-01-01  0.317575  0.330549  0.055375 -1.284453  one
2019-01-02 -0.272201 -0.770618  0.217658  1.349425  two
2019-01-06 -0.468258 -1.682376  1.494058  1.262588  one
# when setting a new column, the data is automatically aligned by index
s1= pd.Series([1,2,3,4,5,6],index=pd.date_range('20190102',periods=6))
s1
2019-01-02    1
2019-01-03    2
2019-01-04    3
2019-01-05    4
2019-01-06    5
2019-01-07    6
Freq: D, dtype: int64

df['F']=s1
df
                   A         B         C         D    F
2019-01-01  0.317575  0.330549  0.055375 -1.284453  NaN
2019-01-02 -0.272201 -0.770618  0.217658  1.349425  1.0
2019-01-03  0.842686 -0.973354  0.596166  0.215889  2.0
2019-01-04  0.252868 -0.778050 -0.324255 -1.378140  3.0
2019-01-05  0.281876  1.143140 -0.781625 -1.245069  4.0
2019-01-06 -0.468258 -1.682376  1.494058  1.262588  5.0
df.at[dates[0],'A']=0
df.iat[0,1]=0
df.loc[:,'D']=np.array([5]*len(df))
df
                   A         B         C  D    F
2019-01-01  0.000000  0.000000  0.055375  5  NaN
2019-01-02 -0.272201 -0.770618  0.217658  5  1.0
2019-01-03  0.842686 -0.973354  0.596166  5  2.0
2019-01-04  0.252868 -0.778050 -0.324255  5  3.0
2019-01-05  0.281876  1.143140 -0.781625  5  4.0
2019-01-06 -0.468258 -1.682376  1.494058  5  5.0
df4 = df.copy()
df4[df4>0]=-df4
df4
                   A         B         C  D    F
2019-01-01  0.000000  0.000000 -0.055375 -5  NaN
2019-01-02 -0.272201 -0.770618 -0.217658 -5 -1.0
2019-01-03 -0.842686 -0.973354 -0.596166 -5 -2.0
2019-01-04 -0.252868 -0.778050 -0.324255 -5 -3.0
2019-01-05 -0.281876 -1.143140 -0.781625 -5 -4.0
2019-01-06 -0.468258 -1.682376 -1.494058 -5 -5.0
df1 = df.reindex(index=dates[0:4],columns=list(df.columns)+['E'])
df1.loc[dates[0]:dates[1],'E']=1
df1
                   A         B         C  D    F    E
2019-01-01  0.000000  0.000000  0.055375  5  NaN  1.0
2019-01-02 -0.272201 -0.770618  0.217658  5  1.0  1.0
2019-01-03  0.842686 -0.973354  0.596166  5  2.0  NaN
2019-01-04  0.252868 -0.778050 -0.324255  5  3.0  NaN
df1.dropna(how='any')
                   A         B         C  D    F    E
2019-01-02 -0.272201 -0.770618  0.217658  5  1.0  1.0
df1.fillna(value=5)
                   A         B         C  D    F    E
2019-01-01  0.000000  0.000000  0.055375  5  5.0  1.0
2019-01-02 -0.272201 -0.770618  0.217658  5  1.0  1.0
2019-01-03  0.842686 -0.973354  0.596166  5  2.0  5.0
2019-01-04  0.252868 -0.778050 -0.324255  5  3.0  5.0
pd.isna(df1)
                A      B      C      D      F      E
2019-01-01  False  False  False  False   True  False
2019-01-02  False  False  False  False  False  False
2019-01-03  False  False  False  False  False   True
2019-01-04  False  False  False  False  False   True
df.mean()  # per-column mean
A    0.106162
B   -0.510209
C    0.209563
D    5.000000
F    3.000000
dtype: float64

df.mean(1)  # per-row mean (across columns)
2019-01-01    1.263844
2019-01-02    1.034968
2019-01-03    1.493100
2019-01-04    1.430113
2019-01-05    1.928678
2019-01-06    1.868685
Freq: D, dtype: float64

# Operations between objects of different shapes align on their labels; in addition, pandas broadcasts automatically along the specified dimension.
s = pd.Series([1,3,5,np.nan,6,8],index=dates).shift(2)
s
2019-01-01    NaN
2019-01-02    NaN
2019-01-03    1.0
2019-01-04    3.0
2019-01-05    5.0
2019-01-06    NaN
Freq: D, dtype: float64

df.sub(s,axis='index')  # subtract s from each column, aligned on the row index
                   A         B         C    D    F
2019-01-01       NaN       NaN       NaN  NaN  NaN
2019-01-02       NaN       NaN       NaN  NaN  NaN
2019-01-03 -0.157314 -1.973354 -0.403834  4.0  1.0
2019-01-04 -2.747132 -3.778050 -3.324255  2.0  0.0
2019-01-05 -4.718124 -3.856860 -5.781625  0.0 -1.0
2019-01-06       NaN       NaN       NaN  NaN  NaN
df
                   A         B         C  D    F
2019-01-01  0.000000  0.000000  0.055375  5  NaN
2019-01-02 -0.272201 -0.770618  0.217658  5  1.0
2019-01-03  0.842686 -0.973354  0.596166  5  2.0
2019-01-04  0.252868 -0.778050 -0.324255  5  3.0
2019-01-05  0.281876  1.143140 -0.781625  5  4.0
2019-01-06 -0.468258 -1.682376  1.494058  5  5.0
df.apply(np.cumsum)  # cumulative sum down each column, first row to last
                   A         B         C   D     F
2019-01-01  0.000000  0.000000  0.055375   5   NaN
2019-01-02 -0.272201 -0.770618  0.273034  10   1.0
2019-01-03  0.570485 -1.743972  0.869200  15   3.0
2019-01-04  0.823353 -2.522022  0.544945  20   6.0
2019-01-05  1.105229 -1.378881 -0.236680  25  10.0
2019-01-06  0.636971 -3.061257  1.257378  30  15.0
df.apply(lambda x:x.max()-x.min())  # per-column range (max - min)
A    1.310944
B    2.825516
C    2.275684
D    0.000000
F    4.000000
dtype: float64

df['F'].value_counts()
5.0    1
4.0    1
3.0    1
2.0    1
1.0    1
Name: F, dtype: int64

s = pd.Series(np.random.randint(0,7,size=10))
s
0    1
1    1
2    1
3    2
4    3
5    2
6    6
7    0
8    4
9    6
dtype: int32

s.dtype
dtype('int32')

s.shape
(10,)

s.value_counts()
1    3
6    2
2    2
4    1
3    1
0    1
dtype: int64

s = pd.Series(['A', 'B', 'C', 'Aaba', 'Baca', np.nan, 'CABA', 'dog', 'cat'])
s.dtype
dtype('O')

s.str
<pandas.core.strings.StringMethods at 0x22f0313d160>

s.str.lower()
0       a
1       b
2       c
3    aaba
4    baca
5     NaN
6    caba
7     dog
8     cat
dtype: object

# combining data
df = pd.DataFrame(np.random.randn(10,4))
df
          0         1         2         3
0 -0.936960 -0.210650  1.887069  0.128005
1  0.657660  0.254801 -0.092190 -1.957160
2 -0.920141 -2.259964  0.083965  0.371865
3 -0.755403  0.633426  0.090949 -0.626964
4  0.170052  1.164537 -1.193706  1.391785
5  0.084560 -1.513814  0.069032  0.099851
6 -0.683663  0.031873 -0.062998  0.523253
7 -0.926594  0.125286 -1.894089 -0.449402
8  0.610722  0.329156  0.025149 -2.673445
9  0.336673  1.205792 -1.346179  0.214389
pieces = [df[:3],df[3:7],df[7:]]
pieces
[          0         1         2         3
 0 -0.936960 -0.210650  1.887069  0.128005
 1  0.657660  0.254801 -0.092190 -1.957160
 2 -0.920141 -2.259964  0.083965  0.371865,
           0         1         2         3
 3 -0.755403  0.633426  0.090949 -0.626964
 4  0.170052  1.164537 -1.193706  1.391785
 5  0.084560 -1.513814  0.069032  0.099851
 6 -0.683663  0.031873 -0.062998  0.523253,
           0         1         2         3
 7 -0.926594  0.125286 -1.894089 -0.449402
 8  0.610722  0.329156  0.025149 -2.673445
 9  0.336673  1.205792 -1.346179  0.214389]

pd.concat(pieces)
          0         1         2         3
0 -0.936960 -0.210650  1.887069  0.128005
1  0.657660  0.254801 -0.092190 -1.957160
2 -0.920141 -2.259964  0.083965  0.371865
3 -0.755403  0.633426  0.090949 -0.626964
4  0.170052  1.164537 -1.193706  1.391785
5  0.084560 -1.513814  0.069032  0.099851
6 -0.683663  0.031873 -0.062998  0.523253
7 -0.926594  0.125286 -1.894089 -0.449402
8  0.610722  0.329156  0.025149 -2.673445
9  0.336673  1.205792 -1.346179  0.214389

concat

  1. axis=0 concatenates rows (the default); axis=1 concatenates columns
  2. join='inner' keeps only the labels shared by all objects; join='outer' keeps the union of all labels
  3. ignore_index=True generates a fresh consecutive index; ignore_index=False keeps the original index values
  4. verify_integrity checks whether the concatenation produces duplicate index entries
  5. keys adds an outer level, producing a hierarchical (MultiIndex) index
  6. names labels the levels of that hierarchical index (see the sketch after this list)
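
A minimal sketch of a few of these options, reusing the pieces list above (the 'p1'/'p2'/'p3' labels are illustrative):

pd.concat(pieces, ignore_index=True)          # renumber the result 0..9
pd.concat(pieces, keys=['p1', 'p2', 'p3'])    # adds an outer index level -> MultiIndex
pd.concat(pieces, axis=1, join='inner')       # keeps only row labels common to all pieces
                                              # (empty here: the pieces' indexes are disjoint)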
pd.concat(pieces,axis=1)
          0         1         2         3         0         1         2         3         0         1         2         3
0 -0.936960 -0.210650  1.887069  0.128005       NaN       NaN       NaN       NaN       NaN       NaN       NaN       NaN
1  0.657660  0.254801 -0.092190 -1.957160       NaN       NaN       NaN       NaN       NaN       NaN       NaN       NaN
2 -0.920141 -2.259964  0.083965  0.371865       NaN       NaN       NaN       NaN       NaN       NaN       NaN       NaN
3       NaN       NaN       NaN       NaN -0.755403  0.633426  0.090949 -0.626964       NaN       NaN       NaN       NaN
4       NaN       NaN       NaN       NaN  0.170052  1.164537 -1.193706  1.391785       NaN       NaN       NaN       NaN
5       NaN       NaN       NaN       NaN  0.084560 -1.513814  0.069032  0.099851       NaN       NaN       NaN       NaN
6       NaN       NaN       NaN       NaN -0.683663  0.031873 -0.062998  0.523253       NaN       NaN       NaN       NaN
7       NaN       NaN       NaN       NaN       NaN       NaN       NaN       NaN -0.926594  0.125286 -1.894089 -0.449402
8       NaN       NaN       NaN       NaN       NaN       NaN       NaN       NaN  0.610722  0.329156  0.025149 -2.673445
9       NaN       NaN       NaN       NaN       NaN       NaN       NaN       NaN  0.336673  1.205792 -1.346179  0.214389
pd.concat(pieces,join='outer',axis=1)
          0         1         2         3         0         1         2         3         0         1         2         3
0 -0.936960 -0.210650  1.887069  0.128005       NaN       NaN       NaN       NaN       NaN       NaN       NaN       NaN
1  0.657660  0.254801 -0.092190 -1.957160       NaN       NaN       NaN       NaN       NaN       NaN       NaN       NaN
2 -0.920141 -2.259964  0.083965  0.371865       NaN       NaN       NaN       NaN       NaN       NaN       NaN       NaN
3       NaN       NaN       NaN       NaN -0.755403  0.633426  0.090949 -0.626964       NaN       NaN       NaN       NaN
4       NaN       NaN       NaN       NaN  0.170052  1.164537 -1.193706  1.391785       NaN       NaN       NaN       NaN
5       NaN       NaN       NaN       NaN  0.084560 -1.513814  0.069032  0.099851       NaN       NaN       NaN       NaN
6       NaN       NaN       NaN       NaN -0.683663  0.031873 -0.062998  0.523253       NaN       NaN       NaN       NaN
7       NaN       NaN       NaN       NaN       NaN       NaN       NaN       NaN -0.926594  0.125286 -1.894089 -0.449402
8       NaN       NaN       NaN       NaN       NaN       NaN       NaN       NaN  0.610722  0.329156  0.025149 -2.673445
9       NaN       NaN       NaN       NaN       NaN       NaN       NaN       NaN  0.336673  1.205792 -1.346179  0.214389
# SQL-style merge
left = pd.DataFrame({'key':['foo','bar'],'lval':[1,2]})
right = pd.DataFrame({'key':['foo','bar'],'rval':[4,5]})
left
   key  lval
0  foo     1
1  bar     2
right
   key  rval
0  foo     4
1  bar     5
pd.merge(left,right,on='key')
   key  lval  rval
0  foo     1     4
1  bar     2     5
left = pd.DataFrame({'key':['foo','foo'],'lval':[1,2]})
right = pd.DataFrame({'key':['foo','foo'],'rval':[4,5]})
left
   key  lval
0  foo     1
1  foo     2
right
   key  rval
0  foo     4
1  foo     5
pd.merge(left,right,on='key')  # join on a key column; duplicate keys yield the cartesian product of the matches
   key  lval  rval
0  foo     1     4
1  foo     1     5
2  foo     2     4
3  foo     2     5
# appending rows to a DataFrame
df = pd.DataFrame(np.random.randn(8,4),columns=list('ABCD'))
df
          A         B         C         D
0  0.862756 -1.436692  0.367396  1.033803
1 -0.732357 -0.350199 -0.116083 -2.435210
2  0.316582  0.468616  0.433001 -0.443120
3 -0.189939 -2.437137  0.126893 -2.273711
4  0.913514 -0.752727 -1.651140  1.156839
5 -0.314581  1.296585  0.579130 -0.871556
6  0.361473  0.687854 -1.044602  0.233138
7  0.045199  2.176608 -0.258569 -1.018576
s = df.iloc[3]
df.append(s,ignore_index=False)
          A         B         C         D
0  0.862756 -1.436692  0.367396  1.033803
1 -0.732357 -0.350199 -0.116083 -2.435210
2  0.316582  0.468616  0.433001 -0.443120
3 -0.189939 -2.437137  0.126893 -2.273711
4  0.913514 -0.752727 -1.651140  1.156839
5 -0.314581  1.296585  0.579130 -0.871556
6  0.361473  0.687854 -1.044602  0.233138
7  0.045199  2.176608 -0.258569 -1.018576
3 -0.189939 -2.437137  0.126893 -2.273711
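
Note: DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; pd.concat gives the same result. A minimal sketch:

row = df.iloc[[3]]     # double brackets keep it a one-row DataFrame
pd.concat([df, row])   # same output as df.append(s, ignore_index=False)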

"Group by" refers to a process involving one or more of the following steps:
splitting the data into groups based on some criteria
applying a function to each group independently
combining the results into a data structure

df = pd.DataFrame({'A' : ['foo', 'bar', 'foo', 'bar',
                          'foo', 'bar', 'foo', 'foo'],
                   'B' : ['one', 'one', 'two', 'three',
                          'two', 'two', 'one', 'three'],
                   'C' : np.random.randn(8),
                   'D' : np.random.randn(8)})
df
     A      B         C         D
0  foo    one  0.982988 -0.386029
1  bar    one  1.632482 -0.327520
2  foo    two  0.149619 -0.138297
3  bar  three -1.480397  1.105690
4  foo    two  0.647044 -1.097276
5  bar    two -0.675596  0.250176
6  foo    one  0.437309  1.031742
7  foo  three  0.434659  1.197695
df.groupby('A').sum()
            C         D
A
bar -0.523512  1.028346
foo  2.651618  0.607834
df.groupby('A').max()
       B         C         D
A
bar  two  1.632482  1.105690
foo  two  0.982988  1.197695
df.groupby(['A','B']).sum()
                  C         D
A   B
bar one    1.632482 -0.327520
    three -1.480397  1.105690
    two   -0.675596  0.250176
foo one    1.420297  0.645713
    three  0.434659  1.197695
    two    0.796663 -1.235574
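
A groupby is not limited to a single built-in reduction; .agg applies several functions at once. A minimal sketch using the df above:

df.groupby('A')['C'].agg(['sum', 'mean', 'max'])   # one result column per aggregation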
tuples = list(zip(*[['bar', 'bar', 'baz', 'baz',
                     'foo', 'foo', 'qux', 'qux'],
                    ['one', 'two', 'one', 'two',
                     'one', 'two', 'one', 'two']]))
tuples
[('bar', 'one'),
 ('bar', 'two'),
 ('baz', 'one'),
 ('baz', 'two'),
 ('foo', 'one'),
 ('foo', 'two'),
 ('qux', 'one'),
 ('qux', 'two')]

zip() takes iterables as arguments, packs their corresponding elements into tuples, and returns those tuples. In Python 3 it returns a lazy iterator rather than a list, which saves memory.
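
For example:

list(zip([1, 2, 3], ['a', 'b', 'c']))   # [(1, 'a'), (2, 'b'), (3, 'c')]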

index = pd.MultiIndex.from_tuples(tuples,names=['first','second'])
index
MultiIndex(levels=[['bar', 'baz', 'foo', 'qux'], ['one', 'two']],
           labels=[[0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 0, 1, 0, 1, 0, 1]],
           names=['first', 'second'])

df = pd.DataFrame(np.random.randn(8,2),index=index,columns=['A','B'])
df
                     A         B
first second
bar   one    -0.752364  0.982241
      two    -0.626690  1.358099
baz   one     0.342360 -0.618870
      two    -0.399448 -0.500175
foo   one    -0.746328 -0.244752
      two     1.556458 -1.340098
qux   one    -0.569393 -0.340625
      two     0.513714  0.151477
# stack() compresses a level of the column labels into the index
stacked = df.stack()
stacked
first  second   
bar    one     A   -0.752364
               B    0.982241
       two     A   -0.626690
               B    1.358099
baz    one     A    0.342360
               B   -0.618870
       two     A   -0.399448
               B   -0.500175
foo    one     A   -0.746328
               B   -0.244752
       two     A    1.556458
               B   -1.340098
qux    one     A   -0.569393
               B   -0.340625
       two     A    0.513714
               B    0.151477
dtype: float64

stacked.unstack()
                     A         B
first second
bar   one    -0.752364  0.982241
      two    -0.626690  1.358099
baz   one     0.342360 -0.618870
      two    -0.399448 -0.500175
foo   one    -0.746328 -0.244752
      two     1.556458 -1.340098
qux   one    -0.569393 -0.340625
      two     0.513714  0.151477
stacked.unstack(1)
second        one       two
first
bar   A -0.752364 -0.626690
      B  0.982241  1.358099
baz   A  0.342360 -0.399448
      B -0.618870 -0.500175
foo   A -0.746328  1.556458
      B -0.244752 -1.340098
qux   A -0.569393  0.513714
      B -0.340625  0.151477
stacked.unstack(0)
first          bar       baz       foo       qux
second
one    A -0.752364  0.342360 -0.746328 -0.569393
       B  0.982241 -0.618870 -0.244752 -0.340625
two    A -0.626690 -0.399448  1.556458  0.513714
       B  1.358099 -0.500175 -1.340098  0.151477
df = pd.DataFrame({'A' : ['one', 'one', 'two', 'three'] * 3,
                   'B' : ['A', 'B', 'C'] * 4,
                   'C' : ['foo', 'foo', 'foo', 'bar', 'bar', 'bar'] * 2,
                   'D' : np.random.randn(12),
                   'E' : np.random.randn(12)})
df
        A  B    C         D         E
0     one  A  foo  0.346892 -0.615216
1     one  B  foo  0.808681  0.161598
2     two  C  foo -1.324783 -0.088082
3   three  A  bar -0.227795 -0.180022
4     one  B  bar  1.130028 -0.361439
5     one  C  bar  0.510629 -1.466063
6     two  A  foo  0.379503  0.008279
7   three  B  foo  0.921087 -0.148614
8     one  C  foo  0.332222 -0.127428
9     one  A  bar -0.784876 -0.736117
10    two  B  bar -0.793957  1.705022
11  three  C  bar -0.898485  1.038166
pd.pivot_table(df,values='D',index=['A','B'],columns=['C'])
C             bar       foo
A     B
one   A -0.784876  0.346892
      B  1.130028  0.808681
      C  0.510629  0.332222
three A -0.227795       NaN
      B       NaN  0.921087
      C -0.898485       NaN
two   A       NaN  0.379503
      B -0.793957       NaN
      C       NaN -1.324783
rng = pd.date_range('1/1/2012', periods=100, freq='S')
rng
DatetimeIndex(['2012-01-01 00:00:00', '2012-01-01 00:00:01',
               '2012-01-01 00:00:02', '2012-01-01 00:00:03',
               '2012-01-01 00:00:04', '2012-01-01 00:00:05',
               '2012-01-01 00:00:06', '2012-01-01 00:00:07',
               '2012-01-01 00:00:08', '2012-01-01 00:00:09',
               '2012-01-01 00:00:10', '2012-01-01 00:00:11',
               '2012-01-01 00:00:12', '2012-01-01 00:00:13',
               '2012-01-01 00:00:14', '2012-01-01 00:00:15',
               '2012-01-01 00:00:16', '2012-01-01 00:00:17',
               '2012-01-01 00:00:18', '2012-01-01 00:00:19',
               '2012-01-01 00:00:20', '2012-01-01 00:00:21',
               '2012-01-01 00:00:22', '2012-01-01 00:00:23',
               '2012-01-01 00:00:24', '2012-01-01 00:00:25',
               '2012-01-01 00:00:26', '2012-01-01 00:00:27',
               '2012-01-01 00:00:28', '2012-01-01 00:00:29',
               '2012-01-01 00:00:30', '2012-01-01 00:00:31',
               '2012-01-01 00:00:32', '2012-01-01 00:00:33',
               '2012-01-01 00:00:34', '2012-01-01 00:00:35',
               '2012-01-01 00:00:36', '2012-01-01 00:00:37',
               '2012-01-01 00:00:38', '2012-01-01 00:00:39',
               '2012-01-01 00:00:40', '2012-01-01 00:00:41',
               '2012-01-01 00:00:42', '2012-01-01 00:00:43',
               '2012-01-01 00:00:44', '2012-01-01 00:00:45',
               '2012-01-01 00:00:46', '2012-01-01 00:00:47',
               '2012-01-01 00:00:48', '2012-01-01 00:00:49',
               '2012-01-01 00:00:50', '2012-01-01 00:00:51',
               '2012-01-01 00:00:52', '2012-01-01 00:00:53',
               '2012-01-01 00:00:54', '2012-01-01 00:00:55',
               '2012-01-01 00:00:56', '2012-01-01 00:00:57',
               '2012-01-01 00:00:58', '2012-01-01 00:00:59',
               '2012-01-01 00:01:00', '2012-01-01 00:01:01',
               '2012-01-01 00:01:02', '2012-01-01 00:01:03',
               '2012-01-01 00:01:04', '2012-01-01 00:01:05',
               '2012-01-01 00:01:06', '2012-01-01 00:01:07',
               '2012-01-01 00:01:08', '2012-01-01 00:01:09',
               '2012-01-01 00:01:10', '2012-01-01 00:01:11',
               '2012-01-01 00:01:12', '2012-01-01 00:01:13',
               '2012-01-01 00:01:14', '2012-01-01 00:01:15',
               '2012-01-01 00:01:16', '2012-01-01 00:01:17',
               '2012-01-01 00:01:18', '2012-01-01 00:01:19',
               '2012-01-01 00:01:20', '2012-01-01 00:01:21',
               '2012-01-01 00:01:22', '2012-01-01 00:01:23',
               '2012-01-01 00:01:24', '2012-01-01 00:01:25',
               '2012-01-01 00:01:26', '2012-01-01 00:01:27',
               '2012-01-01 00:01:28', '2012-01-01 00:01:29',
               '2012-01-01 00:01:30', '2012-01-01 00:01:31',
               '2012-01-01 00:01:32', '2012-01-01 00:01:33',
               '2012-01-01 00:01:34', '2012-01-01 00:01:35',
               '2012-01-01 00:01:36', '2012-01-01 00:01:37',
               '2012-01-01 00:01:38', '2012-01-01 00:01:39'],
              dtype='datetime64[ns]', freq='S')

ts = pd.Series(np.random.randint(0, 500, len(rng)), index=rng)
ts
2012-01-01 00:00:00    225
2012-01-01 00:00:01    354
2012-01-01 00:00:02    438
2012-01-01 00:00:03    440
2012-01-01 00:00:04      9
2012-01-01 00:00:05    179
2012-01-01 00:00:06    396
2012-01-01 00:00:07    200
2012-01-01 00:00:08    413
2012-01-01 00:00:09    490
2012-01-01 00:00:10     37
2012-01-01 00:00:11     57
2012-01-01 00:00:12     33
2012-01-01 00:00:13    388
2012-01-01 00:00:14     44
2012-01-01 00:00:15     95
2012-01-01 00:00:16      8
2012-01-01 00:00:17      1
2012-01-01 00:00:18    307
2012-01-01 00:00:19    332
2012-01-01 00:00:20     20
2012-01-01 00:00:21     84
2012-01-01 00:00:22    309
2012-01-01 00:00:23    308
2012-01-01 00:00:24     67
2012-01-01 00:00:25    245
2012-01-01 00:00:26    180
2012-01-01 00:00:27      9
2012-01-01 00:00:28    126
2012-01-01 00:00:29    232
                      ... 
2012-01-01 00:01:10    409
2012-01-01 00:01:11    355
2012-01-01 00:01:12     70
2012-01-01 00:01:13    266
2012-01-01 00:01:14    118
2012-01-01 00:01:15    325
2012-01-01 00:01:16    214
2012-01-01 00:01:17      3
2012-01-01 00:01:18    143
2012-01-01 00:01:19     28
2012-01-01 00:01:20     56
2012-01-01 00:01:21    120
2012-01-01 00:01:22     99
2012-01-01 00:01:23    102
2012-01-01 00:01:24     71
2012-01-01 00:01:25    464
2012-01-01 00:01:26    489
2012-01-01 00:01:27    404
2012-01-01 00:01:28    356
2012-01-01 00:01:29    197
2012-01-01 00:01:30    390
2012-01-01 00:01:31    345
2012-01-01 00:01:32    115
2012-01-01 00:01:33    377
2012-01-01 00:01:34    388
2012-01-01 00:01:35     39
2012-01-01 00:01:36    406
2012-01-01 00:01:37    408
2012-01-01 00:01:38    410
2012-01-01 00:01:39    256
Freq: S, Length: 100, dtype: int32
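
This one-second-frequency series is the usual setup for resampling; a minimal sketch (the call is not shown in the original output):

ts.resample('5Min').sum()   # aggregate the 100 one-second samples into 5-minute bins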

rng = pd.date_range('3/6/2012 00:00', periods=5, freq='D')
rng
DatetimeIndex(['2012-03-06', '2012-03-07', '2012-03-08', '2012-03-09',
               '2012-03-10'],
              dtype='datetime64[ns]', freq='D')

ts_utc = ts.tz_localize('UTC')

ts_utc
2012-01-01 00:00:00+00:00    225
2012-01-01 00:00:01+00:00    354
2012-01-01 00:00:02+00:00    438
2012-01-01 00:00:03+00:00    440
2012-01-01 00:00:04+00:00      9
2012-01-01 00:00:05+00:00    179
2012-01-01 00:00:06+00:00    396
2012-01-01 00:00:07+00:00    200
2012-01-01 00:00:08+00:00    413
2012-01-01 00:00:09+00:00    490
2012-01-01 00:00:10+00:00     37
2012-01-01 00:00:11+00:00     57
2012-01-01 00:00:12+00:00     33
2012-01-01 00:00:13+00:00    388
2012-01-01 00:00:14+00:00     44
2012-01-01 00:00:15+00:00     95
2012-01-01 00:00:16+00:00      8
2012-01-01 00:00:17+00:00      1
2012-01-01 00:00:18+00:00    307
2012-01-01 00:00:19+00:00    332
2012-01-01 00:00:20+00:00     20
2012-01-01 00:00:21+00:00     84
2012-01-01 00:00:22+00:00    309
2012-01-01 00:00:23+00:00    308
2012-01-01 00:00:24+00:00     67
2012-01-01 00:00:25+00:00    245
2012-01-01 00:00:26+00:00    180
2012-01-01 00:00:27+00:00      9
2012-01-01 00:00:28+00:00    126
2012-01-01 00:00:29+00:00    232
                            ... 
2012-01-01 00:01:10+00:00    409
2012-01-01 00:01:11+00:00    355
2012-01-01 00:01:12+00:00     70
2012-01-01 00:01:13+00:00    266
2012-01-01 00:01:14+00:00    118
2012-01-01 00:01:15+00:00    325
2012-01-01 00:01:16+00:00    214
2012-01-01 00:01:17+00:00      3
2012-01-01 00:01:18+00:00    143
2012-01-01 00:01:19+00:00     28
2012-01-01 00:01:20+00:00     56
2012-01-01 00:01:21+00:00    120
2012-01-01 00:01:22+00:00     99
2012-01-01 00:01:23+00:00    102
2012-01-01 00:01:24+00:00     71
2012-01-01 00:01:25+00:00    464
2012-01-01 00:01:26+00:00    489
2012-01-01 00:01:27+00:00    404
2012-01-01 00:01:28+00:00    356
2012-01-01 00:01:29+00:00    197
2012-01-01 00:01:30+00:00    390
2012-01-01 00:01:31+00:00    345
2012-01-01 00:01:32+00:00    115
2012-01-01 00:01:33+00:00    377
2012-01-01 00:01:34+00:00    388
2012-01-01 00:01:35+00:00     39
2012-01-01 00:01:36+00:00    406
2012-01-01 00:01:37+00:00    408
2012-01-01 00:01:38+00:00    410
2012-01-01 00:01:39+00:00    256
Freq: S, Length: 100, dtype: int32

ts_utc.tz_convert('US/Eastern')
2011-12-31 19:00:00-05:00    225
2011-12-31 19:00:01-05:00    354
2011-12-31 19:00:02-05:00    438
2011-12-31 19:00:03-05:00    440
2011-12-31 19:00:04-05:00      9
2011-12-31 19:00:05-05:00    179
2011-12-31 19:00:06-05:00    396
2011-12-31 19:00:07-05:00    200
2011-12-31 19:00:08-05:00    413
2011-12-31 19:00:09-05:00    490
2011-12-31 19:00:10-05:00     37
2011-12-31 19:00:11-05:00     57
2011-12-31 19:00:12-05:00     33
2011-12-31 19:00:13-05:00    388
2011-12-31 19:00:14-05:00     44
2011-12-31 19:00:15-05:00     95
2011-12-31 19:00:16-05:00      8
2011-12-31 19:00:17-05:00      1
2011-12-31 19:00:18-05:00    307
2011-12-31 19:00:19-05:00    332
2011-12-31 19:00:20-05:00     20
2011-12-31 19:00:21-05:00     84
2011-12-31 19:00:22-05:00    309
2011-12-31 19:00:23-05:00    308
2011-12-31 19:00:24-05:00     67
2011-12-31 19:00:25-05:00    245
2011-12-31 19:00:26-05:00    180
2011-12-31 19:00:27-05:00      9
2011-12-31 19:00:28-05:00    126
2011-12-31 19:00:29-05:00    232
                            ... 
2011-12-31 19:01:10-05:00    409
2011-12-31 19:01:11-05:00    355
2011-12-31 19:01:12-05:00     70
2011-12-31 19:01:13-05:00    266
2011-12-31 19:01:14-05:00    118
2011-12-31 19:01:15-05:00    325
2011-12-31 19:01:16-05:00    214
2011-12-31 19:01:17-05:00      3
2011-12-31 19:01:18-05:00    143
2011-12-31 19:01:19-05:00     28
2011-12-31 19:01:20-05:00     56
2011-12-31 19:01:21-05:00    120
2011-12-31 19:01:22-05:00     99
2011-12-31 19:01:23-05:00    102
2011-12-31 19:01:24-05:00     71
2011-12-31 19:01:25-05:00    464
2011-12-31 19:01:26-05:00    489
2011-12-31 19:01:27-05:00    404
2011-12-31 19:01:28-05:00    356
2011-12-31 19:01:29-05:00    197
2011-12-31 19:01:30-05:00    390
2011-12-31 19:01:31-05:00    345
2011-12-31 19:01:32-05:00    115
2011-12-31 19:01:33-05:00    377
2011-12-31 19:01:34-05:00    388
2011-12-31 19:01:35-05:00     39
2011-12-31 19:01:36-05:00    406
2011-12-31 19:01:37-05:00    408
2011-12-31 19:01:38-05:00    410
2011-12-31 19:01:39-05:00    256
Freq: S, Length: 100, dtype: int32

prng = pd.period_range('1990Q1', '2000Q4', freq='Q-NOV')
prng
PeriodIndex(['1990Q1', '1990Q2', '1990Q3', '1990Q4', '1991Q1', '1991Q2',
             '1991Q3', '1991Q4', '1992Q1', '1992Q2', '1992Q3', '1992Q4',
             '1993Q1', '1993Q2', '1993Q3', '1993Q4', '1994Q1', '1994Q2',
             '1994Q3', '1994Q4', '1995Q1', '1995Q2', '1995Q3', '1995Q4',
             '1996Q1', '1996Q2', '1996Q3', '1996Q4', '1997Q1', '1997Q2',
             '1997Q3', '1997Q4', '1998Q1', '1998Q2', '1998Q3', '1998Q4',
             '1999Q1', '1999Q2', '1999Q3', '1999Q4', '2000Q1', '2000Q2',
             '2000Q3', '2000Q4'],
            dtype='period[Q-NOV]', freq='Q-NOV')

ts = pd.Series(np.random.randn(len(prng)), prng)
ts
1990Q1    1.796304
1990Q2    0.659808
1990Q3   -0.647755
1990Q4    1.846486
1991Q1    0.488348
1991Q2    1.830351
1991Q3   -1.658804
1991Q4    0.585780
1992Q1   -0.596026
1992Q2   -1.900346
1992Q3   -0.066638
1992Q4    0.419037
1993Q1    0.055711
1993Q2   -2.103900
1993Q3    0.229944
1993Q4    0.317348
1994Q1   -0.776638
1994Q2   -0.241438
1994Q3   -0.587104
1994Q4    0.825772
1995Q1    2.444721
1995Q2    0.803142
1995Q3    0.494378
1995Q4   -0.984900
1996Q1   -0.431641
1996Q2    0.766768
1996Q3   -1.176313
1996Q4    0.339700
1997Q1   -1.523029
1997Q2    0.512173
1997Q3    1.359914
1997Q4    0.564407
1998Q1    0.354859
1998Q2   -0.493561
1998Q3    0.514986
1998Q4   -0.156142
1999Q1    1.047135
1999Q2    0.648944
1999Q3   -1.581937
1999Q4    0.261181
2000Q1   -0.809498
2000Q2    1.102175
2000Q3    0.424905
2000Q4   -0.775245
Freq: Q-NOV, dtype: float64

ts.index = (prng.asfreq('M', 'e') + 1).asfreq('H', 's') + 9  # 'e'=end, 's'=start: 9am on the first day of the month after each quarter end
ts.head()
1990-03-01 09:00    1.796304
1990-06-01 09:00    0.659808
1990-09-01 09:00   -0.647755
1990-12-01 09:00    1.846486
1991-03-01 09:00    0.488348
Freq: H, dtype: float64

df = pd.DataFrame({"id":[1,2,3,4,5,6], "raw_grade":['a', 'b', 'b', 'a', 'a', 'e']})
df
   id raw_grade
0   1         a
1   2         b
2   3         b
3   4         a
4   5         a
5   6         e
df["grade"] = df["raw_grade"].astype("category")
df["grade"]
0    a
1    b
2    b
3    a
4    a
5    e
Name: grade, dtype: category
Categories (3, object): [a, b, e]

df["grade"].cat.categories = ["very good", "good", "very bad"]
df["grade"]
0    very good
1         good
2         good
3    very good
4    very good
5     very bad
Name: grade, dtype: category
Categories (3, object): [very good, good, very bad]
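
Note: assigning to Series.cat.categories in place, as above, was later deprecated and removed in pandas 2.0; the supported spelling is rename_categories. A sketch:

df["grade"] = df["grade"].cat.rename_categories(["very good", "good", "very bad"])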

df["grade"] = df["grade"].cat.set_categories(["very bad", "bad", "medium", "good", "very good"])
df["grade"]#methods under Series .cat return a new Series by default
0    very good
1         good
2         good
3    very good
4    very good
5     very bad
Name: grade, dtype: category
Categories (5, object): [very bad, bad, medium, good, very good]

df.sort_values(by="grade")
   id raw_grade      grade
5   6         e   very bad
1   2         b       good
2   3         b       good
0   1         a  very good
3   4         a  very good
4   5         a  very good
df.groupby("grade").size()
grade
very bad     1
bad          0
medium       0
good         2
very good    3
dtype: int64
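Every category is reported, including the empty ones, because grade is categorical. Newer pandas versions accept an observed flag to drop them, a sketch:

df.groupby("grade", observed=True).size()   # only categories that actually occur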

ts = pd.Series(np.random.randn(1000), index=pd.date_range('1/1/2000', periods=1000))
ts
2000-01-01    0.585074
2000-01-02    0.605786
2000-01-03    0.697632
2000-01-04   -0.783338
2000-01-05    1.150780
2000-01-06   -0.718491
2000-01-07    0.696745
2000-01-08    0.270574
2000-01-09    0.657496
2000-01-10   -2.613661
2000-01-11   -1.978929
2000-01-12    0.325563
2000-01-13    0.286470
2000-01-14   -0.315502
2000-01-15    0.487703
2000-01-16   -1.830420
2000-01-17    0.847074
2000-01-18   -2.363392
2000-01-19    0.139429
2000-01-20   -0.512045
2000-01-21    0.209301
2000-01-22   -0.202987
2000-01-23   -0.605512
2000-01-24    0.113967
2000-01-25   -0.546799
2000-01-26    1.758145
2000-01-27    0.299658
2000-01-28   -0.614838
2000-01-29    0.262877
2000-01-30    0.021676
                ...   
2002-08-28    0.222110
2002-08-29   -1.846013
2002-08-30   -0.094660
2002-08-31    1.281895
2002-09-01   -1.072053
2002-09-02    0.503427
2002-09-03   -0.499512
2002-09-04   -1.080912
2002-09-05   -0.780288
2002-09-06   -0.537608
2002-09-07   -0.991904
2002-09-08    0.159327
2002-09-09    0.224638
2002-09-10    2.063388
2002-09-11    1.217366
2002-09-12    0.603689
2002-09-13    0.832689
2002-09-14   -1.788089
2002-09-15   -2.183370
2002-09-16   -0.759798
2002-09-17   -0.836241
2002-09-18    0.298536
2002-09-19    1.969939
2002-09-20   -0.688728
2002-09-21   -0.964116
2002-09-22   -1.279596
2002-09-23    0.357739
2002-09-24    1.253534
2002-09-25   -0.798673
2002-09-26   -1.023241
Freq: D, Length: 1000, dtype: float64

ts.cumsum()
2000-01-01     0.585074
2000-01-02     1.190860
2000-01-03     1.888493
2000-01-04     1.105155
2000-01-05     2.255935
2000-01-06     1.537445
2000-01-07     2.234190
2000-01-08     2.504764
2000-01-09     3.162260
2000-01-10     0.548599
2000-01-11    -1.430329
2000-01-12    -1.104767
2000-01-13    -0.818296
2000-01-14    -1.133798
2000-01-15    -0.646095
2000-01-16    -2.476516
2000-01-17    -1.629442
2000-01-18    -3.992834
2000-01-19    -3.853405
2000-01-20    -4.365450
2000-01-21    -4.156149
2000-01-22    -4.359136
2000-01-23    -4.964649
2000-01-24    -4.850682
2000-01-25    -5.397481
2000-01-26    -3.639336
2000-01-27    -3.339678
2000-01-28    -3.954516
2000-01-29    -3.691639
2000-01-30    -3.669963
                ...    
2002-08-28   -12.290664
2002-08-29   -14.136676
2002-08-30   -14.231337
2002-08-31   -12.949442
2002-09-01   -14.021495
2002-09-02   -13.518068
2002-09-03   -14.017579
2002-09-04   -15.098492
2002-09-05   -15.878779
2002-09-06   -16.416387
2002-09-07   -17.408292
2002-09-08   -17.248965
2002-09-09   -17.024327
2002-09-10   -14.960940
2002-09-11   -13.743574
2002-09-12   -13.139885
2002-09-13   -12.307196
2002-09-14   -14.095285
2002-09-15   -16.278655
2002-09-16   -17.038453
2002-09-17   -17.874694
2002-09-18   -17.576157
2002-09-19   -15.606219
2002-09-20   -16.294946
2002-09-21   -17.259062
2002-09-22   -18.538658
2002-09-23   -18.180919
2002-09-24   -16.927385
2002-09-25   -17.726058
2002-09-26   -18.749299
Freq: D, Length: 1000, dtype: float64

ts.plot()  # plot the series against its index
<matplotlib.axes._subplots.AxesSubplot at 0x22f031729e8>

[figure: line plot of the series ts]

df = pd.DataFrame(np.random.randn(1000, 4), index=ts.index,
                  columns=['A', 'B', 'C', 'D'])
df
                   A         B         C         D
2000-01-01 -0.434770  1.797170  0.354820 -0.166193
2000-01-02  0.420233  0.823251  0.468701 -0.582949
2000-01-03  0.674668  1.032230  1.134346  0.696656
2000-01-04  0.948684  0.188295 -0.604249 -0.062779
2000-01-05 -1.381247 -0.360335 -0.346491  1.072618
2000-01-06  0.492510  1.924341  0.522605  0.293788
2000-01-07 -1.129093  0.063874  0.099183  0.557496
2000-01-08  1.142263 -0.309192  1.140049  1.007656
2000-01-09  1.751273 -0.747153  0.795127 -0.480155
2000-01-10 -1.519661 -1.187734  0.417908 -0.675147
2000-01-11 -0.096192  1.095308  0.094648  1.485943
2000-01-12  0.109115 -0.213535 -0.927250  1.189941
2000-01-13 -0.787367 -0.919787  1.286709  0.894471
2000-01-14 -0.584850  0.794088  0.533716 -0.159539
2000-01-15 -1.352332 -0.880446  0.041934  0.002573
2000-01-16  0.317933  0.957925  0.813780  0.952499
2000-01-17  0.950317  0.162642 -0.018575 -0.940598
2000-01-18 -2.021125  1.592108  0.219355 -1.300103
2000-01-19 -0.673145 -1.852674 -0.492845  0.070786
2000-01-20 -0.562802 -0.504083  0.980132 -0.079636
2000-01-21  0.693927  0.276601 -0.502267  1.824789
2000-01-22  0.240543 -0.049004  0.051460 -1.093965
2000-01-23  0.159181  0.559377  0.353952 -1.750909
2000-01-24 -1.009695 -0.169914  2.214441 -1.301680
2000-01-25  0.741394 -0.206067 -1.250305 -2.021061
2000-01-26 -1.050527 -0.448726  0.744841  0.559876
2000-01-27 -0.268987  0.755171 -0.865320 -0.077159
2000-01-28 -1.445525 -0.443887  0.048399  0.295317
2000-01-29 -0.348641 -0.570866  0.446533 -0.745215
2000-01-30 -0.803883  0.719817  0.035095 -0.057671
...              ...       ...       ...       ...
2002-08-28 -1.400857 -1.993967 -0.563839 -0.553431
2002-08-29 -0.860120 -0.252746 -0.585336  0.083630
2002-08-30  0.677218  0.113083 -0.507485 -1.247440
2002-08-31  1.901913  0.124469 -0.482948  0.093981
2002-09-01  1.728861  1.909778 -1.206848 -1.324399
2002-09-02  1.419153 -1.000495 -0.117854 -0.630926
2002-09-03  0.716920 -0.831795  2.443522 -0.247801
2002-09-04 -0.886588 -0.487240  0.476527  1.273604
2002-09-05 -2.361533 -0.074533 -1.095040  0.087406
2002-09-06 -1.225924 -0.444836  0.378192 -0.785585
2002-09-07 -1.064395  0.046003  0.148525  0.393557
2002-09-08 -0.294659  0.912430 -0.795767  0.064672
2002-09-09  0.276846  0.993007 -0.493192  0.673319
2002-09-10  1.676072  0.102106 -1.286082 -1.454404
2002-09-11  2.124521  0.069451  0.495054  0.148496
2002-09-12  0.821348 -0.880714  0.933978  1.869043
2002-09-13 -0.890738 -1.263920  0.128660 -0.282550
2002-09-14 -1.097484  0.652124  0.702043 -0.552927
2002-09-15  0.161343  0.157393  0.851718 -1.265120
2002-09-16  0.865516 -1.196734 -0.985248 -1.472387
2002-09-17 -0.539248  1.388908 -0.870515 -0.671165
2002-09-18  1.154511  0.879535 -0.249820 -0.393302
2002-09-19  1.237163  0.668046  0.917817  0.300664
2002-09-20 -0.187801  0.173142 -0.225307  2.142230
2002-09-21  0.517452 -0.547158  1.587477 -0.922776
2002-09-22  0.424784  0.696831  1.340258  1.252117
2002-09-23 -0.687751 -0.006990 -0.607220  0.709964
2002-09-24 -1.811347  0.200485  2.117700 -0.468944
2002-09-25 -0.431668 -0.385997  0.303936  0.817534
2002-09-26  0.678959  1.061957  1.252870  0.735550

1000 rows × 4 columns

df = df.cumsum()
plt.figure(); df.plot(); plt.legend(loc='best')
<matplotlib.legend.Legend at 0x22f052c62b0>

<Figure size 432x288 with 0 Axes>

[figure: line plot of the four cumulative-sum columns A, B, C, D with legend]
