pandas操作总结

import pandas as pd
# 1查看pandas版本信息
pd.__version__
'0.24.2'
# 创建 Series 数据类型
# Pandas 中,Series 可以被看作由 1 列数据组成的数据集。
# 创建 Series 语法:s = pd.Series(data, index=index),可以通过多种方式进行创建,以下介绍了 3 个常用方法。
# 3.从列表创建Series
arr = [1,2,3,4]
s1 = pd.Series(arr)  # 若没有指定索引,默认从0开始
s1
0    1
1    2
2    3
3    4
dtype: int64
# 4 .从Ndarray创建Series
import numpy as np

n = np.random.randn(5) # 随机生成一个数组
index = ['a','b','c','d','e']  # 指定了索引
s2 = pd.Series(n, index=index)
s2
a   -0.583111
b   -0.466115
c    0.542662
d   -0.745683
e   -0.529050
dtype: float64
# 5.从字典创建Series
dic = {
    'a':1,'b':2,'c':3,'d':4,'e':5
}
s3 = pd.Series(dic)
s3
a    1
b    2
c    3
d    4
e    5
dtype: int64
# 6. 修改Series索引
print(s1)
s1.index=['A','B','C','D']
s1
0    1
1    2
2    3
3    4
dtype: int64





A    1
B    2
C    3
D    4
dtype: int64
# 7. Concatenate vertically
# pd.concat stacks s1 below s3 and keeps both indexes. It replaces Series.append,
# which is deprecated and no longer available in pandas 2.x.
s4 = pd.concat([s3, s1])
s4
a    1
b    2
c    3
d    4
e    5
A    1
B    2
C    3
D    4
dtype: int64
print(s4)
s4 = s4.drop('e')  # 删除索引为e的值
s4
a    1
b    2
c    3
d    4
e    5
A    1
B    2
C    3
D    4
dtype: int64





a    1
b    2
c    3
d    4
A    1
B    2
C    3
D    4
dtype: int64
# 8.Series按指定索引修改元素
s4['A'] = 100
s4
a      1
b      2
c      3
d      4
A    100
B      2
C      3
D      4
dtype: int64
# 9.按指定索引查找元素
s4['B']
2
# 10 Series切片操作
s4[:3] #对s4前三个数据访问
a    1
b    2
c    3
dtype: int64
# 11 加法运算  Series 的加法运算是按照索引计算,如果索引不同则填充为 NaN(空值)。
s4.add(s3)
A    NaN
B    NaN
C    NaN
D    NaN
a    2.0
b    4.0
c    6.0
d    8.0
e    NaN
dtype: float64
# 13.减法亦是
s4.sub(s3)
A    NaN
B    NaN
C    NaN
D    NaN
a    0.0
b    0.0
c    0.0
d    0.0
e    NaN
dtype: float64
# 14.乘法
s4.mul(s3)
A     NaN
B     NaN
C     NaN
D     NaN
a     1.0
b     4.0
c     9.0
d    16.0
e     NaN
dtype: float64
# 15  除法
s4.div(s3)
A    NaN
B    NaN
C    NaN
D    NaN
a    1.0
b    1.0
c    1.0
d    1.0
e    NaN
dtype: float64
# 16.求中位数
print(s4)
s4.median()
a      1
b      2
c      3
d      4
A    100
B      2
C      3
D      4
dtype: int64





3.0
# 17.求和
s4.sum()
119
# 18.19.最大最小值
print(s4.max())
s4.min()
100





1
# 创建 DataFrame 数据类型
# 与 Series 不同,DataFrame 可以存在多列数据。一般情况下,DataFrame 也更加常用。
# 20. 通过 NumPy 数组创建 DataFrame
dates = pd.date_range('today', periods=6)  # 定义时间序列作为index
numbers = np.random.randn(6, 4)
columns = ['A','B','C','D']
df1 = pd.DataFrame(numbers, index=dates, columns=columns)
df1
                                   A         B         C         D
2019-07-16 09:59:10.131414  1.536536 -1.598355 -2.354828 -1.151150
2019-07-17 09:59:10.131414  0.758288  0.143739 -0.389704  0.369642
2019-07-18 09:59:10.131414 -0.612505  0.752261  0.243023 -0.110990
2019-07-19 09:59:10.131414  0.130843  1.308658  0.765599  0.892070
2019-07-20 09:59:10.131414  1.220489 -0.415430 -0.878169 -0.215298
2019-07-21 09:59:10.131414 -0.098756 -2.210043  0.376714  0.521180
# 21.通过字典数组创建DataFrame
data = {
    'animal':['cat', 'cat', 'snake', 'dog', 'dog', 'cat', 'snake', 'cat', 'dog', 'dog'],
        'age': [2.5, 3, 0.5, np.nan, 5, 2, 4.5, np.nan, 7, 3],
        'visits': [1, 3, 2, 3, 2, 3, 1, 1, 2, 1],
        'priority': ['yes', 'yes', 'no', 'yes', 'no', 'no', 'no', 'yes', 'no', 'no']
}
labels = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']
df2 = pd.DataFrame(data, index=labels)
df2
animalagevisitspriority
acat2.51yes
bcat3.03yes
csnake0.52no
ddogNaN3yes
edog5.02no
fcat2.03no
gsnake4.51no
hcatNaN1yes
idog7.02no
jdog3.01no
#### 22. 查看 DataFrame 的数据类型
df2.dtypes
animal       object
age         float64
visits        int64
priority     object
dtype: object
# 23. 预览 DataFrame 的前 5 行数据
df2.head() # 默认前5
animalagevisitspriority
acat2.51yes
bcat3.03yes
csnake0.52no
ddogNaN3yes
edog5.02no
# 24. 查看 DataFrame 的后 3 行数据
df2.tail(3)
animalagevisitspriority
hcatNaN1yes
idog7.02no
jdog3.01no
# 25.查看索引
df2.index
Index(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j'], dtype='object')
# 26.查看列名
df2.columns
Index(['animal', 'age', 'visits', 'priority'], dtype='object')
# 27. 查看数值
df2.values
array([['cat', 2.5, 1, 'yes'],
       ['cat', 3.0, 3, 'yes'],
       ['snake', 0.5, 2, 'no'],
       ['dog', nan, 3, 'yes'],
       ['dog', 5.0, 2, 'no'],
       ['cat', 2.0, 3, 'no'],
       ['snake', 4.5, 1, 'no'],
       ['cat', nan, 1, 'yes'],
       ['dog', 7.0, 2, 'no'],
       ['dog', 3.0, 1, 'no']], dtype=object)
# 28.查看统计数据
df2.describe()
agevisits
count8.00000010.000000
mean3.4375001.900000
std2.0077970.875595
min0.5000001.000000
25%2.3750001.000000
50%3.0000002.000000
75%4.6250002.750000
max7.0000003.000000
# 29.转置操作
df2.T
abcdefghij
animalcatcatsnakedogdogcatsnakecatdogdog
age2.530.5NaN524.5NaN73
visits1323231121
priorityyesyesnoyesnononoyesnono
# 30.按列排序
df2.sort_values(by='age', ascending=True)  # 默认升序
animalagevisitspriority
csnake0.52no
fcat2.03no
acat2.51yes
bcat3.03yes
jdog3.01no
gsnake4.51no
edog5.02no
idog7.02no
ddogNaN3yes
hcatNaN1yes
# 31.对DataFrame数据切片
df2[1:3]
animalagevisitspriority
bcat3.03yes
csnake0.52no
df2['age']  # 32.单列查询
a    2.5
b    3.0
c    0.5
d    NaN
e    5.0
f    2.0
g    4.5
h    NaN
i    7.0
j    3.0
Name: age, dtype: float64
df2.age
a    2.5
b    3.0
c    0.5
d    NaN
e    5.0
f    2.0
g    4.5
h    NaN
i    7.0
j    3.0
Name: age, dtype: float64
# 33.多列查询
df2[['age','animal']]
ageanimal
a2.5cat
b3.0cat
c0.5snake
dNaNdog
e5.0dog
f2.0cat
g4.5snake
hNaNcat
i7.0dog
j3.0dog
# 34.通过位置查询
df2.iloc[1:3]
animalagevisitspriority
bcat3.03yes
csnake0.52no
df2.loc['c','age']
0.5
df2.loc[:'f',['age','animal']]
ageanimal
a2.5cat
b3.0cat
c0.5snake
dNaNdog
e5.0dog
f2.0cat
# 35.副本拷贝
df3 = df2.copy()
df3
animalagevisitspriority
acat2.51yes
bcat3.03yes
csnake0.52no
ddogNaN3yes
edog5.02no
fcat2.03no
gsnake4.51no
hcatNaN1yes
idog7.02no
jdog3.01no
# 36.判断是否元素为空
df3.isnull()  # 空返回True
animalagevisitspriority
aFalseFalseFalseFalse
bFalseFalseFalseFalse
cFalseFalseFalseFalse
dFalseTrueFalseFalse
eFalseFalseFalseFalse
fFalseFalseFalseFalse
gFalseFalseFalseFalse
hFalseTrueFalseFalse
iFalseFalseFalseFalse
jFalseFalseFalseFalse
# 37.添加列
num= pd.Series([1,2,3,4,5,6,7,8,9,10], index=df3.index)
df3['No.'] = num
df3
animalagevisitspriorityNo.
acat2.51yes1
bcat3.03yes2
csnake0.52no3
ddogNaN3yes4
edog5.02no5
fcat2.03no6
gsnake4.51no7
hcatNaN1yes8
idog7.02no9
jdog3.01no10
# 39.通过DataFrame的标签对数据进行修改
df3.loc['f','age'] = 1.5
df3
animalagevisitspriorityNo.
acat2.51yes1
bcat3.03yes2
csnake0.52no3
ddogNaN3yes4
edog5.02no5
fcat1.53no6
gsnake4.51no7
hcatNaN1yes8
idog7.02no9
jdog3.01no10
# 40.求平均操作
df3.mean()
age       3.375
visits    1.900
No.       5.500
dtype: float64
# 41.对任意列求和
df3['visits'].sum()
19
# 42 将字符串转换为小写字母
string = pd.Series([
    'A','B','asDS',np.nan
])
print(string)
string.str.lower()
0       A
1       B
2    asDS
3     NaN
dtype: object





0       a
1       b
2    asds
3     NaN
dtype: object
# 43.转化为大写
string.str.upper()
0       A
1       B
2    ASDS
3     NaN
dtype: object
# 44.对缺失值进行填充
df4 = df3.copy()
print(df4)
df4.fillna(value=3)
  animal  age  visits priority  No.
a    cat  2.5       1      yes    1
b    cat  3.0       3      yes    2
c  snake  0.5       2       no    3
d    dog  NaN       3      yes    4
e    dog  5.0       2       no    5
f    cat  1.5       3       no    6
g  snake  4.5       1       no    7
h    cat  NaN       1      yes    8
i    dog  7.0       2       no    9
j    dog  3.0       1       no   10
animalagevisitspriorityNo.
acat2.51yes1
bcat3.03yes2
csnake0.52no3
ddog3.03yes4
edog5.02no5
fcat1.53no6
gsnake4.51no7
hcat3.01yes8
idog7.02no9
jdog3.01no10
# 45.删除存在缺失值的行
df5 = df3.copy()
print(df5)
df5.dropna(how='any')  # 任何存在nan行的都将被删掉
  animal  age  visits priority  No.
a    cat  2.5       1      yes    1
b    cat  3.0       3      yes    2
c  snake  0.5       2       no    3
d    dog  NaN       3      yes    4
e    dog  5.0       2       no    5
f    cat  1.5       3       no    6
g  snake  4.5       1       no    7
h    cat  NaN       1      yes    8
i    dog  7.0       2       no    9
j    dog  3.0       1       no   10
animalagevisitspriorityNo.
acat2.51yes1
bcat3.03yes2
csnake0.52no3
edog5.02no5
fcat1.53no6
gsnake4.51no7
idog7.02no9
jdog3.01no10
# 46.按指定列对齐
# Two frames that hold the same 'key' values, listed in a different row order.
l = pd.DataFrame({'key':['foo1','foo2'],'one':[1,2]})
r = pd.DataFrame({'key':['foo2','foo1'], 'two':[4,5]})
print(l)
print(r)
pd.merge(l,r,on='key')  # Join on the 'key' column: foo1 and foo2 both appear in each frame, so the result has two rows
    key  one
0  foo1    1
1  foo2    2
    key  two
0  foo2    4
1  foo1    5
    key  one  two
0  foo1    1    5
1  foo2    2    4
# 51.建立一个以2019年每一天为索引、值为随机数的Series
dti = pd.date_range(start='20190101', end='20191231', freq="D")
s = pd.Series(np.random.rand(len(dti)), index=dti)
s
2019-01-01    0.037638
2019-01-02    0.146835
2019-01-03    0.630011
2019-01-04    0.225352
2019-01-05    0.549422
2019-01-06    0.136173
2019-01-07    0.976075
2019-01-08    0.581866
2019-01-09    0.667477
2019-01-10    0.616454
2019-01-11    0.625050
2019-01-12    0.131415
2019-01-13    0.558883
2019-01-14    0.749271
2019-01-15    0.676446
2019-01-16    0.084104
2019-01-17    0.073056
2019-01-18    0.232186
2019-01-19    0.213357
2019-01-20    0.457664
2019-01-21    0.538337
2019-01-22    0.728427
2019-01-23    0.899302
2019-01-24    0.850609
2019-01-25    0.716502
2019-01-26    0.319339
2019-01-27    0.577455
2019-01-28    0.126991
2019-01-29    0.527439
2019-01-30    0.551891
                ...   
2019-12-02    0.246504
2019-12-03    0.117549
2019-12-04    0.223977
2019-12-05    0.407109
2019-12-06    0.953820
2019-12-07    0.583962
2019-12-08    0.014735
2019-12-09    0.009190
2019-12-10    0.496330
2019-12-11    0.191981
2019-12-12    0.002935
2019-12-13    0.530197
2019-12-14    0.328830
2019-12-15    0.081638
2019-12-16    0.922251
2019-12-17    0.332389
2019-12-18    0.076567
2019-12-19    0.906216
2019-12-20    0.481311
2019-12-21    0.080405
2019-12-22    0.291532
2019-12-23    0.933424
2019-12-24    0.439771
2019-12-25    0.738565
2019-12-26    0.215401
2019-12-27    0.849687
2019-12-28    0.861060
2019-12-29    0.831074
2019-12-30    0.944307
2019-12-31    0.245717
Freq: D, Length: 365, dtype: float64
# 52.统计s中每一个周三对应值的和
s[s.index.weekday == 2].sum()  # 周一从0开始
28.54901665149845
# 53.统计s中每个月的平均值
s.resample('M').mean()
2019-01-31    0.471694
2019-02-28    0.586159
2019-03-31    0.515226
2019-04-30    0.530170
2019-05-31    0.481162
2019-06-30    0.545577
2019-07-31    0.547692
2019-08-31    0.495158
2019-09-30    0.547109
2019-10-31    0.544706
2019-11-30    0.456312
2019-12-31    0.434590
Freq: M, dtype: float64
# 66.条件查找
data = {'animal': ['cat', 'cat', 'snake', 'dog', 'dog', 'cat', 'snake', 'cat', 'dog', 'dog'],
        'age': [2.5, 3, 0.5, np.nan, 5, 2, 4.5, np.nan, 7, 3],
        'visits': [1, 3, 2, 3, 2, 3, 1, 1, 2, 1],
        'priority': ['yes', 'yes', 'no', 'yes', 'no', 'no', 'no', 'yes', 'no', 'no']}

labels = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']
df = pd.DataFrame(data, index=labels)
df[df['age']>3]
animalagevisitspriority
edog5.02no
gsnake4.51no
idog7.02no
# 68.多重条件查询
df = pd.DataFrame(data, index=labels)
df[(df['animal']=='cat')&(df['age']<3)]
animalagevisitspriority
acat2.51yes
fcat2.03no
# 64.按关键字查询
df3[df3['animal'].isin(['cat','dog'])]
animalagevisitspriorityNo.
acat2.51yes1
bcat3.03yes2
ddogNaN3yes4
edog5.02no5
fcat1.53no6
hcatNaN1yes8
idog7.02no9
jdog3.01no10
# 70. Select by row labels and column names
# Take the row labels (positions 3, 4 and 8) from df's own index, so the lookup
# does not rely on another frame happening to carry the same labels.
df.loc[df.index[[3,4,8]],['animal','age']]
animalage
ddogNaN
edog5.0
idog7.0
# 71. Sort by multiple columns
df.sort_values(by=['age','visits'], ascending=[False, True])  # age descending, then visits ascending
animalagevisitspriority
idog7.02no
edog5.02no
gsnake4.51no
bcat3.03yes
jdog3.01no
acat2.51yes
fcat2.03no
csnake0.52no
ddogNaN3yes
hcatNaN1yes
# 73分组求和
df4.groupby(by='animal').sum()
agevisitsNo.
animal
cat7.0817
dog15.0828
snake5.0310
# 数据清洗
# 88.缺失值拟合
# 在 `FlightNumber` 中有数值缺失,其中数值按 10 递增,补充相应的缺失值使得数据完整,并让数据为 `int` 类型。
df = pd.DataFrame({'From_To': ['LoNDon_paris', 'MAdrid_miLAN', 'londON_StockhOlm',
                               'Budapest_PaRis', 'Brussels_londOn'],
                   'FlightNumber': [10045, np.nan, 10065, np.nan, 10085],
                   'RecentDelays': [[23, 47], [], [24, 43, 87], [13], [67, 32]],
                   'Airline': ['KLM(!)', '<Air France> (12)', '(British Airways. )',
                               '12. Air France', '"Swiss Air"']})
df

From_ToFlightNumberRecentDelaysAirline
0LoNDon_paris10045.0[23, 47]KLM(!)
1MAdrid_miLANNaN[]<Air France> (12)
2londON_StockhOlm10065.0[24, 43, 87](British Airways. )
3Budapest_PaRisNaN[13]12. Air France
4Brussels_londOn10085.0[67, 32]"Swiss Air"
df['FlightNumber'] = df['FlightNumber'].interpolate().astype(int)
df

From_ToFlightNumberRecentDelaysAirline
0LoNDon_paris10045[23, 47]KLM(!)
1MAdrid_miLAN10055[]<Air France> (12)
2londON_StockhOlm10065[24, 43, 87](British Airways. )
3Budapest_PaRis10075[13]12. Air France
4Brussels_londOn10085[67, 32]"Swiss Air"
# 89. 数据列拆分
# 其中 From_To 应拆分为独立的两列 From 和 To:将 From_To 按 `_` 拆分为两列,并建立为一个新表。
temp = df.From_To.str.split('_',expand=True)
temp.columns = ['From','To']
temp
FromTo
0LoNDonparis
1MAdridmiLAN
2londONStockhOlm
3BudapestPaRis
4BrusselslondOn
# 90. 字符标准化
# 其中注意到地点的名字都不规范(如:londON应该为London)需要对数据进行标准化处理。
temp['From'] = temp['From'].str.capitalize()
temp['To'] = temp['To'].str.capitalize()
temp
FromTo
0LondonParis
1MadridMilan
2LondonStockholm
3BudapestParis
4BrusselsLondon
# 91. 删除坏数据加入整理好的数据
# 将最开始的 From_To 列删除,加入整理好的 From 和 To 列。
df = df.drop('From_To', axis=1)
df = df.join(temp)
df
FlightNumberRecentDelaysAirlineFromTo
010045[23, 47]KLM(!)LondonParis
110055[]<Air France> (12)MadridMilan
210065[24, 43, 87](British Airways. )LondonStockholm
310075[13]12. Air FranceBudapestParis
410085[67, 32]"Swiss Air"BrusselsLondon
# 92. 去除多余字符
# 如同 airline 列中许多数据有许多其他字符,会对后期的数据分析有较大影响,需要对这类数据进行修正。
# Keep the first run of letters/whitespace found in each airline name and trim
# the surrounding spaces. The pattern is a raw string so that `\s` reaches the
# regex engine unchanged instead of being an invalid Python string escape.
df['Airline'] = df['Airline'].str.extract(
    r'([a-zA-Z\s]+)', expand=False
).str.strip()
df
FlightNumberRecentDelaysAirlineFromTo
010045[23, 47]KLMLondonParis
110055[]Air FranceMadridMilan
210065[24, 43, 87]British AirwaysLondonStockholm
310075[13]Air FranceBudapestParis
410085[67, 32]Swiss AirBrusselsLondon
# 93. 格式规范
# 在 RecentDelays 中记录的方式为列表类型,由于其长度不一,这会为后期数据分析造成很大麻烦。
# 这里将 RecentDelays 的列表拆开,取出列表中的相同位置元素作为一列,若为空值即用 NaN 代替。
delays = df['RecentDelays'].apply(pd.Series)
delays.columns = ['delay_{}'.format(n) for n in range(1, len(delays.columns)+1)]
df = df.drop('RecentDelays', axis=1).join(delays)
df
FlightNumberAirlineFromTodelay_1delay_2delay_3
010045KLMLondonParis23.047.0NaN
110055Air FranceMadridMilanNaNNaNNaN
210065British AirwaysLondonStockholm24.043.087.0
310075Air FranceBudapestParis13.0NaNNaN
410085Swiss AirBrusselsLondon67.032.0NaN
# 数据预处理
# 94. 信息区间划分
# Build a small grade table, then replace each grade with a 0/1 flag.
df = pd.DataFrame({
    'name': ['Alice', 'Bob', 'Candy', 'Dany', 'Ella', 'Frank', 'Grace', 'Jenny'],
    'grades': [58, 83, 79, 65, 93, 45, 61, 88],
})


def choice(x):
    """Return 1 when ``x`` is strictly greater than 60, otherwise 0."""
    return 1 if x > 60 else 0


# Series.map applies choice element-wise and keeps the column's own index, so the
# result lines up row-for-row with df whatever index df carries.
df['grades'] = df['grades'].map(choice)
df
namegrades
0Alice0
1Bob1
2Candy1
3Dany1
4Ella1
5Frank0
6Grace1
7Jenny1
# 95. 数据去重(去除连续重复的值)
df = pd.DataFrame({'A':[1,2,3,4,5,4,4,57,8]})
df.loc[df['A'].shift() != df['A']]
A
01
12
23
34
45
54
757
88
# 96. 数据归一化
# 有时候,DataFrame 中不同列之间的数据差距太大,需要对其进行归一化处理。
# 其中,Max-Min 归一化是简单而常见的一种方式,公式如下:
# Y = (X − Xmin) / (Xmax − Xmin),即 $Y = \frac{X - X_{\min}}{X_{\max} - X_{\min}}$
def normalization(df):
    """Apply min-max scaling to every column of ``df``.

    Each value has its column minimum subtracted and is then divided by the
    column's max-minus-min span.

    Args:
        df: DataFrame whose columns are rescaled.

    Returns:
        A new DataFrame of the scaled values; ``df`` itself is not modified.
    """
    lowest = df.min()
    span = df.max().sub(lowest)
    return df.sub(lowest).div(span)
df = pd.DataFrame(np.random.random(size=(5,3)))
print(df)
normalization(df)
          0         1         2
0  0.920675  0.181496  0.408179
1  0.016837  0.740842  0.239625
2  0.577404  0.503003  0.077401
3  0.502584  0.262550  0.000848
4  0.817712  0.774605  0.073925
012
01.0000000.0000001.000000
10.0000000.9430740.586199
20.6202070.5420720.187938
30.5374270.1366590.000000
40.8860831.0000000.179404
# 97. Series 可视化
%matplotlib inline
ts = pd.Series(np.random.randn(100), index=pd.date_range('today', periods=100))
ts = ts.cumsum()
print(ts)
ts.plot()
2019-07-16 11:14:32.969237    -0.160527
2019-07-17 11:14:32.969237    -0.413502
2019-07-18 11:14:32.969237     0.494939
2019-07-19 11:14:32.969237    -0.178343
2019-07-20 11:14:32.969237    -1.279842
2019-07-21 11:14:32.969237    -0.538981
2019-07-22 11:14:32.969237    -1.952703
2019-07-23 11:14:32.969237    -2.350831
2019-07-24 11:14:32.969237    -2.652419
2019-07-25 11:14:32.969237    -4.976856
2019-07-26 11:14:32.969237    -5.596993
2019-07-27 11:14:32.969237    -4.880697
2019-07-28 11:14:32.969237    -5.918225
2019-07-29 11:14:32.969237    -4.720213
2019-07-30 11:14:32.969237    -4.056208
2019-07-31 11:14:32.969237    -3.526640
2019-08-01 11:14:32.969237    -2.295520
2019-08-02 11:14:32.969237    -0.381850
2019-08-03 11:14:32.969237    -0.077956
2019-08-04 11:14:32.969237     0.441831
2019-08-05 11:14:32.969237    -1.624691
2019-08-06 11:14:32.969237    -1.084316
2019-08-07 11:14:32.969237    -2.134124
2019-08-08 11:14:32.969237    -1.477398
2019-08-09 11:14:32.969237    -2.299194
2019-08-10 11:14:32.969237    -2.501663
2019-08-11 11:14:32.969237    -3.190793
2019-08-12 11:14:32.969237    -4.237049
2019-08-13 11:14:32.969237    -4.477230
2019-08-14 11:14:32.969237    -4.171017
                                ...    
2019-09-24 11:14:32.969237   -13.569730
2019-09-25 11:14:32.969237   -14.627188
2019-09-26 11:14:32.969237   -15.461638
2019-09-27 11:14:32.969237   -16.121560
2019-09-28 11:14:32.969237   -16.569511
2019-09-29 11:14:32.969237   -17.900842
2019-09-30 11:14:32.969237   -19.194001
2019-10-01 11:14:32.969237   -17.979293
2019-10-02 11:14:32.969237   -18.645903
2019-10-03 11:14:32.969237   -19.241367
2019-10-04 11:14:32.969237   -19.211365
2019-10-05 11:14:32.969237   -18.088419
2019-10-06 11:14:32.969237   -17.767976
2019-10-07 11:14:32.969237   -16.273883
2019-10-08 11:14:32.969237   -16.751812
2019-10-09 11:14:32.969237   -16.460468
2019-10-10 11:14:32.969237   -15.534514
2019-10-11 11:14:32.969237   -16.029253
2019-10-12 11:14:32.969237   -16.629995
2019-10-13 11:14:32.969237   -17.181734
2019-10-14 11:14:32.969237   -16.139546
2019-10-15 11:14:32.969237   -16.249424
2019-10-16 11:14:32.969237   -14.797719
2019-10-17 11:14:32.969237   -17.198546
2019-10-18 11:14:32.969237   -18.193887
2019-10-19 11:14:32.969237   -18.175841
2019-10-20 11:14:32.969237   -18.039003
2019-10-21 11:14:32.969237   -17.884838
2019-10-22 11:14:32.969237   -18.985760
2019-10-23 11:14:32.969237   -18.987684
Freq: D, Length: 100, dtype: float64





<matplotlib.axes._subplots.AxesSubplot at 0x1bc512b29b0>

在这里插入图片描述

# 98. DataFrame 折线图
df = pd.DataFrame(np.random.randn(100, 4), index=ts.index, columns=['A','B','C','D'])
df =df.cumsum()
print(df)
df.plot()                        
                                                                    
                                   A          B         C         D
2019-07-16 11:14:32.969237 -2.311551  -2.601142  0.852766  0.766899
2019-07-17 11:14:32.969237 -0.879667  -4.293468 -0.039314  0.822882
2019-07-18 11:14:32.969237 -1.249910  -5.562160 -0.456214  0.720813
2019-07-19 11:14:32.969237 -0.567523  -5.869549 -1.250540  1.204854
2019-07-20 11:14:32.969237  0.000393  -3.939871 -1.824283  1.377918
2019-07-21 11:14:32.969237 -1.957763  -4.426390 -1.644319  0.411990
2019-07-22 11:14:32.969237 -1.863936  -5.952407 -0.678510  0.882874
2019-07-23 11:14:32.969237 -2.047160  -6.771213  1.407736  1.757021
2019-07-24 11:14:32.969237 -2.230326  -6.520421  3.122783  2.976079
2019-07-25 11:14:32.969237 -3.833992  -6.785455  2.087702  4.075022
2019-07-26 11:14:32.969237 -4.315307  -8.567182  2.688330  5.365991
2019-07-27 11:14:32.969237 -5.248594  -8.344775  3.382635  4.214969
2019-07-28 11:14:32.969237 -5.054369  -7.385112  3.765415  5.066637
2019-07-29 11:14:32.969237 -2.931733  -7.085015  3.746368  5.756438
2019-07-30 11:14:32.969237 -4.190044  -7.517056  3.133894  8.217903
2019-07-31 11:14:32.969237 -3.139043  -8.779127  2.402586  7.860025
2019-08-01 11:14:32.969237 -1.870986  -8.921735  2.442751  7.956824
2019-08-02 11:14:32.969237 -1.947051  -9.726026  2.805189  8.730009
2019-08-03 11:14:32.969237 -2.468689  -7.685965  2.295436  6.795688
2019-08-04 11:14:32.969237 -2.138392  -7.481845  3.769528  7.018816
2019-08-05 11:14:32.969237 -1.521903  -5.906005  2.340666  7.280866
2019-08-06 11:14:32.969237 -0.851497  -5.947501  4.279168  6.229589
2019-08-07 11:14:32.969237 -0.745985  -6.307143  5.847261  5.630705
2019-08-08 11:14:32.969237 -0.459598  -5.138792  4.995194  5.647915
2019-08-09 11:14:32.969237 -0.324185  -5.226607  3.466786  4.292591
2019-08-10 11:14:32.969237 -0.352415  -5.121374  3.401821  4.966165
2019-08-11 11:14:32.969237  1.123371  -4.678556  2.997400  4.730402
2019-08-12 11:14:32.969237  1.621475  -4.918931  1.978229  5.891817
2019-08-13 11:14:32.969237  0.528799  -4.923886  1.741921  4.091429
2019-08-14 11:14:32.969237  0.234260  -6.577139  3.515839  3.965522
...                              ...        ...       ...       ...
2019-09-24 11:14:32.969237  2.098830  11.540368 -2.760031  2.017074
2019-09-25 11:14:32.969237  1.917497  11.425361 -2.360769  1.540259
2019-09-26 11:14:32.969237  1.586440  11.089945 -2.934906  2.016988
2019-09-27 11:14:32.969237  2.426352  11.000135 -4.160570  1.678462
2019-09-28 11:14:32.969237  2.590117  11.409677 -5.102951  3.123796
2019-09-29 11:14:32.969237  2.586017  11.673688 -5.936028  2.159731
2019-09-30 11:14:32.969237  5.012078  12.535448 -6.913949  4.082058
2019-10-01 11:14:32.969237  3.529943  14.612272 -6.541449  3.130429
2019-10-02 11:14:32.969237  3.376133  12.740237 -7.041879  3.058573
2019-10-03 11:14:32.969237  3.536676  13.233300 -6.775922  3.562460
2019-10-04 11:14:32.969237  5.075667  13.630937 -6.409229  3.404647
2019-10-05 11:14:32.969237  4.633807  14.011680 -7.359063  2.555063
2019-10-06 11:14:32.969237  4.108268  14.233577 -8.319235  1.782257
2019-10-07 11:14:32.969237  5.389960  15.049002 -7.592306  3.064996
2019-10-08 11:14:32.969237  4.904890  15.129739 -7.845749  2.197024
2019-10-09 11:14:32.969237  2.894357  14.053121 -7.560088  2.127322
2019-10-10 11:14:32.969237  2.432563  13.678098 -7.010267  2.536035
2019-10-11 11:14:32.969237  1.493160  13.263020 -7.262265  2.954692
2019-10-12 11:14:32.969237  2.477873  14.443603 -7.815188  2.420356
2019-10-13 11:14:32.969237  1.914146  14.476938 -6.850849  2.985317
2019-10-14 11:14:32.969237  1.944343  13.532021 -7.611172  4.754920
2019-10-15 11:14:32.969237  2.379594  13.908116 -8.503684  5.217389
2019-10-16 11:14:32.969237  1.479926  13.646017 -7.861792  4.769845
2019-10-17 11:14:32.969237  3.376088  12.470308 -7.902426  4.735779
2019-10-18 11:14:32.969237  3.847433  12.177020 -6.719579  3.123475
2019-10-19 11:14:32.969237  3.904511  12.261467 -6.016796  3.419390
2019-10-20 11:14:32.969237  3.188237  14.305071 -6.137896  2.905813
2019-10-21 11:14:32.969237  4.006034  13.981431 -6.034235  2.483323
2019-10-22 11:14:32.969237  4.187015  14.311562 -6.466325  0.531675
2019-10-23 11:14:32.969237  4.928834  14.064165 -6.435447 -0.506871

[100 rows x 4 columns]





<matplotlib.axes._subplots.AxesSubplot at 0x1bc5148c748>

在这里插入图片描述

# 99. DataFrame 柱状图与折线图(双 Y 轴)
df = pd.DataFrame({"revenue": [57, 68, 63, 71, 72, 90, 80, 62, 59, 51, 47, 52],
                   "advertising": [2.1, 1.9, 2.7, 3.0, 3.6, 3.2, 2.7, 2.4, 1.8, 1.6, 1.3, 1.9],
                   "month": range(12)
                   })
ax = df.plot.bar('month','revenue',color='yellow')
df.plot('month','advertising', secondary_y=True,ax=ax)
<matplotlib.axes._subplots.AxesSubplot at 0x1bc52686eb8>

在这里插入图片描述


  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值