pandas操作总结

最新推荐文章于 2022-08-02 18:47:02 发布

jayvee_

最新推荐文章于 2022-08-02 18:47:02 发布

阅读量492

点赞数 1

数据分析专栏收录该内容

4 篇文章 0 订阅

订阅专栏

import pandas as pd

# 1查看pandas版本信息
pd.__version__

'0.24.2'

# 创建 Series 数据类型
# Pandas 中，Series 可以被看作由 1 列数据组成的数据集。
# 创建 Series 语法：s = pd.Series(data, index=index)，可以通过多种方式进行创建，以下介绍了 3 个常用方法。

# 3.从列表创建Series
arr = [1,2,3,4]
s1 = pd.Series(arr)  # 若没有指定索引，默认从0开始
s1

0    1
1    2
2    3
3    4
dtype: int64

# 4 .从Ndarray创建Series
import numpy as np

n = np.random.randn(5) # 随机生成一个数组
index = ['a','b','c','d','e']  # 指定了索引
s2 = pd.Series(n, index=index)
s2

a   -0.583111
b   -0.466115
c    0.542662
d   -0.745683
e   -0.529050
dtype: float64

# 5.从字典创建Series
dic = {
    'a':1,'b':2,'c':3,'d':4,'e':5
}
s3 = pd.Series(dic)
s3

a    1
b    2
c    3
d    4
e    5
dtype: int64

# 6. 修改Series索引
print(s1)
s1.index=['A','B','C','D']
s1

0    1
1    2
2    3
3    4
dtype: int64





A    1
B    2
C    3
D    4
dtype: int64

# 7.纵向拼接
# Series.append was removed in pandas 2.0; pd.concat yields the same
# vertically stacked Series on both old and new pandas versions.
s4 = pd.concat([s3, s1])  # entries of s1 are placed after those of s3
s4

a    1
b    2
c    3
d    4
e    5
A    1
B    2
C    3
D    4
dtype: int64

print(s4)
s4 = s4.drop('e')  # 删除索引为e的值
s4

a    1
b    2
c    3
d    4
e    5
A    1
B    2
C    3
D    4
dtype: int64





a    1
b    2
c    3
d    4
A    1
B    2
C    3
D    4
dtype: int64

# 8.Series按指定索引修改元素
s4['A'] = 100
s4

a      1
b      2
c      3
d      4
A    100
B      2
C      3
D      4
dtype: int64

# 9.按指定索引查找元素
s4['B']

# 10 Series切片操作
s4[:3] #对s4前三个数据访问

a    1
b    2
c    3
dtype: int64

# 11 加法运算  Series 的加法运算是按照索引计算，如果索引不同则填充为 NaN（空值）。
s4.add(s3)

A    NaN
B    NaN
C    NaN
D    NaN
a    2.0
b    4.0
c    6.0
d    8.0
e    NaN
dtype: float64

# 13.减法亦是
s4.sub(s3)

A    NaN
B    NaN
C    NaN
D    NaN
a    0.0
b    0.0
c    0.0
d    0.0
e    NaN
dtype: float64

# 14.乘法
s4.mul(s3)

A     NaN
B     NaN
C     NaN
D     NaN
a     1.0
b     4.0
c     9.0
d    16.0
e     NaN
dtype: float64

# 15  除法
s4.div(s3)

A    NaN
B    NaN
C    NaN
D    NaN
a    1.0
b    1.0
c    1.0
d    1.0
e    NaN
dtype: float64

# 16.求中位数
print(s4)
s4.median()

a      1
b      2
c      3
d      4
A    100
B      2
C      3
D      4
dtype: int64





3.0

# 17.求和
s4.sum()

# 18.19.最大最小值
print(s4.max())
s4.min()

# 创建 DataFrame 数据类型
# 与 Series 不同，DataFrame 可以存在多列数据。一般情况下，DataFrame 也更加常用。

# 20. 通过 NumPy 数组创建 DataFrame
dates = pd.date_range('today', periods=6)  # 定义时间序列作为index
numbers = np.random.randn(6, 4)
columns = ['A','B','C','D']
df1 = pd.DataFrame(numbers, index=dates, columns=columns)
df1

	A	B	C	D
2019-07-16 09:59:10.131414	1.536536	-1.598355	-2.354828	-1.151150
2019-07-17 09:59:10.131414	0.758288	0.143739	-0.389704	0.369642
2019-07-18 09:59:10.131414	-0.612505	0.752261	0.243023	-0.110990
2019-07-19 09:59:10.131414	0.130843	1.308658	0.765599	0.892070
2019-07-20 09:59:10.131414	1.220489	-0.415430	-0.878169	-0.215298
2019-07-21 09:59:10.131414	-0.098756	-2.210043	0.376714	0.521180

# 21.通过字典数组创建DataFrame
data = {
    'animal':['cat', 'cat', 'snake', 'dog', 'dog', 'cat', 'snake', 'cat', 'dog', 'dog'],
        'age': [2.5, 3, 0.5, np.nan, 5, 2, 4.5, np.nan, 7, 3],
        'visits': [1, 3, 2, 3, 2, 3, 1, 1, 2, 1],
        'priority': ['yes', 'yes', 'no', 'yes', 'no', 'no', 'no', 'yes', 'no', 'no']
}
labels = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']
df2 = pd.DataFrame(data, index=labels)
df2

	animal	age	visits	priority
a	cat	2.5	1	yes
b	cat	3.0	3	yes
c	snake	0.5	2	no
d	dog	NaN	3	yes
e	dog	5.0	2	no
f	cat	2.0	3	no
g	snake	4.5	1	no
h	cat	NaN	1	yes
i	dog	7.0	2	no
j	dog	3.0	1	no

#### 22. 查看 DataFrame 的数据类型
df2.dtypes

animal       object
age         float64
visits        int64
priority     object
dtype: object

# 23. 预览 DataFrame 的前 5 行数据
df2.head() # 默认前5

	animal	age	visits	priority
a	cat	2.5	1	yes
b	cat	3.0	3	yes
c	snake	0.5	2	no
d	dog	NaN	3	yes
e	dog	5.0	2	no

# 24. 查看 DataFrame 的后 3 行数据
df2.tail(3)

	animal	age	visits	priority
h	cat	NaN	1	yes
i	dog	7.0	2	no
j	dog	3.0	1	no

# 25.查看索引
df2.index

Index(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j'], dtype='object')

# 26.查看列名
df2.columns

Index(['animal', 'age', 'visits', 'priority'], dtype='object')

# 27. 查看数值
df2.values

array([['cat', 2.5, 1, 'yes'],
       ['cat', 3.0, 3, 'yes'],
       ['snake', 0.5, 2, 'no'],
       ['dog', nan, 3, 'yes'],
       ['dog', 5.0, 2, 'no'],
       ['cat', 2.0, 3, 'no'],
       ['snake', 4.5, 1, 'no'],
       ['cat', nan, 1, 'yes'],
       ['dog', 7.0, 2, 'no'],
       ['dog', 3.0, 1, 'no']], dtype=object)

# 28.查看统计数据
df2.describe()

	age	visits
count	8.000000	10.000000
mean	3.437500	1.900000
std	2.007797	0.875595
min	0.500000	1.000000
25%	2.375000	1.000000
50%	3.000000	2.000000
75%	4.625000	2.750000
max	7.000000	3.000000

# 29.转置操作
df2.T

	a	b	c	d	e	f	g	h	i	j
animal	cat	cat	snake	dog	dog	cat	snake	cat	dog	dog
age	2.5	3	0.5	NaN	5	2	4.5	NaN	7	3
visits	1	3	2	3	2	3	1	1	2	1
priority	yes	yes	no	yes	no	no	no	yes	no	no

# 30.按列排序
df2.sort_values(by='age', ascending=True)  # 默认升序

	animal	age	visits	priority
c	snake	0.5	2	no
f	cat	2.0	3	no
a	cat	2.5	1	yes
b	cat	3.0	3	yes
j	dog	3.0	1	no
g	snake	4.5	1	no
e	dog	5.0	2	no
i	dog	7.0	2	no
d	dog	NaN	3	yes
h	cat	NaN	1	yes

# 31.对DataFrame数据切片
df2[1:3]

	animal	age	visits	priority
b	cat	3.0	3	yes
c	snake	0.5	2	no

df2['age']  # 32.单列查询

a    2.5
b    3.0
c    0.5
d    NaN
e    5.0
f    2.0
g    4.5
h    NaN
i    7.0
j    3.0
Name: age, dtype: float64

df2.age

a    2.5
b    3.0
c    0.5
d    NaN
e    5.0
f    2.0
g    4.5
h    NaN
i    7.0
j    3.0
Name: age, dtype: float64

# 33.多列查询
df2[['age','animal']]

	age	animal
a	2.5	cat
b	3.0	cat
c	0.5	snake
d	NaN	dog
e	5.0	dog
f	2.0	cat
g	4.5	snake
h	NaN	cat
i	7.0	dog
j	3.0	dog

# 34.通过位置查询
df2.iloc[1:3]

	animal	age	visits	priority
b	cat	3.0	3	yes
c	snake	0.5	2	no

df2.loc['c','age']

0.5

df2.loc[:'f',['age','animal']]

	age	animal
a	2.5	cat
b	3.0	cat
c	0.5	snake
d	NaN	dog
e	5.0	dog
f	2.0	cat

# 35.副本拷贝
df3 = df2.copy()
df3

	animal	age	visits	priority
a	cat	2.5	1	yes
b	cat	3.0	3	yes
c	snake	0.5	2	no
d	dog	NaN	3	yes
e	dog	5.0	2	no
f	cat	2.0	3	no
g	snake	4.5	1	no
h	cat	NaN	1	yes
i	dog	7.0	2	no
j	dog	3.0	1	no

# 36.判断是否元素为空
df3.isnull()  # 空返回True

	animal	age	visits	priority
a	False	False	False	False
b	False	False	False	False
c	False	False	False	False
d	False	True	False	False
e	False	False	False	False
f	False	False	False	False
g	False	False	False	False
h	False	True	False	False
i	False	False	False	False
j	False	False	False	False

# 37.添加列
num= pd.Series([1,2,3,4,5,6,7,8,9,10], index=df3.index)
df3['No.'] = num
df3

	animal	age	visits	priority	No.
a	cat	2.5	1	yes	1
b	cat	3.0	3	yes	2
c	snake	0.5	2	no	3
d	dog	NaN	3	yes	4
e	dog	5.0	2	no	5
f	cat	2.0	3	no	6
g	snake	4.5	1	no	7
h	cat	NaN	1	yes	8
i	dog	7.0	2	no	9
j	dog	3.0	1	no	10

# 39.通过DataFrame的标签对数据进行修改
df3.loc['f','age'] = 1.5
df3

	animal	age	visits	priority	No.
a	cat	2.5	1	yes	1
b	cat	3.0	3	yes	2
c	snake	0.5	2	no	3
d	dog	NaN	3	yes	4
e	dog	5.0	2	no	5
f	cat	1.5	3	no	6
g	snake	4.5	1	no	7
h	cat	NaN	1	yes	8
i	dog	7.0	2	no	9
j	dog	3.0	1	no	10

# 40.求平均操作
# Restrict the mean to numeric columns explicitly: pandas >= 2.0 no longer
# silently drops the object columns ('animal', 'priority') and raises instead.
df3.mean(numeric_only=True)

age       3.375
visits    1.900
No.       5.500
dtype: float64

# 41.对任意列求和
df3['visits'].sum()

# 42 将字符串转换为小写字母
string = pd.Series([
    'A','B','asDS',np.nan
])
print(string)
string.str.lower()

0       A
1       B
2    asDS
3     NaN
dtype: object





0       a
1       b
2    asds
3     NaN
dtype: object

# 43.转化为大写
string.str.upper()

0       A
1       B
2    ASDS
3     NaN
dtype: object

# 44.对缺失值进行填充
df4 = df3.copy()
print(df4)
df4.fillna(value=3)

  animal  age  visits priority  No.
a    cat  2.5       1      yes    1
b    cat  3.0       3      yes    2
c  snake  0.5       2       no    3
d    dog  NaN       3      yes    4
e    dog  5.0       2       no    5
f    cat  1.5       3       no    6
g  snake  4.5       1       no    7
h    cat  NaN       1      yes    8
i    dog  7.0       2       no    9
j    dog  3.0       1       no   10

	animal	age	visits	priority	No.
a	cat	2.5	1	yes	1
b	cat	3.0	3	yes	2
c	snake	0.5	2	no	3
d	dog	3.0	3	yes	4
e	dog	5.0	2	no	5
f	cat	1.5	3	no	6
g	snake	4.5	1	no	7
h	cat	3.0	1	yes	8
i	dog	7.0	2	no	9
j	dog	3.0	1	no	10

# 45.删除存在缺失值的行
df5 = df3.copy()
print(df5)
df5.dropna(how='any')  # 任何存在nan行的都将被删掉

  animal  age  visits priority  No.
a    cat  2.5       1      yes    1
b    cat  3.0       3      yes    2
c  snake  0.5       2       no    3
d    dog  NaN       3      yes    4
e    dog  5.0       2       no    5
f    cat  1.5       3       no    6
g  snake  4.5       1       no    7
h    cat  NaN       1      yes    8
i    dog  7.0       2       no    9
j    dog  3.0       1       no   10

	animal	age	visits	priority	No.
a	cat	2.5	1	yes	1
b	cat	3.0	3	yes	2
c	snake	0.5	2	no	3
e	dog	5.0	2	no	5
f	cat	1.5	3	no	6
g	snake	4.5	1	no	7
i	dog	7.0	2	no	9
j	dog	3.0	1	no	10

# 46.按指定列对齐
l = pd.DataFrame({'key':['foo1','foo2'],'one':[1,2]})
r = pd.DataFrame({'key':['foo2','foo1'], 'two':[4,5]})
print(l)
print(r)
pd.merge(l,r,on='key')  # Join the two frames on the 'key' column; foo1 and foo2 occur in both frames, so the result has two rows

    key  one
0  foo1    1
1  foo2    2
    key  two
0  foo2    4
1  foo1    5

	key	one	two
0	foo1	1	5
1	foo2	2	4

# 51.建立一个以2019年每一天为索引，值为随机数的Series
dti = pd.date_range(start='20190101', end='20191231', freq="D")
s = pd.Series(np.random.rand(len(dti)), index=dti)
s

2019-01-01    0.037638
2019-01-02    0.146835
2019-01-03    0.630011
2019-01-04    0.225352
2019-01-05    0.549422
2019-01-06    0.136173
2019-01-07    0.976075
2019-01-08    0.581866
2019-01-09    0.667477
2019-01-10    0.616454
2019-01-11    0.625050
2019-01-12    0.131415
2019-01-13    0.558883
2019-01-14    0.749271
2019-01-15    0.676446
2019-01-16    0.084104
2019-01-17    0.073056
2019-01-18    0.232186
2019-01-19    0.213357
2019-01-20    0.457664
2019-01-21    0.538337
2019-01-22    0.728427
2019-01-23    0.899302
2019-01-24    0.850609
2019-01-25    0.716502
2019-01-26    0.319339
2019-01-27    0.577455
2019-01-28    0.126991
2019-01-29    0.527439
2019-01-30    0.551891
                ...   
2019-12-02    0.246504
2019-12-03    0.117549
2019-12-04    0.223977
2019-12-05    0.407109
2019-12-06    0.953820
2019-12-07    0.583962
2019-12-08    0.014735
2019-12-09    0.009190
2019-12-10    0.496330
2019-12-11    0.191981
2019-12-12    0.002935
2019-12-13    0.530197
2019-12-14    0.328830
2019-12-15    0.081638
2019-12-16    0.922251
2019-12-17    0.332389
2019-12-18    0.076567
2019-12-19    0.906216
2019-12-20    0.481311
2019-12-21    0.080405
2019-12-22    0.291532
2019-12-23    0.933424
2019-12-24    0.439771
2019-12-25    0.738565
2019-12-26    0.215401
2019-12-27    0.849687
2019-12-28    0.861060
2019-12-29    0.831074
2019-12-30    0.944307
2019-12-31    0.245717
Freq: D, Length: 365, dtype: float64

# 52.统计s中每一个周三对应值的和
s[s.index.weekday == 2].sum()  # 周一从0开始

28.54901665149845

# 53.统计s中每个月的平均值
s.resample('M').mean()

2019-01-31    0.471694
2019-02-28    0.586159
2019-03-31    0.515226
2019-04-30    0.530170
2019-05-31    0.481162
2019-06-30    0.545577
2019-07-31    0.547692
2019-08-31    0.495158
2019-09-30    0.547109
2019-10-31    0.544706
2019-11-30    0.456312
2019-12-31    0.434590
Freq: M, dtype: float64

# 66.条件查找
data = {'animal': ['cat', 'cat', 'snake', 'dog', 'dog', 'cat', 'snake', 'cat', 'dog', 'dog'],
        'age': [2.5, 3, 0.5, np.nan, 5, 2, 4.5, np.nan, 7, 3],
        'visits': [1, 3, 2, 3, 2, 3, 1, 1, 2, 1],
        'priority': ['yes', 'yes', 'no', 'yes', 'no', 'no', 'no', 'yes', 'no', 'no']}

labels = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']
df = pd.DataFrame(data, index=labels)
df[df['age']>3]

	animal	age	visits	priority
e	dog	5.0	2	no
g	snake	4.5	1	no
i	dog	7.0	2	no

# 68.多重条件查询

df = pd.DataFrame(data, index=labels)
df[(df['animal']=='cat')&(df['age']<3)]

	animal	age	visits	priority
a	cat	2.5	1	yes
f	cat	2.0	3	no

# 64.按关键字查询
df3[df3['animal'].isin(['cat','dog'])]

	animal	age	visits	priority	No.
a	cat	2.5	1	yes	1
b	cat	3.0	3	yes	2
d	dog	NaN	3	yes	4
e	dog	5.0	2	no	5
f	cat	1.5	3	no	6
h	cat	NaN	1	yes	8
i	dog	7.0	2	no	9
j	dog	3.0	1	no	10

# 70.按标签名及列名查询
df.loc[df2.index[[3,4,8]],['animal','age']]

	animal	age
d	dog	NaN
e	dog	5.0
i	dog	7.0

# 71多条件排序
df.sort_values(by=['age','visits'], ascending=[False, True])  # age descending; rows with equal age are ordered by visits ascending

	animal	age	visits	priority
i	dog	7.0	2	no
e	dog	5.0	2	no
g	snake	4.5	1	no
b	cat	3.0	3	yes
j	dog	3.0	1	no
a	cat	2.5	1	yes
f	cat	2.0	3	no
c	snake	0.5	2	no
d	dog	NaN	3	yes
h	cat	NaN	1	yes

# 73分组求和
# Sum only the numeric columns per animal: on pandas >= 2.0 the default would
# also "sum" the object column 'priority' by concatenating its strings.
df4.groupby(by='animal').sum(numeric_only=True)

	age	visits	No.
animal
cat	7.0	8	17
dog	15.0	8	28
snake	5.0	3	10

# 数据清洗
# 88.缺失值拟合
# 在 `FlightNumber` 中有数值缺失，其中数值为按 10 增长，补充相应的缺省值使得数据完整，并让数据为 `int` 类型。

df = pd.DataFrame({'From_To': ['LoNDon_paris', 'MAdrid_miLAN', 'londON_StockhOlm',
                               'Budapest_PaRis', 'Brussels_londOn'],
                   'FlightNumber': [10045, np.nan, 10065, np.nan, 10085],
                   'RecentDelays': [[23, 47], [], [24, 43, 87], [13], [67, 32]],
                   'Airline': ['KLM(!)', '<Air France> (12)', '(British Airways. )',
                               '12. Air France', '"Swiss Air"']})
df

	From_To	FlightNumber	RecentDelays	Airline
0	LoNDon_paris	10045.0	[23, 47]	KLM(!)
1	MAdrid_miLAN	NaN	[]	<Air France> (12)
2	londON_StockhOlm	10065.0	[24, 43, 87]	(British Airways. )
3	Budapest_PaRis	NaN	[13]	12. Air France
4	Brussels_londOn	10085.0	[67, 32]	"Swiss Air"

df['FlightNumber'] = df['FlightNumber'].interpolate().astype(int)

df

	From_To	FlightNumber	RecentDelays	Airline
0	LoNDon_paris	10045	[23, 47]	KLM(!)
1	MAdrid_miLAN	10055	[]	<Air France> (12)
2	londON_StockhOlm	10065	[24, 43, 87]	(British Airways. )
3	Budapest_PaRis	10075	[13]	12. Air France
4	Brussels_londOn	10085	[67, 32]	"Swiss Air"

# 89. 数据列拆分
# 其中 From_To 应该拆分为相互独立的两列 From 和 To：将 From_To 依照 _ 拆分为独立两列，建立为一个新表。
temp = df.From_To.str.split('_',expand=True)
temp.columns = ['From','To']
temp

	From	To
0	LoNDon	paris
1	MAdrid	miLAN
2	londON	StockhOlm
3	Budapest	PaRis
4	Brussels	londOn

# 90. 字符标准化
# 其中注意到地点的名字都不规范（如：londON应该为London）需要对数据进行标准化处理。
temp['From'] = temp['From'].str.capitalize()
temp['To'] = temp['To'].str.capitalize()

temp

	From	To
0	London	Paris
1	Madrid	Milan
2	London	Stockholm
3	Budapest	Paris
4	Brussels	London

# 91. 删除坏数据加入整理好的数据
# 将最开始的 From_To 列删除，加入整理好的 From 和 To 列。
df = df.drop('From_To', axis=1)
df = df.join(temp)
df

	FlightNumber	RecentDelays	Airline	From	To
0	10045	[23, 47]	KLM(!)	London	Paris
1	10055	[]	<Air France> (12)	Madrid	Milan
2	10065	[24, 43, 87]	(British Airways. )	London	Stockholm
3	10075	[13]	12. Air France	Budapest	Paris
4	10085	[67, 32]	"Swiss Air"	Brussels	London

# 92. 去除多余字符
# 如同 airline 列中许多数据有许多其他字符，会对后期的数据分析有较大影响，需要对这类数据进行修正。
# Keep the first run of ASCII letters/whitespace in each Airline value and trim
# the surrounding blanks. The pattern is a raw string so that ``\s`` reaches the
# regex engine as written instead of being an invalid string escape sequence.
df['Airline'] = df['Airline'].str.extract(
    r'([a-zA-Z\s]+)', expand=False
).str.strip()
df

	FlightNumber	RecentDelays	Airline	From	To
0	10045	[23, 47]	KLM	London	Paris
1	10055	[]	Air France	Madrid	Milan
2	10065	[24, 43, 87]	British Airways	London	Stockholm
3	10075	[13]	Air France	Budapest	Paris
4	10085	[67, 32]	Swiss Air	Brussels	London

# 93. 格式规范
# 在 RecentDelays 中记录的方式为列表类型，由于其长度不一，这会为后期数据分析造成很大麻烦。
# 这里将 RecentDelays 的列表拆开，取出列表中的相同位置元素作为一列，若为空值即用 NaN 代替。
delays = df['RecentDelays'].apply(pd.Series)
delays.columns = ['delay_{}'.format(n) for n in range(1, len(delays.columns)+1)]
df = df.drop('RecentDelays', axis=1).join(delays)
df

	FlightNumber	Airline	From	To	delay_1	delay_2	delay_3
0	10045	KLM	London	Paris	23.0	47.0	NaN
1	10055	Air France	Madrid	Milan	NaN	NaN	NaN
2	10065	British Airways	London	Stockholm	24.0	43.0	87.0
3	10075	Air France	Budapest	Paris	13.0	NaN	NaN
4	10085	Swiss Air	Brussels	London	67.0	32.0	NaN

# 数据预处理
# 94. 信息区间划分
df = pd.DataFrame({
    'name': ['Alice', 'Bob', 'Candy', 'Dany', 'Ella', 'Frank', 'Grace', 'Jenny'],
    'grades': [58, 83, 79, 65, 93, 45, 61, 88],
})


def choice(x):
    """Return 1 when the grade ``x`` is strictly greater than 60, otherwise 0."""
    return 1 if x > 60 else 0


# Map every grade through ``choice``. Series.map keeps the frame's own index,
# whereas wrapping a Python ``map`` in a new pd.Series creates a fresh 0..n-1
# index that only lines up when ``df`` happens to use the default index.
df['grades'] = df['grades'].map(choice)

df

	name	grades
0	Alice	0
1	Bob	1
2	Candy	1
3	Dany	1
4	Ella	1
5	Frank	0
6	Grace	1
7	Jenny	1

# 95. 数据去重（仅去除连续重复的值，非相邻的重复值会保留）
df = pd.DataFrame({'A':[1,2,3,4,5,4,4,57,8]})
df.loc[df['A'].shift() != df['A']]

	A
0	1
1	2
2	3
3	4
4	5
5	4
7	57
8	8

# 96. 数据归一化
# 有时候，DataFrame 中不同列之间的数据差距太大，需要对其进行归一化处理。
# 其中，Max-Min 归一化是简单而常见的一种方式，公式如下:
# Y = (X − Xmin) / (Xmax − Xmin)

def normalization(df):
    """Apply column-wise min-max scaling to ``df``.

    Each value has its column minimum subtracted and is then divided by the
    column's range (maximum minus minimum). A new object is returned; ``df``
    itself is not modified.
    """
    shifted = df.sub(df.min())
    value_range = (df.max()).sub(df.min())
    return shifted.div(value_range)

df = pd.DataFrame(np.random.random(size=(5,3)))
print(df)
normalization(df)

          0         1         2
0  0.920675  0.181496  0.408179
1  0.016837  0.740842  0.239625
2  0.577404  0.503003  0.077401
3  0.502584  0.262550  0.000848
4  0.817712  0.774605  0.073925

	0	1	2
0	1.000000	0.000000	1.000000
1	0.000000	0.943074	0.586199
2	0.620207	0.542072	0.187938
3	0.537427	0.136659	0.000000
4	0.886083	1.000000	0.179404

# 97. Series 可视化
%matplotlib inline

ts = pd.Series(np.random.randn(100), index=pd.date_range('today', periods=100))
ts = ts.cumsum()
print(ts)
ts.plot()

2019-07-16 11:14:32.969237    -0.160527
2019-07-17 11:14:32.969237    -0.413502
2019-07-18 11:14:32.969237     0.494939
2019-07-19 11:14:32.969237    -0.178343
2019-07-20 11:14:32.969237    -1.279842
2019-07-21 11:14:32.969237    -0.538981
2019-07-22 11:14:32.969237    -1.952703
2019-07-23 11:14:32.969237    -2.350831
2019-07-24 11:14:32.969237    -2.652419
2019-07-25 11:14:32.969237    -4.976856
2019-07-26 11:14:32.969237    -5.596993
2019-07-27 11:14:32.969237    -4.880697
2019-07-28 11:14:32.969237    -5.918225
2019-07-29 11:14:32.969237    -4.720213
2019-07-30 11:14:32.969237    -4.056208
2019-07-31 11:14:32.969237    -3.526640
2019-08-01 11:14:32.969237    -2.295520
2019-08-02 11:14:32.969237    -0.381850
2019-08-03 11:14:32.969237    -0.077956
2019-08-04 11:14:32.969237     0.441831
2019-08-05 11:14:32.969237    -1.624691
2019-08-06 11:14:32.969237    -1.084316
2019-08-07 11:14:32.969237    -2.134124
2019-08-08 11:14:32.969237    -1.477398
2019-08-09 11:14:32.969237    -2.299194
2019-08-10 11:14:32.969237    -2.501663
2019-08-11 11:14:32.969237    -3.190793
2019-08-12 11:14:32.969237    -4.237049
2019-08-13 11:14:32.969237    -4.477230
2019-08-14 11:14:32.969237    -4.171017
                                ...    
2019-09-24 11:14:32.969237   -13.569730
2019-09-25 11:14:32.969237   -14.627188
2019-09-26 11:14:32.969237   -15.461638
2019-09-27 11:14:32.969237   -16.121560
2019-09-28 11:14:32.969237   -16.569511
2019-09-29 11:14:32.969237   -17.900842
2019-09-30 11:14:32.969237   -19.194001
2019-10-01 11:14:32.969237   -17.979293
2019-10-02 11:14:32.969237   -18.645903
2019-10-03 11:14:32.969237   -19.241367
2019-10-04 11:14:32.969237   -19.211365
2019-10-05 11:14:32.969237   -18.088419
2019-10-06 11:14:32.969237   -17.767976
2019-10-07 11:14:32.969237   -16.273883
2019-10-08 11:14:32.969237   -16.751812
2019-10-09 11:14:32.969237   -16.460468
2019-10-10 11:14:32.969237   -15.534514
2019-10-11 11:14:32.969237   -16.029253
2019-10-12 11:14:32.969237   -16.629995
2019-10-13 11:14:32.969237   -17.181734
2019-10-14 11:14:32.969237   -16.139546
2019-10-15 11:14:32.969237   -16.249424
2019-10-16 11:14:32.969237   -14.797719
2019-10-17 11:14:32.969237   -17.198546
2019-10-18 11:14:32.969237   -18.193887
2019-10-19 11:14:32.969237   -18.175841
2019-10-20 11:14:32.969237   -18.039003
2019-10-21 11:14:32.969237   -17.884838
2019-10-22 11:14:32.969237   -18.985760
2019-10-23 11:14:32.969237   -18.987684
Freq: D, Length: 100, dtype: float64





<matplotlib.axes._subplots.AxesSubplot at 0x1bc512b29b0>

在这里插入图片描述

# 98. DataFrame 折线图
df = pd.DataFrame(np.random.randn(100, 4), index=ts.index, columns=['A','B','C','D'])
df =df.cumsum()
print(df)
df.plot()

                                   A          B         C         D
2019-07-16 11:14:32.969237 -2.311551  -2.601142  0.852766  0.766899
2019-07-17 11:14:32.969237 -0.879667  -4.293468 -0.039314  0.822882
2019-07-18 11:14:32.969237 -1.249910  -5.562160 -0.456214  0.720813
2019-07-19 11:14:32.969237 -0.567523  -5.869549 -1.250540  1.204854
2019-07-20 11:14:32.969237  0.000393  -3.939871 -1.824283  1.377918
2019-07-21 11:14:32.969237 -1.957763  -4.426390 -1.644319  0.411990
2019-07-22 11:14:32.969237 -1.863936  -5.952407 -0.678510  0.882874
2019-07-23 11:14:32.969237 -2.047160  -6.771213  1.407736  1.757021
2019-07-24 11:14:32.969237 -2.230326  -6.520421  3.122783  2.976079
2019-07-25 11:14:32.969237 -3.833992  -6.785455  2.087702  4.075022
2019-07-26 11:14:32.969237 -4.315307  -8.567182  2.688330  5.365991
2019-07-27 11:14:32.969237 -5.248594  -8.344775  3.382635  4.214969
2019-07-28 11:14:32.969237 -5.054369  -7.385112  3.765415  5.066637
2019-07-29 11:14:32.969237 -2.931733  -7.085015  3.746368  5.756438
2019-07-30 11:14:32.969237 -4.190044  -7.517056  3.133894  8.217903
2019-07-31 11:14:32.969237 -3.139043  -8.779127  2.402586  7.860025
2019-08-01 11:14:32.969237 -1.870986  -8.921735  2.442751  7.956824
2019-08-02 11:14:32.969237 -1.947051  -9.726026  2.805189  8.730009
2019-08-03 11:14:32.969237 -2.468689  -7.685965  2.295436  6.795688
2019-08-04 11:14:32.969237 -2.138392  -7.481845  3.769528  7.018816
2019-08-05 11:14:32.969237 -1.521903  -5.906005  2.340666  7.280866
2019-08-06 11:14:32.969237 -0.851497  -5.947501  4.279168  6.229589
2019-08-07 11:14:32.969237 -0.745985  -6.307143  5.847261  5.630705
2019-08-08 11:14:32.969237 -0.459598  -5.138792  4.995194  5.647915
2019-08-09 11:14:32.969237 -0.324185  -5.226607  3.466786  4.292591
2019-08-10 11:14:32.969237 -0.352415  -5.121374  3.401821  4.966165
2019-08-11 11:14:32.969237  1.123371  -4.678556  2.997400  4.730402
2019-08-12 11:14:32.969237  1.621475  -4.918931  1.978229  5.891817
2019-08-13 11:14:32.969237  0.528799  -4.923886  1.741921  4.091429
2019-08-14 11:14:32.969237  0.234260  -6.577139  3.515839  3.965522
...                              ...        ...       ...       ...
2019-09-24 11:14:32.969237  2.098830  11.540368 -2.760031  2.017074
2019-09-25 11:14:32.969237  1.917497  11.425361 -2.360769  1.540259
2019-09-26 11:14:32.969237  1.586440  11.089945 -2.934906  2.016988
2019-09-27 11:14:32.969237  2.426352  11.000135 -4.160570  1.678462
2019-09-28 11:14:32.969237  2.590117  11.409677 -5.102951  3.123796
2019-09-29 11:14:32.969237  2.586017  11.673688 -5.936028  2.159731
2019-09-30 11:14:32.969237  5.012078  12.535448 -6.913949  4.082058
2019-10-01 11:14:32.969237  3.529943  14.612272 -6.541449  3.130429
2019-10-02 11:14:32.969237  3.376133  12.740237 -7.041879  3.058573
2019-10-03 11:14:32.969237  3.536676  13.233300 -6.775922  3.562460
2019-10-04 11:14:32.969237  5.075667  13.630937 -6.409229  3.404647
2019-10-05 11:14:32.969237  4.633807  14.011680 -7.359063  2.555063
2019-10-06 11:14:32.969237  4.108268  14.233577 -8.319235  1.782257
2019-10-07 11:14:32.969237  5.389960  15.049002 -7.592306  3.064996
2019-10-08 11:14:32.969237  4.904890  15.129739 -7.845749  2.197024
2019-10-09 11:14:32.969237  2.894357  14.053121 -7.560088  2.127322
2019-10-10 11:14:32.969237  2.432563  13.678098 -7.010267  2.536035
2019-10-11 11:14:32.969237  1.493160  13.263020 -7.262265  2.954692
2019-10-12 11:14:32.969237  2.477873  14.443603 -7.815188  2.420356
2019-10-13 11:14:32.969237  1.914146  14.476938 -6.850849  2.985317
2019-10-14 11:14:32.969237  1.944343  13.532021 -7.611172  4.754920
2019-10-15 11:14:32.969237  2.379594  13.908116 -8.503684  5.217389
2019-10-16 11:14:32.969237  1.479926  13.646017 -7.861792  4.769845
2019-10-17 11:14:32.969237  3.376088  12.470308 -7.902426  4.735779
2019-10-18 11:14:32.969237  3.847433  12.177020 -6.719579  3.123475
2019-10-19 11:14:32.969237  3.904511  12.261467 -6.016796  3.419390
2019-10-20 11:14:32.969237  3.188237  14.305071 -6.137896  2.905813
2019-10-21 11:14:32.969237  4.006034  13.981431 -6.034235  2.483323
2019-10-22 11:14:32.969237  4.187015  14.311562 -6.466325  0.531675
2019-10-23 11:14:32.969237  4.928834  14.064165 -6.435447 -0.506871

[100 rows x 4 columns]





<matplotlib.axes._subplots.AxesSubplot at 0x1bc5148c748>

在这里插入图片描述

# 99. DataFrame 柱形图（叠加次坐标轴折线图）
df = pd.DataFrame({"revenue": [57, 68, 63, 71, 72, 90, 80, 62, 59, 51, 47, 52],
                   "advertising": [2.1, 1.9, 2.7, 3.0, 3.6, 3.2, 2.7, 2.4, 1.8, 1.6, 1.3, 1.9],
                   "month": range(12)
                   })
ax = df.plot.bar('month','revenue',color='yellow')
df.plot('month','advertising', secondary_y=True,ax=ax)

<matplotlib.axes._subplots.AxesSubplot at 0x1bc52686eb8>

在这里插入图片描述

jayvee_

关注

1
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
复制链接

分享到 QQ

分享到新浪微博

扫一扫

专栏目录

	animal	age	visits	priority	No.
a	cat	2.5	1	yes	1
b	cat	3.0	3	yes	2
d	dog	NaN	3	yes	4
e	dog	5.0	2	no	5
f	cat	1.5	3	no	6
h	cat	NaN	1	yes	8
i	dog	7.0	2	no	9
j	dog	3.0	1	no	10

	animal	age	visits	priority	No.
a	cat	2.5	1	yes	1
b	cat	3.0	3	yes	2
d	dog	NaN	3	yes	4
e	dog	5.0	2	no	5
f	cat	1.5	3	no	6
h	cat	NaN	1	yes	8
i	dog	7.0	2	no	9
j	dog	3.0	1	no	10

	animal	age	visits	priority	No.
a	cat	2.5	1	yes	1
b	cat	3.0	3	yes	2
d	dog	NaN	3	yes	4
e	dog	5.0	2	no	5
f	cat	1.5	3	no	6
h	cat	NaN	1	yes	8
i	dog	7.0	2	no	9
j	dog	3.0	1	no	10