python学习笔记(常用库)

一、numpy

二、pandas

2.3 series结构

基本操作

import pandas as pd

fandango = pd.read_csv('E:\\唐宇迪机器学习\Python库代码(4个)\\2-数据分析处理库pandas\\fandango_score_comparison.csv')
series_film = fandango['FILM']
print(type(series_film))
print(series_film[0:5])
serie_rt = fandango['RottenTomatoes']
print(serie_rt[0:5])

输出:

<class 'pandas.core.series.Series'>
0    Avengers: Age of Ultron (2015)
1                 Cinderella (2015)
2                    Ant-Man (2015)
3            Do You Believe? (2015)
4     Hot Tub Time Machine 2 (2015)
Name: FILM, dtype: object
0    74
1    85
2    80
3    18
4    14
Name: RottenTomatoes, dtype: int64

DataFrame中的结构是Series,Series中的结构是ndarray

Series创建

from pandas import Series
film_names = series_film.values
print(type(film_names))
rt_scores = serie_rt.values #电影评分
series_custom = Series(rt_scores ,index = film_names)#这里拿string值(电影名称)当索引
print(series_custom[['Irrational Man (2015)','Love & Mercy (2015)']]) #打印索引对应的值
print('---------------------')
print(series_custom[5:10])

输出:

<class 'numpy.ndarray'>
Irrational Man (2015)    42
Love & Mercy (2015)      89
dtype: int64
---------------------
The Water Diviner (2015)        63
Irrational Man (2015)           42
Top Five (2014)                 86
Shaun the Sheep Movie (2015)    99
Love & Mercy (2015)             89
dtype: int64

Series排序

print('----------------对Series进行排序--------------')
original_index = series_custom.index.tolist()
sorted_index = sorted(original_index)
sorted_by_index = series_custom.reindex(sorted_index)
print(sorted_by_index)

输出:

----------------对Series进行排序--------------
'71 (2015)                          97
5 Flights Up (2015)                 52
A Little Chaos (2015)               40
A Most Violent Year (2014)          90
About Elly (2015)                   97
...

相加、求最大值

import numpy as np
print('----------------对Series进行相加--------------')
print(np.add(series_custom,series_custom))
print(np.max(series_custom))

输出:

----------------对Series进行相加--------------
Avengers: Age of Ultron (2015)               148
Cinderella (2015)                            170
Ant-Man (2015)                               160
...
100

获取评分在50和75之间的电影

#will actually return a Series object with a boolean value for each film
series_custom > 50
series_greater_than_50 = series_custom[series_custom > 50]

criteria_one = series_custom > 50
criteria_two = series_custom < 75
both_criteria = series_custom[criteria_one & criteria_two]
print (both_criteria)

输出:

Avengers: Age of Ultron (2015)                                            74
The Water Diviner (2015)                                                  63
Unbroken (2014)                                                           51
Southpaw (2015)                                                           59
Insidious: Chapter 3 (2015)                                               59
The Man From U.N.C.L.E. (2015)                                            68
...

计算两个媒体评分的平均值

#data alignment same index
rt_critics = Series(fandango['RottenTomatoes'].values, index=fandango['FILM'])
rt_users = Series(fandango['RottenTomatoes_User'].values, index=fandango['FILM'])
rt_mean = (rt_critics + rt_users)/2

print(rt_mean)

输出:

Avengers: Age of Ultron (2015)                    80.0
Cinderella (2015)                                 82.5
Ant-Man (2015)                                    85.0
Do You Believe? (2015)                            51.0
...

DataFram设置字符串为索引

fandango_films = fandango.set_index('FILM', drop=False)
print(fandango_films.index)

结果:

Index(['Avengers: Age of Ultron (2015)', 'Cinderella (2015)', 'Ant-Man (2015)',
       'Do You Believe? (2015)', 'Hot Tub Time Machine 2 (2015)',
       'Shaun the Sheep Movie (2015)', 'Love & Mercy (2015)',
       ...
       'The Woman In Black 2 Angel of Death (2015)', 'Danny Collins (2015)',
       'Gett: The Trial of Viviane Amsalem (2015)',
       'Kumiko, The Treasure Hunter (2015)']

三、matplotlib

折线图

子图

条形图和散点图

案例一

import pandas as pd
reviews = pd.read_csv('E:\\唐宇迪机器学习\Python库代码(4个)\\2-数据分析处理库pandas\\fandango_score_comparison.csv')
cols = ['FILM', 'RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue', 'Fandango_Stars']
norm_reviews = reviews[cols]
print(norm_reviews[:1])

import matplotlib.pyplot as plt
from numpy import arange
#The Axes.bar() method has 2 required parameters, left and height.
#We use the left parameter to specify the x coordinates of the left sides of the bar.
#We use the height parameter to specify the height of each bar
num_cols = ['RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue', 'Fandango_Stars']

bar_heights = norm_reviews.loc[0, num_cols].values
#print bar_heights
bar_positions = arange(5) + 0.75
#print bar_positions
fig, ax = plt.subplots()
ax.bar(bar_positions, bar_heights, 0.5) #0.5表示宽度
plt.show()

结果:
在这里插入图片描述
案例二

num_cols = ['RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue', 'Fandango_Stars']
bar_heights = norm_reviews.loc[0, num_cols].values
bar_positions = arange(5) + 0.75
tick_positions = range(1,6)
fig, ax = plt.subplots()

ax.bar(bar_positions, bar_heights, 0.5)
ax.set_xticks(tick_positions)
ax.set_xticklabels(num_cols, rotation=45)

ax.set_xlabel('Rating Source')
ax.set_ylabel('Average Rating')
ax.set_title('Average User Rating For Avengers: Age of Ultron (2015)')
plt.show()

结果:
在这里插入图片描述
案例三
横向展示

import matplotlib.pyplot as plt
from numpy import arange
num_cols = ['RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue', 'Fandango_Stars']

bar_widths = norm_reviews.ix[0, num_cols].values
bar_positions = arange(5) + 0.75
tick_positions = range(1,6)
fig, ax = plt.subplots()
ax.barh(bar_positions, bar_widths, 0.5) #barh  表示横向

ax.set_yticks(tick_positions)
ax.set_yticklabels(num_cols)
ax.set_ylabel('Rating Source')
ax.set_xlabel('Average Rating')
ax.set_title('Average User Rating For Avengers: Age of Ultron (2015)')
plt.show()

结果:
在这里插入图片描述
案例四
散点图

fig, ax = plt.subplots()
ax.scatter(norm_reviews['Fandango_Ratingvalue'], norm_reviews['RT_user_norm'])
ax.set_xlabel('Fandango')
ax.set_ylabel('Rotten Tomatoes')
plt.show()

结果:
在这里插入图片描述
案例五

fig = plt.figure(figsize=(5,10))
ax1 = fig.add_subplot(2,1,1)
ax2 = fig.add_subplot(2,1,2)
ax1.scatter(norm_reviews['Fandango_Ratingvalue'], norm_reviews['RT_user_norm'])
ax1.set_xlabel('Fandango')
ax1.set_ylabel('Rotten Tomatoes')
ax2.scatter(norm_reviews['RT_user_norm'], norm_reviews['Fandango_Ratingvalue'])
ax2.set_xlabel('Rotten Tomatoes')
ax2.set_ylabel('Fandango')
plt.show()

结果:
在这里插入图片描述

柱形图和盒图

案例一
横坐标指定范围区间计数

import pandas as pd
reviews = pd.read_csv('E:\\唐宇迪机器学习\Python库代码(4个)\\2-数据分析处理库pandas\\fandango_score_comparison.csv')
import matplotlib.pyplot as plt
cols = ['FILM', 'RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue']
norm_reviews = reviews[cols]
#print(norm_reviews[:5])


fandango_distribution = norm_reviews['Fandango_Ratingvalue'].value_counts()
fandango_distribution = fandango_distribution.sort_index()

imdb_distribution = norm_reviews['IMDB_norm'].value_counts()
imdb_distribution = imdb_distribution.sort_index()

print(fandango_distribution)
#print(imdb_distribution)

fig, ax = plt.subplots()
ax.hist(norm_reviews['Fandango_Ratingvalue'])
ax.hist(norm_reviews['Fandango_Ratingvalue'],bins=20)
ax.hist(norm_reviews['Fandango_Ratingvalue'], range=(4, 5),bins=20)
plt.show()

结果:

2.7     2
2.8     2
2.9     5
3.0     4
3.1     3
3.2     5
3.3     4
3.4     9
3.5     9
3.6     8
3.7     9
3.8     5
3.9    12
4.0     7
4.1    16
4.2    12
4.3    11
4.4     7
4.5     9
4.6     4
4.8     3
Name: Fandango_Ratingvalue, dtype: int64

在这里插入图片描述
如果不指定bins,会自动分,指定了bins=20,就分成20个柱子。

案例二

指定y轴区间

fig = plt.figure(figsize=(5,5))
ax1 = fig.add_subplot(1,1,1)
ax1.hist(norm_reviews['Fandango_Ratingvalue'], bins=20, range=(0, 5))
ax1.set_title('Distribution of Fandango Ratings')
ax1.set_ylim(0, 50)
plt.show()

结果:
在这里插入图片描述
案例二

盒图:
四分图,分成四份。

fig, ax = plt.subplots()
ax.boxplot(norm_reviews['RT_user_norm'])
ax.set_xticklabels(['Rotten Tomatoes'])
ax.set_ylim(0, 5)
plt.show()

结果:
在这里插入图片描述
案例三

num_cols = ['RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue']
fig, ax = plt.subplots()
ax.boxplot(norm_reviews[num_cols].values)
ax.set_xticklabels(num_cols, rotation=90)
ax.set_ylim(0,5)
plt.show()

结果:
在这里插入图片描述

四、seaborn

他在matplot的基础上进行了封装。
案例一

import seaborn as sns
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
def sinplot(flip=1):
    x = np.linspace(0, 14, 100)
    for i in range(1, 7):
        plt.plot(x, np.sin(x + i * .5) * (7 - i) * flip)

sinplot()
plt.show()

结果:
在这里插入图片描述
五种主题风格:

darkgrid
whitegrid
dark
white
ticks
sns.set_style("whitegrid")
data = np.random.normal(size=(20, 6)) + np.arange(6) / 2
sns.boxplot(data=data)
plt.show()

在这里插入图片描述

sns.set_style("dark")
sinplot()
plt.show()

在这里插入图片描述

sns.set_style("ticks") #有线段
sinplot()
plt.show()

在这里插入图片描述

sinplot()
sns.despine() #去掉一些轴

在这里插入图片描述

调色板

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值