pandas pandas具体用法相关内容(四)

pandas pandas具体用法相关内容(四)

在这里插入图片描述

#%%

#Series (collection of values)
#DataFrame (collection of Series objects)
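#Panel (collection of DataFrame objects) -- NOTE: Panel was deprecated in pandas 0.20 and removed in 0.25; use a DataFrame with a MultiIndex instead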

#%%

#A Series object can hold many data types, including
#float - for representing float values
#int - for representing integer values
#bool - for representing Boolean values
#datetime64[ns] - for representing date & time, without time-zone
#datetime64[ns, tz] - for representing date & time, with time-zone
#timedelta64[ns] - for representing differences in dates & times (seconds, minutes, etc.)
#category - for representing categorical values
#object - for representing String values

#FILM - film name
#RottenTomatoes - Rotten Tomatoes critics average score
#RottenTomatoes_User - Rotten Tomatoes user average score
#RT_norm - Rotten Tomatoes critics average score (normalized to a 0 to 5 point system)
#RT_user_norm - Rotten Tomatoes user average score (normalized to a 0 to 5 point system)
#Metacritic - Metacritic critics average score
#Metacritic_User - Metacritic user average score

#%%

import pandas as pd
# Load the score-comparison dataset; every column of the resulting DataFrame
# is itself a Series.
fandango = pd.read_csv('fandango_score_comparison.csv')

# Film-title column, previewing its first five entries.
series_film = fandango['FILM']
print(series_film.head(5))
print("++++++++++++++++++++++")

# Rotten Tomatoes critics-score column, same five-row preview.
series_rt = fandango['RottenTomatoes']
print(series_rt.head(5))

#%%

# Bare expression: as the last statement of an interactive/notebook cell this
# renders the whole DataFrame; when the file is run as a plain script the value
# is simply discarded.
fandango

#%% md

## Series的用法

#%%

# Bring the Series class into the namespace so it can be constructed directly.
from pandas import Series

# Underlying values of the film-title column (series_film is fandango['FILM']).
film_names = series_film.values
print(film_names)
print("+++++++++++++++++++")
# Show which container type `.values` handed back.
print("类型为:", type(film_names))
print("************************华丽的分割线************************")

# Underlying values of the critics-score column
# (series_rt is fandango['RottenTomatoes']).
rt_scores = series_rt.values
print(rt_scores)
print("************************在此华丽分割一下************************")

# Build a Series whose labels are the film titles and whose values are the
# matching critics scores.
series_custom = Series(data=rt_scores, index=film_names)
print(series_custom)

# Label-based lookup: a list of titles selects the matching entries.
# As the last expression of the cell, the result is displayed by a notebook.
series_custom.loc[['Minions (2015)', 'Leviathan (2014)']]

#%%

# Integer positions are also available: series_custom is labelled by film
# title, yet an integer slice inside [] is interpreted positionally.
series_custom = Series(rt_scores, index=film_names)  # labels: film titles, values: critics scores
fiveten = series_custom[5:10]  # entries at positions 5 through 9
print(fiveten)

#%% md

## 对Series进行排序

#%%

# Snapshot the current labels (film titles) as a plain Python list.
original_index = list(series_custom.index)
print(original_index)
# Order the labels with the builtin sort.
sorted_index = sorted(original_index)
print("*****************分割线*********************")
# Reindexing with the ordered labels yields the Series rearranged by label.
sorted_by_index = series_custom.reindex(index=sorted_index)
print(sorted_by_index)

#%%

# pandas can do the ordering itself: once by label, once by value.
sc2 = series_custom.sort_index()    # ordered by film title
sc3 = series_custom.sort_values()   # ordered by score

# Preview the first ten entries of each ordering.
print(sc2.head(10))
print("^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^")
print(sc3.head(10))

#%% md

## 对series进行算术操作

#%%

# The values in a Series object are treated as an ndarray, the core data type
# in NumPy, so NumPy functions can be applied to a Series directly.
import numpy as np
# Element-wise addition of the Series with itself (every score is doubled).
print(np.add(series_custom, series_custom))    # printed explicitly, so it shows up even outside a notebook
# Apply the sine function to each value. The result is not assigned or printed,
# so it is discarded here.
np.sin(series_custom)
# Return the highest value (a single value, not a Series). As the last
# expression of the cell it is what a notebook would display.
np.max(series_custom)

#%% md

## 对series进行逻辑判断

#%%

# Comparing a Series with a scalar produces a boolean Series holding one
# True/False flag per film. (This bare expression only demonstrates that;
# its result is not kept.)
series_custom > 50

# Mask for scores above 50, and the entries that satisfy it.
criteria_one = series_custom.gt(50)
series_greater_than_50 = series_custom[criteria_one]
print(criteria_one)
print("****************************************分割一下********************************************")

# Second mask for scores below 75; `&` combines the masks element-wise so only
# films with 50 < score < 75 remain.
criteria_two = series_custom.lt(75)
both_criteria = series_custom[criteria_one & criteria_two]
print(both_criteria)

#%%

# Data alignment: arithmetic between two Series pairs values up by index label.
# Both Series below are indexed by the FILM column, so critics and user scores
# are combined film by film.
rt_critics = Series(data=fandango['RottenTomatoes'].values, index=fandango['FILM'])
rt_users = Series(data=fandango['RottenTomatoes_User'].values, index=fandango['FILM'])

# Per-film mean of the critics score and the user score.
rt_mean = rt_critics.add(rt_users).div(2)

print(rt_mean)

#%%

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值