#%%
# Series (collection of values)
# DataFrame (collection of Series objects)
# Panel (collection of DataFrame objects; removed from pandas in version 0.25)
#%%
# A Series object can hold many data types, including:
#   float - for representing float values
#   int - for representing integer values
#   bool - for representing Boolean values
#   datetime64[ns] - for representing date & time, without time-zone
#   datetime64[ns, tz] - for representing date & time, with time-zone
#   timedelta[ns] - for representing differences in dates & times (seconds, minutes, etc.)
#   category - for representing categorical values
#   object - for representing String values
#
# Dataset columns:
#   FILM - film name
#   RottenTomatoes - Rotten Tomatoes critics average score
#   RottenTomatoes_User - Rotten Tomatoes user average score
#   RT_norm - Rotten Tomatoes critics average score (normalized to a 0 to 5 point system)
#   RT_user_norm - Rotten Tomatoes user average score (normalized to a 0 to 5 point system)
#   Metacritic - Metacritic critics average score
#   Metacritic_User - Metacritic user average score
#%%
import pandas as pd
# Load the Fandango score-comparison CSV into a DataFrame.
fandango = pd.read_csv('fandango_score_comparison.csv')

# Selecting a single column of a DataFrame yields a Series.
series_film = fandango['FILM']
print(series_film[0:5])
print("++++++++++++++++++++++")

series_rt = fandango['RottenTomatoes']
print(series_rt[0:5])
#%%
# Bare expression: when run as a notebook cell this displays the DataFrame
# (it has no visible effect when the file is run as a plain script).
fandango
#%% md
## Series的用法
#%%
# Import the Series object from pandas
from pandas import Series
# Pull the raw value arrays out of the two columns selected earlier.
film_names = series_film.values  # series_film = fandango['FILM']
print(film_names)
print("+++++++++++++++++++")
print("类型为:", type(film_names))
print("************************华丽的分割线************************")

rt_scores = series_rt.values  # series_rt = fandango['RottenTomatoes']
print(rt_scores)
print("************************在此华丽分割一下************************")

# Build a Series that uses the film names as its index and the
# Rotten Tomatoes scores as its values.
series_custom = Series(rt_scores, index=film_names)
print(series_custom)

# Selecting with a list of index labels returns the matching entries.
series_custom[['Minions (2015)', 'Leviathan (2014)']]
#%%
# int index is also available
# Build the Series: film names as index, Rotten Tomatoes scores as values.
series_custom = Series(rt_scores, index=film_names)

# Label-based selection with a list of film names.
series_custom[['Minions (2015)', 'Leviathan (2014)']]

# An integer slice on this string-labelled Series selects by position.
fiveten = series_custom[5:10]
print(fiveten)
#%% md
## 对Series进行排序
#%%
# Sort the Series by its index "by hand": collect the labels, sort them,
# then reindex the Series into that order.
original_index = series_custom.index.tolist()
print(original_index)

sorted_index = sorted(original_index)  # sort the index labels
print("*****************分割线*********************")

sorted_by_index = series_custom.reindex(sorted_index)  # reorder by the sorted labels
print(sorted_by_index)
#%%
# The built-in sorting helpers return new, sorted Series.
sc2 = series_custom.sort_index()   # sorted by index label
sc3 = series_custom.sort_values()  # sorted by value

print(sc2[0:10])
print("^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^")
print(sc3[0:10])
#%% md
## 对series进行算术操作
#%%
# The values in a Series object are treated as an ndarray, the core data type in NumPy
import numpy as np
# Add each value with each other (element-wise sum of the Series with itself).
print(np.add(series_custom, series_custom))

# Apply sine function to each value.
np.sin(series_custom)

# Return the highest value (will return a single value, not a Series).
np.max(series_custom)
#%% md
## 对series进行逻辑判断
#%%
# will actually return a Series object with a boolean value for each film
# Comparing a Series with a scalar yields a Series of True/False values.
series_custom > 50

# Boolean indexing: keep only the entries whose score is above 50.
series_greater_than_50 = series_custom[series_custom > 50]

criteria_one = series_custom > 50
print(criteria_one)
print("****************************************分割一下********************************************")
criteria_two = series_custom < 75

# Combine the two masks element-wise with `&` to keep scores in (50, 75).
both_criteria = series_custom[criteria_one & criteria_two]
print(both_criteria)
#%%
# Data alignment: both Series share the same index
# Both Series are indexed by the FILM column, so the arithmetic below pairs
# up critic and user scores by film label.
rt_critics = Series(fandango['RottenTomatoes'].values, index=fandango['FILM'])
rt_users = Series(fandango['RottenTomatoes_User'].values, index=fandango['FILM'])

# Mean of the critic and user score for each film.
rt_mean = (rt_critics + rt_users) / 2
print(rt_mean)
#%%
pandas pandas具体用法相关内容(四)#%%#Series (collection of values)#DataFrame (collection of Series objects)#Panel (collection of DataFrame objects)#%%#A Series object can hold many data types, including#float - for representing float values#int - for re