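# A Series is a collection of values
# A DataFrame is a collection of Series
# A Panel is a collection of DataFrames (Panel has been removed from recent pandas releases)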
import pandas as pd
# Load the Fandango score-comparison CSV into a DataFrame.
fandango = pd.read_csv("D:\\test\\fandango_score_comparison.csv")
# Selecting a single column of a DataFrame yields a Series.
series_film=fandango["FILM"]
series_rt=fandango["RottenTomatoes"]
# Preview the first two rows.
fandango.head(2)
FILM | RottenTomatoes | RottenTomatoes_User | Metacritic | Metacritic_User | IMDB | Fandango_Stars | Fandango_Ratingvalue | RT_norm | RT_user_norm | … | IMDB_norm | RT_norm_round | RT_user_norm_round | Metacritic_norm_round | Metacritic_user_norm_round | IMDB_norm_round | Metacritic_user_vote_count | IMDB_user_vote_count | Fandango_votes | Fandango_Difference | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Avengers: Age of Ultron (2015) | 74 | 86 | 66 | 7.1 | 7.8 | 5.0 | 4.5 | 3.70 | 4.3 | … | 3.90 | 3.5 | 4.5 | 3.5 | 3.5 | 4.0 | 1330 | 271107 | 14846 | 0.5 |
1 | Cinderella (2015) | 85 | 80 | 67 | 7.5 | 7.1 | 5.0 | 4.5 | 4.25 | 4.0 | … | 3.55 | 4.5 | 4.0 | 3.5 | 4.0 | 3.5 | 249 | 65709 | 12640 | 0.5 |
2 rows × 22 columns
from pandas import Series
# Pull the underlying value arrays out of the two column Series.
film_value = series_film.values
rt_value = series_rt.values
# Build a Series whose values are the RottenTomatoes scores and whose
# index labels are the film titles.
my_series = Series(data=rt_value, index=film_value)
# A film title now works as the lookup key.
my_series["Cinderella (2015)"]
85
# Indexing with a list of labels returns a Series holding just those entries.
my_series[["Cinderella (2015)","Avengers: Age of Ultron (2015)"]]
Cinderella (2015) 85 Avengers: Age of Ultron (2015) 74 dtype: int64
# Index labels (film titles) as a plain Python list.
my_list = my_series.index.tolist()
print(my_list[1:3])
# Sort a copy of the labels, leaving my_list in its original order.
sord_list = list(my_list)
sord_list.sort()
# Re-order the Series so that it follows the sorted labels.
sorded_series = my_series.reindex(index=sord_list)
# Reindexing moves each value together with its label, so the lookup
# result is unchanged.
print(sorded_series["Cinderella (2015)"])
print(sorded_series.head(3))
['Cinderella (2015)', 'Ant-Man (2015)']
85
'71 (2015)               97
5 Flights Up (2015)      52
A Little Chaos (2015)    40
dtype: int64
my_series.sort_index()[0:3] # sort by index label in ascending order, then take the first three entries
'71 (2015)               97
5 Flights Up (2015)      52
A Little Chaos (2015)    40
dtype: int64
my_series.sort_values()[0:3] # sort by value, then take the first three entries
Paul Blart: Mall Cop 2 (2015) 5 Hitman: Agent 47 (2015) 7 Hot Pursuit (2015) 8 dtype: int64
import numpy as np
# Element-wise addition
np.add(my_series,my_series)[1:3] # pandas is built on NumPy, so NumPy functions also work on pandas objects
Cinderella (2015) 170 Ant-Man (2015) 160 dtype: int64
# Largest value in the Series.
np.max(my_series)
100
# Keep only the entries whose score is strictly between 50 and 55.
# The boolean masks are built from the raw score array, which lines up
# position-by-position with my_series.
above_50 = rt_value > 50
below_55 = rt_value < 55
my_series[above_50 & below_55]
Unbroken (2014) 51 5 Flights Up (2015) 52 Saint Laurent (2015) 51 The Age of Adaline (2015) 54 Maggie (2015) 54 Escobar: Paradise Lost (2015) 52 Woman in Gold (2015) 52 Minions (2015) 54 Spare Parts (2015) 52 dtype: int64
type(fandango) # the object returned by read_csv is a DataFrame
pandas.core.frame.DataFrame
# Use the FILM column as the row index. drop=False keeps FILM as a regular
# column as well, instead of removing it once it becomes the index.
indexed = fandango.set_index("FILM", drop=False)
# Only the first two rows are kept for the examples that follow.
new_data = indexed.head(2)
# Slice out the second of those rows.
new_data[1:2]
FILM | RottenTomatoes | RottenTomatoes_User | Metacritic | Metacritic_User | IMDB | Fandango_Stars | Fandango_Ratingvalue | RT_norm | RT_user_norm | … | IMDB_norm | RT_norm_round | RT_user_norm_round | Metacritic_norm_round | Metacritic_user_norm_round | IMDB_norm_round | Metacritic_user_vote_count | IMDB_user_vote_count | Fandango_votes | Fandango_Difference | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
FILM | |||||||||||||||||||||
Cinderella (2015) | Cinderella (2015) | 85 | 80 | 67 | 7.5 | 7.1 | 5.0 | 4.5 | 4.25 | 4.0 | … | 3.55 | 4.5 | 4.0 | 3.5 | 4.0 | 3.5 | 249 | 65709 | 12640 | 0.5 |
1 rows × 22 columns
import numpy as np
# dtype of every column, as a Series indexed by column name.
types = new_data.dtypes
print(types)
# Labels of the columns whose dtype is float64.
float_index = types.index[types.values == 'float64']
# Keep only those float64 columns; every row is retained.
new_data = new_data.loc[:, float_index]
print(new_data)
FILM object
RottenTomatoes int64
RottenTomatoes_User int64
Metacritic int64
Metacritic_User float64
IMDB float64
Fandango_Stars float64
Fandango_Ratingvalue float64
RT_norm float64
RT_user_norm float64
Metacritic_norm float64
Metacritic_user_nom float64
IMDB_norm float64
RT_norm_round float64
RT_user_norm_round float64
Metacritic_norm_round float64
Metacritic_user_norm_round float64
IMDB_norm_round float64
Metacritic_user_vote_count int64
IMDB_user_vote_count int64
Fandango_votes int64
Fandango_Difference float64
dtype: object
Metacritic_User IMDB Fandango_Stars \
FILM
Avengers: Age of Ultron (2015) 7.1 7.8 5.0
Cinderella (2015) 7.5 7.1 5.0
Fandango_Ratingvalue RT_norm RT_user_norm \
FILM
Avengers: Age of Ultron (2015) 4.5 3.70 4.3
Cinderella (2015) 4.5 4.25 4.0
Metacritic_norm Metacritic_user_nom \
FILM
Avengers: Age of Ultron (2015) 3.30 3.55
Cinderella (2015) 3.35 3.75
IMDB_norm RT_norm_round RT_user_norm_round \
FILM
Avengers: Age of Ultron (2015) 3.90 3.5 4.5
Cinderella (2015) 3.55 4.5 4.0
Metacritic_norm_round \
FILM
Avengers: Age of Ultron (2015) 3.5
Cinderella (2015) 3.5
Metacritic_user_norm_round IMDB_norm_round \
FILM
Avengers: Age of Ultron (2015) 3.5 4.0
Cinderella (2015) 4.0 3.5
Fandango_Difference
FILM
Avengers: Age of Ultron (2015) 0.5
Cinderella (2015) 0.5
new_data.apply(lambda x:np.std(x)) # standard deviation (not variance) of each column; apply works column by column by default
Metacritic_User 0.200
IMDB 0.350
Fandango_Stars 0.000
Fandango_Ratingvalue 0.000
RT_norm 0.275
RT_user_norm 0.150
Metacritic_norm 0.025
Metacritic_user_nom 0.100
IMDB_norm 0.175
RT_norm_round 0.500
RT_user_norm_round 0.250
Metacritic_norm_round 0.000
Metacritic_user_norm_round 0.250
IMDB_norm_round 0.250
Fandango_Difference 0.000
dtype: float64
new_data.apply(lambda x:np.std(x),axis=1) # axis=1: standard deviation across the columns of each row (one result per film)
FILM
Avengers: Age of Ultron (2015) 1.611714
Cinderella (2015) 1.554778
dtype: float64