# pandas uses zero-based indexing.
# Series object representing the row at index 0.
print(food_info.loc[0])  # fetch a single row by label

# Series object representing the seventh row.
food_info.loc[6]

# Will throw an error: "KeyError: 'the label [8620] is not in the [index]'"
# food_info.loc[8620]

# dtype names reported by pandas:
#   object   - For string values
#   int      - For integer values
#   float    - For float values
#   datetime - For time values
#   bool     - For Boolean values
# print(food_info.dtypes)

# Returns a DataFrame containing the rows at indexes 3, 4, 5, and 6.
print("loc_3_6:\n", food_info.loc[3:6])  # slice rows by label

# Returns a DataFrame containing the rows at indexes 2, 5, and 10.
# Either of the following approaches will work.
# Method 1
two_five_ten = [2, 5, 10]
print("loc_3_6_1:\n", food_info.loc[two_five_ten])
# Method 2
print("loc_3_6_2:\n", food_info.loc[[2, 5, 10]])
# Series object representing the "NDB_No" column.
ndb_col = food_info["NDB_No"]  # index with a column name to get that column
print("ndb_col:\n", ndb_col)

# Alternatively, you can access a column by passing in a string variable.
col_name = "NDB_No"
ndb_col = food_info[col_name]
print("ndb_col_v2:\n", ndb_col)

print(food_info["Iron_(mg)"])

# Divides each value in the column by 1000 and returns a Series object.
div_1000 = food_info["Iron_(mg)"] / 1000
print("div_1000:\n", div_1000)

# Adds 100 to each value in the column and returns a Series object.
add_100 = food_info["Iron_(mg)"] + 100
print("add_100:\n", add_100)

# Subtracts 100 from each value in the column and returns a Series object.
sub_100 = food_info["Iron_(mg)"] - 100
print("sub_100:\n", sub_100)

# Multiplies each value in the column by 2 and returns a Series object.
mult_2 = food_info["Iron_(mg)"] * 2
print("mult_2:\n", mult_2)
# It applies the arithmetic operator to the first value in both columns,
# the second value in both columns, and so on.
water_energy = food_info["Water_(g)"] * food_info["Energ_Kcal"]

iron_grams = food_info["Iron_(mg)"] / 1000
# print(food_info.shape)
food_info["Iron_(g)"] = iron_grams  # create a new column and assign values to it
print(food_info.shape)
# The "Vit_A_IU" column ranges from 0 to 100000, while the "Fiber_TD_(g)"
# column ranges from 0 to 79. For certain calculations, columns like
# "Vit_A_IU" can have a greater effect on the result, due to the scale of
# the values, so each column is rescaled by dividing by its largest value.

# The largest value in the "Energ_Kcal" column.
# (.max(), not .min(): the divisor must be the column maximum.)
max_calories = food_info["Energ_Kcal"].max()
print(max_calories)

# Divide the values in "Energ_Kcal" by the largest value.
normalized_calories = food_info["Energ_Kcal"] / max_calories
normalized_protein = food_info["Protein_(g)"] / food_info["Protein_(g)"].max()
normalized_fat = food_info["Lipid_Tot_(g)"] / food_info["Lipid_Tot_(g)"].max()

# Store the normalized protein and fat columns on the DataFrame.
food_info["Normalized_Protein"] = normalized_protein
food_info["Normalized_Fat"] = normalized_fat
print(food_info.shape)
# By default, pandas will sort the data by the column we specify in
# ascending order and return a new DataFrame.
# print(food_info["Sodium_(mg)"])

# Sorts the DataFrame in-place, rather than returning a new DataFrame.
food_info.sort_values("Sodium_(mg)", inplace=True)  # ascending sort on the given column
print(food_info["Sodium_(mg)"])

# Sorts by descending order, rather than ascending.
food_info.sort_values("Sodium_(mg)", inplace=True, ascending=False)  # descending
print(food_info["Sodium_(mg)"])
760 0.0
758 0.0
405 0.0
761 0.0
2269 0.0
...
8184 NaN
8185 NaN
8195 NaN
8251 NaN
8267 NaN
Name: Sodium_(mg), Length: 8618, dtype: float64
276 38758.0
5814 27360.0
6192 26050.0
1242 26000.0
1245 24000.0
...
8184 NaN
8185 NaN
8195 NaN
8251 NaN
8267 NaN
Name: Sodium_(mg), Length: 8618, dtype: float64
# We have to filter out the missing values before we calculate the mean.
# NOTE(review): age_is_null is defined outside this snippet; presumably it is
# the boolean null-mask of the "Age" column — confirm.
good_ages = titanic_survival["Age"][age_is_null == False]
# print(good_ages)
correct_mean_age = sum(good_ages) / len(good_ages)
print(correct_mean_age)
29.69911764705882
# Missing data is so common that many pandas methods automatically filter
# for it, so the ready-made .mean() can be used directly.
correct_mean_age = titanic_survival["Age"].mean()
print(correct_mean_age)
29.69911764705882
# Mean fare for each class: compute the average ticket price separately for
# every passenger class.
passenger_classes = [1, 2, 3]
fares_by_class = {}  # maps passenger class -> mean fare
for this_class in passenger_classes:
    # Keep only the rows belonging to the current class.
    pclass_rows = titanic_survival[titanic_survival["Pclass"] == this_class]
    pclass_fares = pclass_rows["Fare"]  # select the fare column
    fare_for_class = pclass_fares.mean()
    fares_by_class[this_class] = fare_for_class
print(fares_by_class)
# index tells the method which column to group by.
# values is the column that we want to apply the calculation to.
# aggfunc specifies the calculation we want to perform.
# In short: index is the grouping key, and values is the column summarised
# for each group.
# The string "mean" is used instead of np.mean: it does not depend on numpy
# being imported here and avoids the deprecation warning that newer pandas
# versions emit when a NumPy function is passed as aggfunc.
passenger_survival = titanic_survival.pivot_table(
    index="Pclass", values="Survived", aggfunc="mean"
)
print(passenger_survival)
Fare Survived
Embarked
C 10072.2962 93
Q 1022.2543 30
S 17439.3988 217
# Specifying axis=1 or axis='columns' will drop any columns that have null
# values (axis selects the dimension to drop along).
drop_na_columns = titanic_survival.dropna(axis=1)

# With axis=0 and subset, any row that has a missing value in "Age" or "Sex"
# is dropped.
new_titanic_survival = titanic_survival.dropna(axis=0, subset=["Age", "Sex"])
print(new_titanic_survival.head(2))
PassengerId Survived Pclass \
0 1 0 3
1 2 1 1
Name Sex Age SibSp \
0 Braund, Mr. Owen Harris male 22.0 1
1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1
Parch Ticket Fare Cabin Embarked
0 0 A/5 21171 7.2500 NaN S
1 0 PC 17599 71.2833 C85 C
PassengerId Survived Pclass Name \
630 631 1 1 Barkworth, Mr. Algernon Henry Wilson
851 852 0 3 Svensson, Mr. Johan
493 494 0 1 Artagaveytia, Mr. Ramon
96 97 0 1 Goldschmidt, Mr. George B
116 117 0 3 Connors, Mr. Patrick
672 673 0 2 Mitchell, Mr. Henry Michael
745 746 0 1 Crosby, Capt. Edward Gifford
33 34 0 2 Wheadon, Mr. Edward H
54 55 0 1 Ostby, Mr. Engelhart Cornelius
280 281 0 3 Duane, Mr. Frank
Sex Age SibSp Parch Ticket Fare Cabin Embarked
630 male 80.0 0 0 27042 30.0000 A23 S
851 male 74.0 0 0 347060 7.7750 NaN S
493 male 71.0 0 0 PC 17609 49.5042 NaN C
96 male 71.0 0 0 PC 17754 34.6542 A5 C
116 male 70.5 0 0 370369 7.7500 NaN Q
672 male 70.0 0 0 C.A. 24580 10.5000 NaN S
745 male 70.0 1 1 WE/P 5735 71.0000 B22 S
33 male 66.0 0 0 C.A. 24579 10.5000 NaN S
54 male 65.0 0 1 113509 61.9792 B30 C
280 male 65.0 0 0 336439 7.7500 NaN Q
-------------
PassengerId Survived Pclass Name Sex \
0 631 1 1 Barkworth, Mr. Algernon Henry Wilson male
1 852 0 3 Svensson, Mr. Johan male
2 494 0 1 Artagaveytia, Mr. Ramon male
3 97 0 1 Goldschmidt, Mr. George B male
4 117 0 3 Connors, Mr. Patrick male
5 673 0 2 Mitchell, Mr. Henry Michael male
6 746 0 1 Crosby, Capt. Edward Gifford male
7 34 0 2 Wheadon, Mr. Edward H male
8 55 0 1 Ostby, Mr. Engelhart Cornelius male
9 281 0 3 Duane, Mr. Frank male
10 457 0 1 Millet, Mr. Francis Davis male
Age SibSp Parch Ticket Fare Cabin Embarked
0 80.0 0 0 27042 30.0000 A23 S
1 74.0 0 0 347060 7.7750 NaN S
2 71.0 0 0 PC 17609 49.5042 NaN C
3 71.0 0 0 PC 17754 34.6542 A5 C
4 70.5 0 0 370369 7.7500 NaN Q
5 70.0 0 0 C.A. 24580 10.5000 NaN S
6 70.0 1 1 WE/P 5735 71.0000 B22 S
7 66.0 0 0 C.A. 24579 10.5000 NaN S
8 65.0 0 1 113509 61.9792 B30 C
9 65.0 0 0 336439 7.7500 NaN Q
10 65.0 0 0 13509 26.5500 E38 S
3.3 函数操作
自定义apply函数
def hundredth_row(column):
    """Return the hundredth item from a Series.

    Args:
        column: A Series whose index contains the label 99.

    Returns:
        The value stored under label 99.

    Raises:
        KeyError: If label 99 is not present in the index.
    """
    # Extract the hundredth item (label 99, because labels start at 0).
    hundredth_item = column.loc[99]
    return hundredth_item
# Return the hundredth item from each column.
# The result gets its own name so that the hundredth_row function is not
# overwritten and this snippet can be run more than once.
hundredth_items = titanic_survival.apply(hundredth_row)
print(hundredth_items)
PassengerId 100
Survived 0
Pclass 2
Name Kantor, Mr. Sinai
Sex male
Age 34.0
SibSp 1
Parch 0
Ticket 244367
Fare 26.0
Cabin NaN
Embarked S
dtype: object
PassengerId 0
Survived 0
Pclass 0
Name 0
Sex 0
Age 177
SibSp 0
Parch 0
Ticket 0
Fare 0
Cabin 687
Embarked 2
dtype: int64
# By passing in the axis=1 argument, we can use the DataFrame.apply() method
# to iterate over rows instead of columns.
# Data transformation: turn the numeric class into a readable label.
def which_class(row):
    """Map a row's "Pclass" value to a human-readable class name.

    Args:
        row: A mapping-like row that can be indexed with 'Pclass'.

    Returns:
        "Unknown" when the value is null, "First Class" / "Second Class" /
        "Third Class" for 1 / 2 / 3, and None for any other value.
    """
    pclass = row['Pclass']
    if pd.isnull(pclass):
        return "Unknown"
    elif pclass == 1:
        return "First Class"
    elif pclass == 2:
        return "Second Class"
    elif pclass == 3:
        return "Third Class"
# Label every passenger row with its class name (axis=1 applies per row).
classes = titanic_survival.apply(which_class, axis=1)
print(classes)
0 Third Class
1 First Class
2 Third Class
3 First Class
4 Third Class
...
886 Second Class
887 First Class
888 Third Class
889 First Class
890 Third Class
Length: 891, dtype: object
# Age
def is_minor(row):
    """Return True when the row's "Age" is below 18, otherwise False.

    Args:
        row: A mapping-like row that can be indexed with "Age".

    Returns:
        bool: True if Age < 18. A missing (NaN) age compares as not less
        than 18 and therefore yields False.
    """
    return bool(row["Age"] < 18)
# Flag each passenger row as minor / not minor.
minors = titanic_survival.apply(is_minor, axis=1)
print(minors)


def generate_age_label(row):
    """Classify a row by its "Age" value.

    Args:
        row: A mapping-like row that can be indexed with "Age".

    Returns:
        "unknown" when the age is null, "minor" when it is below 18,
        and "adult" otherwise.
    """
    age = row["Age"]
    if pd.isnull(age):
        return "unknown"
    elif age < 18:
        return "minor"
    else:
        return "adult"


# Build the age label for every passenger row.
age_labels = titanic_survival.apply(generate_age_label, axis=1)
print(age_labels)
<class 'pandas.core.series.Series'>
0 Avengers: Age of Ultron (2015)
1 Cinderella (2015)
2 Ant-Man (2015)
3 Do You Believe? (2015)
4 Hot Tub Time Machine 2 (2015)
Name: FILM, dtype: object
0 74
1 85
2 80
3 18
4 14
Name: RottenTomatoes, dtype: int64
新建Series结构
# Import the Series object from pandas
from pandas import Series

# .values exposes the data of each Series; the type is printed below.
film_names = series_film.values
print(type(film_names))
print("film_names:\n", film_names)

rt_scores = series_rt.values
print(rt_scores)
<class 'numpy.ndarray'>
film_names:
['Avengers: Age of Ultron (2015)' 'Cinderella (2015)' 'Ant-Man (2015)'
'Do You Believe? (2015)' 'Hot Tub Time Machine 2 (2015)'
'The Water Diviner (2015)' 'Irrational Man (2015)' 'Top Five (2014)'
'Shaun the Sheep Movie (2015)' 'Love & Mercy (2015)'
'Far From The Madding Crowd (2015)' 'Black Sea (2015)' 'Leviathan (2014)'
'Unbroken (2014)' 'The Imitation Game (2014)' 'Taken 3 (2015)'
'Ted 2 (2015)' 'Southpaw (2015)'
'Night at the Museum: Secret of the Tomb (2014)' 'Pixels (2015)'
'McFarland, USA (2015)' 'Insidious: Chapter 3 (2015)'
'The Man From U.N.C.L.E. (2015)' 'Run All Night (2015)'
'Trainwreck (2015)' 'Selma (2014)' 'Ex Machina (2015)'
'Still Alice (2015)' 'Wild Tales (2014)' 'The End of the Tour (2015)'
'Red Army (2015)' 'When Marnie Was There (2015)'
'The Hunting Ground (2015)' 'The Boy Next Door (2015)' 'Aloha (2015)'
'The Loft (2015)' '5 Flights Up (2015)' 'Welcome to Me (2015)'
'Saint Laurent (2015)' 'Maps to the Stars (2015)'
"I'll See You In My Dreams (2015)" 'Timbuktu (2015)' 'About Elly (2015)'
'The Diary of a Teenage Girl (2015)'
'Kingsman: The Secret Service (2015)' 'Tomorrowland (2015)'
'The Divergent Series: Insurgent (2015)' 'Annie (2014)'
'Fantastic Four (2015)' 'Terminator Genisys (2015)'
'Pitch Perfect 2 (2015)' 'Entourage (2015)' 'The Age of Adaline (2015)'
'Hot Pursuit (2015)' 'The DUFF (2015)' 'Black or White (2015)'
'Project Almanac (2015)' 'Ricki and the Flash (2015)'
'Seventh Son (2015)' 'Mortdecai (2015)' 'Unfinished Business (2015)'
'American Ultra (2015)' 'True Story (2015)' 'Child 44 (2015)'
'Dark Places (2015)' 'Birdman (2014)' 'The Gift (2015)'
'Unfriended (2015)' 'Monkey Kingdom (2015)' 'Mr. Turner (2014)'
'Seymour: An Introduction (2015)' 'The Wrecking Crew (2015)'
'American Sniper (2015)' 'Furious 7 (2015)'
'The Hobbit: The Battle of the Five Armies (2014)' 'San Andreas (2015)'
'Straight Outta Compton (2015)' 'Vacation (2015)' 'Chappie (2015)'
'Poltergeist (2015)' 'Paper Towns (2015)' 'Big Eyes (2014)'
'Blackhat (2015)' 'Self/less (2015)' 'Sinister 2 (2015)'
'Little Boy (2015)' 'Me and Earl and The Dying Girl (2015)'
'Maggie (2015)' 'Mad Max: Fury Road (2015)' 'Spy (2015)'
'The SpongeBob Movie: Sponge Out of Water (2015)' 'Paddington (2015)'
'Dope (2015)' 'What We Do in the Shadows (2015)' 'The Overnight (2015)'
'The Salt of the Earth (2015)' 'Song of the Sea (2014)'
'Fifty Shades of Grey (2015)' 'Get Hard (2015)' 'Focus (2015)'
'Jupiter Ascending (2015)' 'The Gallows (2015)'
'The Second Best Exotic Marigold Hotel (2015)' 'Strange Magic (2015)'
'The Gunman (2015)' 'Hitman: Agent 47 (2015)' 'Cake (2015)'
'The Vatican Tapes (2015)' 'A Little Chaos (2015)'
'The 100-Year-Old Man Who Climbed Out the Window and Disappeared (2015)'
'Escobar: Paradise Lost (2015)' 'Into the Woods (2014)'
'It Follows (2015)' 'Inherent Vice (2014)' 'A Most Violent Year (2014)'
"While We're Young (2015)" 'Clouds of Sils Maria (2015)'
'Testament of Youth (2015)' 'Infinitely Polar Bear (2015)'
'Phoenix (2015)' 'The Wolfpack (2015)'
'The Stanford Prison Experiment (2015)' 'Tangerine (2015)'
'Magic Mike XXL (2015)' 'Home (2015)' 'The Wedding Ringer (2015)'
'Woman in Gold (2015)' 'The Last Five Years (2015)'
'Mission: Impossible – Rogue Nation (2015)' 'Amy (2015)'
'Jurassic World (2015)' 'Minions (2015)' 'Max (2015)'
'Paul Blart: Mall Cop 2 (2015)' 'The Longest Ride (2015)'
'The Lazarus Effect (2015)' 'The Woman In Black 2 Angel of Death (2015)'
'Danny Collins (2015)' 'Spare Parts (2015)' 'Serena (2015)'
'Inside Out (2015)' 'Mr. Holmes (2015)' "'71 (2015)"
'Two Days, One Night (2014)' 'Gett: The Trial of Viviane Amsalem (2015)'
'Kumiko, The Treasure Hunter (2015)']
[ 74 85 80 18 14 63 42 86 99 89 84 82 99 51 90 9 46 59
50 17 79 59 68 60 85 99 92 88 96 92 96 89 92 10 19 11
52 71 51 60 94 99 97 95 75 50 30 27 9 26 67 32 54 8
71 39 34 64 12 12 11 46 45 26 26 92 93 60 94 98 100 93
72 81 61 50 90 27 30 31 55 72 34 20 13 20 81 54 97 93
78 98 87 96 82 96 99 25 29 57 26 16 62 17 17 7 49 13
40 67 52 71 96 73 90 83 89 81 80 99 84 84 95 62 45 27
52 60 92 97 71 54 35 5 31 14 22 77 52 18 98 87 97 97
100 87]
# Build a Series that uses the film-name strings as the index (the keys) and
# the review scores as the values. Integer positions are still available.
series_custom = Series(rt_scores, index=film_names)

# Look up several films by label.
series_custom[['Minions (2015)', 'Leviathan (2014)']]

# Positional slice (items 5 through 9); .iloc makes it explicit that the
# integers are positions rather than index labels.
fiveten = series_custom.iloc[5:10]
print(fiveten)
The Water Diviner (2015) 63
Irrational Man (2015) 42
Top Five (2014) 86
Shaun the Sheep Movie (2015) 99
Love & Mercy (2015) 89
dtype: int64
original_index:
['Avengers: Age of Ultron (2015)', 'Cinderella (2015)', 'Ant-Man (2015)', 'Do You Believe? (2015)', 'Hot Tub Time Machine 2 (2015)', 'The Water Diviner (2015)', 'Irrational Man (2015)', 'Top Five (2014)', 'Shaun the Sheep Movie (2015)', 'Love & Mercy (2015)', 'Far From The Madding Crowd (2015)', 'Black Sea (2015)', 'Leviathan (2014)', 'Unbroken (2014)', 'The Imitation Game (2014)', 'Taken 3 (2015)', 'Ted 2 (2015)', 'Southpaw (2015)', 'Night at the Museum: Secret of the Tomb (2014)', 'Pixels (2015)', 'McFarland, USA (2015)', 'Insidious: Chapter 3 (2015)', 'The Man From U.N.C.L.E. (2015)', 'Run All Night (2015)', 'Trainwreck (2015)', 'Selma (2014)', 'Ex Machina (2015)', 'Still Alice (2015)', 'Wild Tales (2014)', 'The End of the Tour (2015)', 'Red Army (2015)', 'When Marnie Was There (2015)', 'The Hunting Ground (2015)', 'The Boy Next Door (2015)', 'Aloha (2015)', 'The Loft (2015)', '5 Flights Up (2015)', 'Welcome to Me (2015)', 'Saint Laurent (2015)', 'Maps to the Stars (2015)', "I'll See You In My Dreams (2015)", 'Timbuktu (2015)', 'About Elly (2015)', 'The Diary of a Teenage Girl (2015)', 'Kingsman: The Secret Service (2015)', 'Tomorrowland (2015)', 'The Divergent Series: Insurgent (2015)', 'Annie (2014)', 'Fantastic Four (2015)', 'Terminator Genisys (2015)', 'Pitch Perfect 2 (2015)', 'Entourage (2015)', 'The Age of Adaline (2015)', 'Hot Pursuit (2015)', 'The DUFF (2015)', 'Black or White (2015)', 'Project Almanac (2015)', 'Ricki and the Flash (2015)', 'Seventh Son (2015)', 'Mortdecai (2015)', 'Unfinished Business (2015)', 'American Ultra (2015)', 'True Story (2015)', 'Child 44 (2015)', 'Dark Places (2015)', 'Birdman (2014)', 'The Gift (2015)', 'Unfriended (2015)', 'Monkey Kingdom (2015)', 'Mr. 
Turner (2014)', 'Seymour: An Introduction (2015)', 'The Wrecking Crew (2015)', 'American Sniper (2015)', 'Furious 7 (2015)', 'The Hobbit: The Battle of the Five Armies (2014)', 'San Andreas (2015)', 'Straight Outta Compton (2015)', 'Vacation (2015)', 'Chappie (2015)', 'Poltergeist (2015)', 'Paper Towns (2015)', 'Big Eyes (2014)', 'Blackhat (2015)', 'Self/less (2015)', 'Sinister 2 (2015)', 'Little Boy (2015)', 'Me and Earl and The Dying Girl (2015)', 'Maggie (2015)', 'Mad Max: Fury Road (2015)', 'Spy (2015)', 'The SpongeBob Movie: Sponge Out of Water (2015)', 'Paddington (2015)', 'Dope (2015)', 'What We Do in the Shadows (2015)', 'The Overnight (2015)', 'The Salt of the Earth (2015)', 'Song of the Sea (2014)', 'Fifty Shades of Grey (2015)', 'Get Hard (2015)', 'Focus (2015)', 'Jupiter Ascending (2015)', 'The Gallows (2015)', 'The Second Best Exotic Marigold Hotel (2015)', 'Strange Magic (2015)', 'The Gunman (2015)', 'Hitman: Agent 47 (2015)', 'Cake (2015)', 'The Vatican Tapes (2015)', 'A Little Chaos (2015)', 'The 100-Year-Old Man Who Climbed Out the Window and Disappeared (2015)', 'Escobar: Paradise Lost (2015)', 'Into the Woods (2014)', 'It Follows (2015)', 'Inherent Vice (2014)', 'A Most Violent Year (2014)', "While We're Young (2015)", 'Clouds of Sils Maria (2015)', 'Testament of Youth (2015)', 'Infinitely Polar Bear (2015)', 'Phoenix (2015)', 'The Wolfpack (2015)', 'The Stanford Prison Experiment (2015)', 'Tangerine (2015)', 'Magic Mike XXL (2015)', 'Home (2015)', 'The Wedding Ringer (2015)', 'Woman in Gold (2015)', 'The Last Five Years (2015)', 'Mission: Impossible – Rogue Nation (2015)', 'Amy (2015)', 'Jurassic World (2015)', 'Minions (2015)', 'Max (2015)', 'Paul Blart: Mall Cop 2 (2015)', 'The Longest Ride (2015)', 'The Lazarus Effect (2015)', 'The Woman In Black 2 Angel of Death (2015)', 'Danny Collins (2015)', 'Spare Parts (2015)', 'Serena (2015)', 'Inside Out (2015)', 'Mr. 
Holmes (2015)', "'71 (2015)", 'Two Days, One Night (2014)', 'Gett: The Trial of Viviane Amsalem (2015)', 'Kumiko, The Treasure Hunter (2015)']
sorted_by_index:
'71 (2015) 97
5 Flights Up (2015) 52
A Little Chaos (2015) 40
A Most Violent Year (2014) 90
About Elly (2015) 97
..
What We Do in the Shadows (2015) 96
When Marnie Was There (2015) 89
While We're Young (2015) 83
Wild Tales (2014) 96
Woman in Gold (2015) 52
Length: 146, dtype: int64
Paul Blart: Mall Cop 2 (2015) 5
Hitman: Agent 47 (2015) 7
Hot Pursuit (2015) 8
Fantastic Four (2015) 9
Taken 3 (2015) 9
The Boy Next Door (2015) 10
The Loft (2015) 11
Unfinished Business (2015) 11
Mortdecai (2015) 12
Seventh Son (2015) 12
dtype: int64
# The values in a Series object are treated as an ndarray, the core data
# type in NumPy.
import numpy as np

# Add each value with each other
print(np.add(series_custom, series_custom))

# Apply sine function to each value
np.sin(series_custom)

# Return the highest value (will return a single value not a Series)
np.max(series_custom)
Avengers: Age of Ultron (2015) 148
Cinderella (2015) 170
Ant-Man (2015) 160
Do You Believe? (2015) 36
Hot Tub Time Machine 2 (2015) 28
...
Mr. Holmes (2015) 174
'71 (2015) 194
Two Days, One Night (2014) 194
Gett: The Trial of Viviane Amsalem (2015) 200
Kumiko, The Treasure Hunter (2015) 174
Length: 146, dtype: int64
100
# Will actually return a Series object with a boolean value for each film.
series_custom > 50

# Use the boolean Series as a mask to keep only the scores above 50.
series_greater_than_50 = series_custom[series_custom > 50]

# Combine two conditions with & to keep scores strictly between 50 and 75.
criteria_one = series_custom > 50
criteria_two = series_custom < 75
both_criteria = series_custom[criteria_one & criteria_two]
print(both_criteria)
Avengers: Age of Ultron (2015) 74
The Water Diviner (2015) 63
Unbroken (2014) 51
Southpaw (2015) 59
Insidious: Chapter 3 (2015) 59
The Man From U.N.C.L.E. (2015) 68
Run All Night (2015) 60
5 Flights Up (2015) 52
Welcome to Me (2015) 71
Saint Laurent (2015) 51
Maps to the Stars (2015) 60
Pitch Perfect 2 (2015) 67
The Age of Adaline (2015) 54
The DUFF (2015) 71
Ricki and the Flash (2015) 64
Unfriended (2015) 60
American Sniper (2015) 72
The Hobbit: The Battle of the Five Armies (2014) 61
Paper Towns (2015) 55
Big Eyes (2014) 72
Maggie (2015) 54
Focus (2015) 57
The Second Best Exotic Marigold Hotel (2015) 62
The 100-Year-Old Man Who Climbed Out the Window and Disappeared (2015) 67
Escobar: Paradise Lost (2015) 52
Into the Woods (2014) 71
Inherent Vice (2014) 73
Magic Mike XXL (2015) 62
Woman in Gold (2015) 52
The Last Five Years (2015) 60
Jurassic World (2015) 71
Minions (2015) 54
Spare Parts (2015) 52
dtype: int64
FILM
Avengers: Age of Ultron (2015) 80.0
Cinderella (2015) 82.5
Ant-Man (2015) 85.0
Do You Believe? (2015) 51.0
Hot Tub Time Machine 2 (2015) 21.0
...
Mr. Holmes (2015) 82.5
'71 (2015) 89.5
Two Days, One Night (2014) 87.5
Gett: The Trial of Viviane Amsalem (2015) 90.5
Kumiko, The Treasure Hunter (2015) 75.0
Length: 146, dtype: float64
# set_index will return a new DataFrame that is indexed by the values in the
# specified column. By default it also drops that column from the DataFrame;
# passing drop=False keeps the FILM column as a regular column as well.
fandango = pd.read_csv('../../data/fandango_score_comparison.csv')
print(type(fandango))

# Use the film name as the index.
fandango_films = fandango.set_index('FILM', drop=False)
print(fandango_films.index)
<class 'pandas.core.frame.DataFrame'>
Index(['Avengers: Age of Ultron (2015)', 'Cinderella (2015)', 'Ant-Man (2015)',
'Do You Believe? (2015)', 'Hot Tub Time Machine 2 (2015)',
'The Water Diviner (2015)', 'Irrational Man (2015)', 'Top Five (2014)',
'Shaun the Sheep Movie (2015)', 'Love & Mercy (2015)',
...
'The Woman In Black 2 Angel of Death (2015)', 'Danny Collins (2015)',
'Spare Parts (2015)', 'Serena (2015)', 'Inside Out (2015)',
'Mr. Holmes (2015)', ''71 (2015)', 'Two Days, One Night (2014)',
'Gett: The Trial of Viviane Amsalem (2015)',
'Kumiko, The Treasure Hunter (2015)'],
dtype='object', name='FILM', length=146)
# Slice using either bracket notation or loc[]; string labels can be sliced
# too.
fandango_films["Avengers: Age of Ultron (2015)":"Hot Tub Time Machine 2 (2015)"]
fandango_films.loc["Avengers: Age of Ultron (2015)":"Hot Tub Time Machine 2 (2015)"]

# Specific movie
fandango_films.loc['Kumiko, The Treasure Hunter (2015)']

# Selecting list of movies
movies = ['Kumiko, The Treasure Hunter (2015)', 'Do You Believe? (2015)', 'Ant-Man (2015)']
fandango_films.loc[movies]

# When selecting multiple rows, a DataFrame is returned,
# but when selecting an individual row, a Series object is returned instead.
FILM
RottenTomatoes
RottenTomatoes_User
Metacritic
Metacritic_User
IMDB
Fandango_Stars
Fandango_Ratingvalue
RT_norm
RT_user_norm
...
IMDB_norm
RT_norm_round
RT_user_norm_round
Metacritic_norm_round
Metacritic_user_norm_round
IMDB_norm_round
Metacritic_user_vote_count
IMDB_user_vote_count
Fandango_votes
Fandango_Difference
FILM
Kumiko, The Treasure Hunter (2015)
Kumiko, The Treasure Hunter (2015)
87
63
68
6.4
6.7
3.5
3.5
4.35
3.15
...
3.35
4.5
3.0
3.5
3.0
3.5
19
5289
41
0.0
Do You Believe? (2015)
Do You Believe? (2015)
18
84
22
4.7
5.4
5.0
4.5
0.90
4.20
...
2.70
1.0
4.0
1.0
2.5
2.5
31
3136
1793
0.5
Ant-Man (2015)
Ant-Man (2015)
80
90
64
8.1
7.8
5.0
4.5
4.00
4.50
...
3.90
4.0
4.5
3.0
4.0
4.0
627
103660
12055
0.5
3 rows × 22 columns
# The apply() method in Pandas allows us to specify Python logic.
# The apply() method requires you to pass in a vectorized operation
# that can be applied over each Series object.
import numpy as np

# returns the data types as a Series
types = fandango_films.dtypes
print(types)

# filter data types to just floats, index attributes returns just column names
float_columns = types[types.values == 'float64'].index

# use bracket notation to filter columns to just float columns
float_df = fandango_films[float_columns]
print(float_df)

# `x` is a Series object representing a column
deviations = float_df.apply(lambda x: np.std(x))
# print(deviations)
FILM object
RottenTomatoes int64
RottenTomatoes_User int64
Metacritic int64
Metacritic_User float64
IMDB float64
Fandango_Stars float64
Fandango_Ratingvalue float64
RT_norm float64
RT_user_norm float64
Metacritic_norm float64
Metacritic_user_nom float64
IMDB_norm float64
RT_norm_round float64
RT_user_norm_round float64
Metacritic_norm_round float64
Metacritic_user_norm_round float64
IMDB_norm_round float64
Metacritic_user_vote_count int64
IMDB_user_vote_count int64
Fandango_votes int64
Fandango_Difference float64
dtype: object
Metacritic_User IMDB \
FILM
Avengers: Age of Ultron (2015) 7.1 7.8
Cinderella (2015) 7.5 7.1
Ant-Man (2015) 8.1 7.8
Do You Believe? (2015) 4.7 5.4
Hot Tub Time Machine 2 (2015) 3.4 5.1
... ... ...
Mr. Holmes (2015) 7.9 7.4
'71 (2015) 7.5 7.2
Two Days, One Night (2014) 8.8 7.4
Gett: The Trial of Viviane Amsalem (2015) 7.3 7.8
Kumiko, The Treasure Hunter (2015) 6.4 6.7
Fandango_Stars \
FILM
Avengers: Age of Ultron (2015) 5.0
Cinderella (2015) 5.0
Ant-Man (2015) 5.0
Do You Believe? (2015) 5.0
Hot Tub Time Machine 2 (2015) 3.5
... ...
Mr. Holmes (2015) 4.0
'71 (2015) 3.5
Two Days, One Night (2014) 3.5
Gett: The Trial of Viviane Amsalem (2015) 3.5
Kumiko, The Treasure Hunter (2015) 3.5
Fandango_Ratingvalue RT_norm \
FILM
Avengers: Age of Ultron (2015) 4.5 3.70
Cinderella (2015) 4.5 4.25
Ant-Man (2015) 4.5 4.00
Do You Believe? (2015) 4.5 0.90
Hot Tub Time Machine 2 (2015) 3.0 0.70
... ... ...
Mr. Holmes (2015) 4.0 4.35
'71 (2015) 3.5 4.85
Two Days, One Night (2014) 3.5 4.85
Gett: The Trial of Viviane Amsalem (2015) 3.5 5.00
Kumiko, The Treasure Hunter (2015) 3.5 4.35
RT_user_norm Metacritic_norm \
FILM
Avengers: Age of Ultron (2015) 4.30 3.30
Cinderella (2015) 4.00 3.35
Ant-Man (2015) 4.50 3.20
Do You Believe? (2015) 4.20 1.10
Hot Tub Time Machine 2 (2015) 1.40 1.45
... ... ...
Mr. Holmes (2015) 3.90 3.35
'71 (2015) 4.10 4.15
Two Days, One Night (2014) 3.90 4.45
Gett: The Trial of Viviane Amsalem (2015) 4.05 4.50
Kumiko, The Treasure Hunter (2015) 3.15 3.40
Metacritic_user_nom IMDB_norm \
FILM
Avengers: Age of Ultron (2015) 3.55 3.90
Cinderella (2015) 3.75 3.55
Ant-Man (2015) 4.05 3.90
Do You Believe? (2015) 2.35 2.70
Hot Tub Time Machine 2 (2015) 1.70 2.55
... ... ...
Mr. Holmes (2015) 3.95 3.70
'71 (2015) 3.75 3.60
Two Days, One Night (2014) 4.40 3.70
Gett: The Trial of Viviane Amsalem (2015) 3.65 3.90
Kumiko, The Treasure Hunter (2015) 3.20 3.35
RT_norm_round RT_user_norm_round \
FILM
Avengers: Age of Ultron (2015) 3.5 4.5
Cinderella (2015) 4.5 4.0
Ant-Man (2015) 4.0 4.5
Do You Believe? (2015) 1.0 4.0
Hot Tub Time Machine 2 (2015) 0.5 1.5
... ... ...
Mr. Holmes (2015) 4.5 4.0
'71 (2015) 5.0 4.0
Two Days, One Night (2014) 5.0 4.0
Gett: The Trial of Viviane Amsalem (2015) 5.0 4.0
Kumiko, The Treasure Hunter (2015) 4.5 3.0
Metacritic_norm_round \
FILM
Avengers: Age of Ultron (2015) 3.5
Cinderella (2015) 3.5
Ant-Man (2015) 3.0
Do You Believe? (2015) 1.0
Hot Tub Time Machine 2 (2015) 1.5
... ...
Mr. Holmes (2015) 3.5
'71 (2015) 4.0
Two Days, One Night (2014) 4.5
Gett: The Trial of Viviane Amsalem (2015) 4.5
Kumiko, The Treasure Hunter (2015) 3.5
Metacritic_user_norm_round \
FILM
Avengers: Age of Ultron (2015) 3.5
Cinderella (2015) 4.0
Ant-Man (2015) 4.0
Do You Believe? (2015) 2.5
Hot Tub Time Machine 2 (2015) 1.5
... ...
Mr. Holmes (2015) 4.0
'71 (2015) 4.0
Two Days, One Night (2014) 4.5
Gett: The Trial of Viviane Amsalem (2015) 3.5
Kumiko, The Treasure Hunter (2015) 3.0
IMDB_norm_round \
FILM
Avengers: Age of Ultron (2015) 4.0
Cinderella (2015) 3.5
Ant-Man (2015) 4.0
Do You Believe? (2015) 2.5
Hot Tub Time Machine 2 (2015) 2.5
... ...
Mr. Holmes (2015) 3.5
'71 (2015) 3.5
Two Days, One Night (2014) 3.5
Gett: The Trial of Viviane Amsalem (2015) 4.0
Kumiko, The Treasure Hunter (2015) 3.5
Fandango_Difference
FILM
Avengers: Age of Ultron (2015) 0.5
Cinderella (2015) 0.5
Ant-Man (2015) 0.5
Do You Believe? (2015) 0.5
Hot Tub Time Machine 2 (2015) 0.5
... ...
Mr. Holmes (2015) 0.0
'71 (2015) 0.0
Two Days, One Night (2014) 0.0
Gett: The Trial of Viviane Amsalem (2015) 0.0
Kumiko, The Treasure Hunter (2015) 0.0
[146 rows x 15 columns]
FILM
Avengers: Age of Ultron (2015) 0.375
Cinderella (2015) 0.125
Ant-Man (2015) 0.225
Do You Believe? (2015) 0.925
Hot Tub Time Machine 2 (2015) 0.150
...
Mr. Holmes (2015) 0.025
'71 (2015) 0.175
Two Days, One Night (2014) 0.250
Gett: The Trial of Viviane Amsalem (2015) 0.200
Kumiko, The Treasure Hunter (2015) 0.025
Length: 146, dtype: float64