Explore a pandas Series
import pandas as pd
# Load the IMDb dataset from the CSV file into a DataFrame.
movies = pd.read_csv('imdb_1000.csv')
# Preview the first five rows (5 is also the default for head()).
movies.head(n=5)
star_rating title content_rating genre duration actors_list 0 9.3 The Shawshank Redemption R Crime 142 [u'Tim Robbins', u'Morgan Freeman', u'Bob Gunt... 1 9.2 The Godfather R Crime 175 [u'Marlon Brando', u'Al Pacino', u'James Caan'] 2 9.1 The Godfather: Part II R Crime 200 [u'Al Pacino', u'Robert De Niro', u'Robert Duv... 3 9.0 The Dark Knight PG-13 Action 152 [u'Christian Bale', u'Heath Ledger', u'Aaron E... 4 8.9 Pulp Fiction R Crime 154 [u'John Travolta', u'Uma Thurman', u'Samuel L....
# Show the data type pandas assigned to each column.
movies.dtypes
star_rating float64
title object
content_rating object
genre object
duration int64
actors_list object
dtype: object
# Select the 'genre' column; a single column of a DataFrame is a Series.
movies['genre']
0 Crime
1 Crime
2 Crime
3 Action
4 Crime
...
974 Comedy
975 Adventure
976 Action
977 Horror
978 Crime
Name: genre, Length: 979, dtype: object
# Summarize the object-typed 'genre' column: count, unique, top, freq.
genre_column = movies['genre']
genre_column.describe()
count 979
unique 16
top Drama
freq 278
Name: genre, dtype: object
# Count how many movies fall into each genre, most frequent first.
movies['genre'].value_counts()
Drama 278
Comedy 156
Action 136
Crime 124
Biography 77
Adventure 75
Animation 62
Horror 29
Mystery 16
Western 9
Thriller 5
Sci-Fi 5
Film-Noir 3
Family 2
History 1
Fantasy 1
Name: genre, dtype: int64
# Same tally, expressed as a fraction of all rows instead of raw counts.
movies['genre'].value_counts(normalize=True)
Drama 0.283963
Comedy 0.159346
Action 0.138917
Crime 0.126660
Biography 0.078652
Adventure 0.076609
Animation 0.063330
Horror 0.029622
Mystery 0.016343
Western 0.009193
Thriller 0.005107
Sci-Fi 0.005107
Film-Noir 0.003064
Family 0.002043
History 0.001021
Fantasy 0.001021
Name: genre, dtype: float64
# value_counts() returns a Series, so Series methods can be chained onto it.
genre_counts = movies['genre'].value_counts()
type(genre_counts)
pandas.core.series.Series
# Keep only the first five entries of the genre tally.
movies['genre'].value_counts().head()
Drama 278
Comedy 156
Action 136
Crime 124
Biography 77
Name: genre, dtype: int64
# List the distinct genre values as an array.
movies['genre'].unique()
array(['Crime', 'Action', 'Drama', 'Western', 'Adventure', 'Biography',
'Comedy', 'Animation', 'Mystery', 'Horror', 'Film-Noir', 'Sci-Fi',
'History', 'Thriller', 'Family', 'Fantasy'], dtype=object)
# Number of distinct genre values.
movies['genre'].nunique()
16
# Cross-tabulate genre (rows) against content rating (columns).
pd.crosstab(movies['genre'], movies['content_rating'])
content_rating APPROVED G GP NC-17 NOT RATED PASSED PG PG-13 R TV-MA UNRATED X genre Action 3 1 1 0 4 1 11 44 67 0 3 0 Adventure 3 2 0 0 5 1 21 23 17 0 2 0 Animation 3 20 0 0 3 0 25 5 5 0 1 0 Biography 1 2 1 0 1 0 6 29 36 0 0 0 Comedy 9 2 1 1 16 3 23 23 73 0 4 1 Crime 6 0 0 1 7 1 6 4 87 0 11 1 Drama 12 3 0 4 24 1 25 55 143 1 9 1 Family 0 1 0 0 0 0 1 0 0 0 0 0 Fantasy 0 0 0 0 0 0 0 0 1 0 0 0 Film-Noir 1 0 0 0 1 0 0 0 0 0 1 0 History 0 0 0 0 0 0 0 0 0 0 1 0 Horror 2 0 0 1 1 0 1 2 16 0 5 1 Mystery 4 1 0 0 1 0 1 2 6 0 1 0 Sci-Fi 1 0 0 0 0 0 0 1 3 0 0 0 Thriller 1 0 0 0 0 0 1 0 3 0 0 0 Western 1 0 0 0 2 0 2 1 3 0 0 0
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric 'duration' column.
duration_column = movies['duration']
duration_column.describe()
count 979.000000
mean 120.979571
std 26.218010
min 64.000000
25% 102.000000
50% 117.000000
75% 134.000000
max 242.000000
Name: duration, dtype: float64
# Individual statistics are also available as their own Series methods.
movies['duration'].mean()
120.97957099080695
# value_counts() also works on numeric columns: tally of each distinct duration.
movies['duration'].value_counts()
112 23
113 22
102 20
101 20
129 19
..
180 1
177 1
168 1
166 1
64 1
Name: duration, Length: 133, dtype: int64
Bonus time
% matplotlib inline
movies. duration. plot( kind= 'hist' )
<matplotlib.axes._subplots.AxesSubplot at 0x1cc39c5b488>
# Bar chart of the number of movies in each genre.
movies['genre'].value_counts().plot(kind='bar')
<matplotlib.axes._subplots.AxesSubplot at 0x1cc3a3ea948>