十三. How do I change the data type of a Pandas Series?
# coding: utf-8
# Lesson 13: changing the data type of a pandas Series.
import pandas as pd

# Load the drinks-by-country dataset, then look at the first rows and at
# the dtype of every column.
drinks = pd.read_csv('http://bit.ly/drinksbycountry')
drinks.head()
drinks.dtypes

# Convert an already-loaded column to float and confirm the new dtype.
drinks['beer_servings'] = drinks.beer_servings.astype(float)
drinks.dtypes

# Alternative: request the dtype while the file is being read.
drinks = pd.read_csv('http://bit.ly/drinksbycountry', dtype={'beer_servings': float})

#-----------------------#

# Second dataset, loaded with read_table (tab-separated by default).
orders = pd.read_table('http://bit.ly/chiporders')
orders.head()
orders.dtypes

# Strip the dollar sign so the prices can be cast to float and averaged.
# regex=False forces '$' to be treated as a literal character: as a regular
# expression it is the end-of-string anchor, and the default value of
# `regex` differs between pandas versions.
orders.item_price.str.replace('$', '', regex=False).astype(float).mean()
orders.head()

# Turn the boolean "contains" mask into 0/1 integers.
orders.item_name.str.contains('Chicken').astype(int).head()
十四. When should I use a groupby in Pandas?
# coding: utf-8
# Lesson 14: when to use groupby.
import pandas as pd

drinks = pd.read_csv('http://bit.ly/drinksbycountry')
drinks.head()

# Mean beer servings over the whole dataset ...
drinks.beer_servings.mean()

# ... and the same statistic computed separately for each continent.
drinks.groupby('continent').beer_servings.mean()

# Manual equivalent of one group: filter the rows first, then aggregate.
drinks[drinks.continent == 'Africa'].beer_servings.mean()

# Other aggregations work the same way; agg() takes several at once.
drinks.groupby('continent').beer_servings.max()
drinks.groupby('continent').beer_servings.agg(['count', 'min', 'max', 'mean'])

# Mean of every numeric column per continent. numeric_only=True is needed
# because the frame also holds the string column 'country'; pandas >= 2.0
# raises a TypeError instead of silently dropping non-numeric columns.
drinks.groupby('continent').mean(numeric_only=True)

# Notebook magic: get_ipython() is only defined when running under IPython/Jupyter.
get_ipython().run_line_magic('matplotlib', 'inline')

# Bar chart of the per-continent means.
drinks.groupby('continent').mean(numeric_only=True).plot(kind='bar')
十五. How do I explore a Pandas Series?
# coding: utf-8
# Lesson 15: exploring a pandas Series.
import pandas as pd

movies = pd.read_csv('http://bit.ly/imdbratings')
movies.head()
movies.dtypes

# --- 'genre' column: summary, frequencies, distinct values ---
movies['genre'].describe()
movies['genre'].value_counts()
movies['genre'].value_counts(normalize=True)   # proportions instead of counts
movies['genre'].unique()
movies['genre'].nunique()

# Cross-tabulate genre against content rating.
pd.crosstab(movies['genre'], movies['content_rating'])

# --- 'duration' column: summary statistics and frequencies ---
movies['duration'].describe()
movies['duration'].mean()
movies['duration'].value_counts()

# Notebook magic: get_ipython() is only defined when running under IPython/Jupyter.
get_ipython().run_line_magic('matplotlib', 'inline')

# Histogram of durations, then a bar chart of the genre frequencies.
movies['duration'].plot.hist()
movies['genre'].value_counts().plot.bar()
十六. How do I handle missing values in Pandas?
# coding: utf-8
# Lesson 16: handling missing values.
import pandas as pd

ufo = pd.read_csv('http://bit.ly/uforeports')
ufo.tail()

# Boolean masks marking missing / present cells.
ufo.isnull().tail()
ufo.notnull().tail()

# Summing a boolean mask counts its True values, which gives the number
# of missing cells per column.
ufo.isnull().sum()
pd.Series([True, False, True]).sum()

# Rows whose City is missing.
ufo[ufo.City.isnull()]

# dropna() returns a new frame; the repeated ufo.shape shows ufo is unchanged.
ufo.shape
ufo.dropna(how='any').shape    # drop rows with at least one missing value
ufo.shape
ufo.dropna(how='all').shape    # drop rows where every value is missing
# Only consider the two listed columns when deciding which rows to drop.
ufo.dropna(subset=['City', 'Shape Reported'], how='all').shape

# Fill missing shapes with a placeholder. The result is assigned back rather
# than using fillna(inplace=True) on the selected column: that selection can
# be a temporary copy, in which case the in-place fill never reaches `ufo`
# (pandas warns about this pattern and Copy-on-Write makes it a no-op).
ufo['Shape Reported'] = ufo['Shape Reported'].fillna(value='VARIOUS')

# dropna=False keeps any remaining NaN in the tally.
ufo['Shape Reported'].value_counts(dropna=False)
十七. What do I need to know about the Pandas index? (Part I)
# coding: utf-8
# Lesson 17: the pandas index, part I.
import pandas as pd

drinks = pd.read_csv('http://bit.ly/drinksbycountry')
drinks.head()

# Row labels, column labels and overall shape of the freshly loaded frame.
drinks.index
drinks.columns
drinks.shape

# A file read without a header row.
pd.read_csv('http://bit.ly/movieusers', sep='|', header=None).head()

# Boolean filtering of rows.
drinks[drinks['continent'] == 'South America']

# Label-based lookup with the current index: row label 3, one column.
drinks.loc[3, 'beer_servings']

# Use the 'country' column as the index (modifies drinks in place).
drinks.set_index('country', inplace=True)
drinks.head()
drinks.index
drinks.columns
drinks.shape

# The same kind of lookup, now keyed by country name.
drinks.loc['Andorra', 'wine_servings']
drinks.head()

# Clear the index name.
drinks.index.name = None
drinks.head()

# Restore the name, then move the index back into an ordinary column.
drinks.index.name = 'country'
drinks.reset_index(inplace=True)
drinks.head()

# describe() returns a DataFrame too, so it has its own index and columns
# and supports .loc lookups.
drinks.describe()
drinks.describe().index
drinks.describe().columns
drinks.describe().loc['25%', 'spirit_servings']
十八. What do I need to know about the Pandas index? (Part II)
# coding: utf-8
# Lesson 18: the pandas index, part II.
import pandas as pd

drinks = pd.read_csv('http://bit.ly/drinksbycountry')
drinks.head()

# 'continent' Series before re-indexing.
drinks['continent'].head()

# Re-index the frame by country; a Series selected from it carries that index.
drinks.set_index('country', inplace=True)
drinks.head()
drinks['continent'].head()

# value_counts() returns a Series: look at its values, pick one entry by
# label, and sort it by its index.
drinks['continent'].value_counts()
drinks['continent'].value_counts().values
drinks['continent'].value_counts()['Africa']
drinks['continent'].value_counts().sort_index()

# A small Series of populations keyed by country name.
people = pd.Series({'Albania': 3000000, 'Andorra': 85000}, name='population')
people

# Note: the multiplication is aligned on index labels, so each value is
# matched with the row that has the same country.
drinks['beer_servings'] * people

# Add the Series as a new column alongside the existing ones.
pd.concat([drinks, people], axis=1).head()