1.
a
import pandas as pd
import numpy as np
df_car = pd.read_csv('data/2002年-2018年上海机动车拍照拍卖.csv')
print(df_car.shape)
print(df_car.info())
print(df_car.head())
#(a) 找出哪轮拍卖的中标率首次小于 5%
df = df_car.copy()
df['percent'] = df.apply(lambda x:x['Total number of license issued']/x['Total number of applicants'],axis=1)
from dateutil.parser import parse
df.Date=df.Date.apply(lambda x:str(parse('-'.join([str(2000+int(x.split('-')[0])),x.split('-')[1]])))[:7])
df=df.sort_values(by='Date')
df.head()
for index, row in df.iterrows():
if row['percent']<0.05:
print(row['Date'])
break
print(df.head(1))
b
df2=df.copy()
mulidx=pd.MultiIndex.from_tuples(df2.Date.apply(lambda x:x.split('-')))
df2.index=mulidx
df2=df2.reset_index()
df2=df2.rename({'level_0':'年份', 'level_1':'月份'},axis=1).drop(columns='Date')
print(df2.groupby('年份')['lowest price '].agg([('最大值','max'),('均值','mean'),('0.75分位数',lambda x:x.quantile(0.75))]))
c
df3 = df2.copy()
print(df3.head())
d
df4 = df3.copy()
newcolumns = ['年份']+list(df.columns[1:-2])
df4.set_index(newcolumns)
print(df4.head())
e
df5=df4[['年份','月份','lowest price ','avg price']].copy()
df5=df5.iloc[1:].reset_index()[['月份','lowest price ','avg price']].join(df5,rsuffix='_lastmonth',how='outer')
print(df5[((df5['lowest price ']-df5['lowest price _lastmonth'])*(df5['avg price']-df5['avg price_lastmonth']))<0][['年份','月份']])
f
df6=df4.copy()
mean_num=df6['Total number of license issued'].rolling(2).mean()
mean_num.index=range(1,len(mean_num)+1)
df6['mean_num_of_last_two_month']=mean_num[:202]
df6=df6.fillna(0)
df6['发行增益']=df6['Total number of license issued']-df6['mean_num_of_last_two_month']
print(df6.iloc[[df6['发行增益'].idxmax(),df6['发行增益'].idxmin()]])
#发行增益出现极大值的时间是2008.01,极小值的出现时间是2008.04
2.
import pandas as pd
import numpy as np
df = pd.read_csv('data/2007年-2019年俄罗斯货运航班运载量.csv')
print(df.head())
print(df.info())
a
df.groupby('Year')['Whole year'].sum()
b
df.groupby('Year')['Airport name'].apply(lambda x:str(set(sorted(list(x.values))))).value_counts()
df2=df.groupby('Year')['Airport name'].apply(lambda x:str(set(sorted(list(x.values))))).to_frame().reset_index()
df2['len']=df2['Airport name'].str.len()
print(df2)
c
df3=df[(df.Year<2016)&(df.Year>2009)]
print(df3.groupby('Year').apply(lambda x:len(x[x['Whole year']==0])/len(x)))
d
3
import pandas as pd
import numpy as np
df_death = pd.read_csv('data/美国死亡数.csv')
print(df_death.info)
df_diag = pd.read_csv('data/美国确证数.csv')
print(df_diag.info)
a
print(df_death[['Population','2020/4/26']].corr())
b
df2 = df_diag[['Admin2','Province_State','2020/4/1']]
print((df2[df2['2020/4/1']==0].groupby('Province_State')['2020/4/1'].count()/df2.groupby('Province_State')['2020/4/1'].count()).fillna(0))