pandas_6

1.

a

import pandas as pd
import numpy as np
df_car = pd.read_csv('data/2002年-2018年上海机动车拍照拍卖.csv')
print(df_car.shape)
print(df_car.info())
print(df_car.head())
#(a) 找出哪轮拍卖的中标率首次小于 5%
df = df_car.copy()
df['percent'] = df.apply(lambda x:x['Total number of license issued']/x['Total number of applicants'],axis=1)
from dateutil.parser import parse
df.Date=df.Date.apply(lambda x:str(parse('-'.join([str(2000+int(x.split('-')[0])),x.split('-')[1]])))[:7])
df=df.sort_values(by='Date')
df.head()
for index, row in df.iterrows():
  if row['percent']<0.05:
        print(row['Date'])
        break
print(df.head(1))

b

df2=df.copy()
mulidx=pd.MultiIndex.from_tuples(df2.Date.apply(lambda x:x.split('-')))
df2.index=mulidx
df2=df2.reset_index()
df2=df2.rename({'level_0':'年份', 'level_1':'月份'},axis=1).drop(columns='Date')
print(df2.groupby('年份')['lowest price '].agg([('最大值','max'),('均值','mean'),('0.75分位数',lambda x:x.quantile(0.75))]))

c

df3 = df2.copy()
print(df3.head())

d

df4 = df3.copy()
newcolumns = ['年份']+list(df.columns[1:-2])
df4.set_index(newcolumns)
print(df4.head())

e

df5=df4[['年份','月份','lowest price ','avg price']].copy()
df5=df5.iloc[1:].reset_index()[['月份','lowest price ','avg price']].join(df5,rsuffix='_lastmonth',how='outer')
print(df5[((df5['lowest price ']-df5['lowest price _lastmonth'])*(df5['avg price']-df5['avg price_lastmonth']))<0][['年份','月份']])

f

df6=df4.copy()
mean_num=df6['Total number of license issued'].rolling(2).mean()
mean_num.index=range(1,len(mean_num)+1)
df6['mean_num_of_last_two_month']=mean_num[:202]
df6=df6.fillna(0)
df6['发行增益']=df6['Total number of license issued']-df6['mean_num_of_last_two_month']
print(df6.iloc[[df6['发行增益'].idxmax(),df6['发行增益'].idxmin()]])

#发行增益出现极大值的时间是2008.01,极小值的出现时间是2008.04

2.

import pandas as pd
import numpy as np
df = pd.read_csv('data/2007年-2019年俄罗斯货运航班运载量.csv')
print(df.head())
print(df.info())

a

df.groupby('Year')['Whole year'].sum()

b

df.groupby('Year')['Airport name'].apply(lambda x:str(set(sorted(list(x.values))))).value_counts()
df2=df.groupby('Year')['Airport name'].apply(lambda x:str(set(sorted(list(x.values))))).to_frame().reset_index()
df2['len']=df2['Airport name'].str.len()
print(df2)

c

df3=df[(df.Year<2016)&(df.Year>2009)]
print(df3.groupby('Year').apply(lambda x:len(x[x['Whole year']==0])/len(x)))

d

3

import pandas as pd
import numpy as np
df_death = pd.read_csv('data/美国死亡数.csv')
print(df_death.info)
df_diag  = pd.read_csv('data/美国确证数.csv')
print(df_diag.info)

a

print(df_death[['Population','2020/4/26']].corr())

b

df2 = df_diag[['Admin2','Province_State','2020/4/1']]
print((df2[df2['2020/4/1']==0].groupby('Province_State')['2020/4/1'].count()/df2.groupby('Province_State')['2020/4/1'].count()).fillna(0))
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值