pandas_6

最新推荐文章于 2022-09-22 21:54:55 发布

超棒的小苏总

最新推荐文章于 2022-09-22 21:54:55 发布

阅读量153

点赞数

分类专栏： pandas教程

本文链接：https://blog.csdn.net/weixin_40718101/article/details/105884836

版权

pandas教程专栏收录该内容

6 篇文章 0 订阅

订阅专栏

1. a

import pandas as pd
import numpy as np
df_car = pd.read_csv('data/2002年-2018年上海机动车拍照拍卖.csv')
print(df_car.shape)
print(df_car.info())
print(df_car.head())
#(a) 找出哪轮拍卖的中标率首次小于 5%
df = df_car.copy()
df['percent'] = df.apply(lambda x:x['Total number of license issued']/x['Total number of applicants'],axis=1)

from dateutil.parser import parse
df.Date=df.Date.apply(lambda x:str(parse('-'.join([str(2000+int(x.split('-')[0])),x.split('-')[1]])))[:7])

df=df.sort_values(by='Date')
df.head()
for index, row in df.iterrows():
  if row['percent']<0.05:
        print(row['Date'])
        break
print(df.head(1))

b

df2=df.copy()
mulidx=pd.MultiIndex.from_tuples(df2.Date.apply(lambda x:x.split('-')))
df2.index=mulidx
df2=df2.reset_index()
df2=df2.rename({'level_0':'年份', 'level_1':'月份'},axis=1).drop(columns='Date')
print(df2.groupby('年份')['lowest price '].agg([('最大值','max'),('均值','mean'),('0.75分位数',lambda x:x.quantile(0.75))]))

c

df3 = df2.copy()
print(df3.head())

d

df4 = df3.copy()
newcolumns = ['年份']+list(df.columns[1:-2])
df4.set_index(newcolumns)
print(df4.head())

e

df5=df4[['年份','月份','lowest price ','avg price']].copy()
df5=df5.iloc[1:].reset_index()[['月份','lowest price ','avg price']].join(df5,rsuffix='_lastmonth',how='outer')
print(df5[((df5['lowest price ']-df5['lowest price _lastmonth'])*(df5['avg price']-df5['avg price_lastmonth']))<0][['年份','月份']])

f

df6=df4.copy()
mean_num=df6['Total number of license issued'].rolling(2).mean()
mean_num.index=range(1,len(mean_num)+1)
df6['mean_num_of_last_two_month']=mean_num[:202]
df6=df6.fillna(0)
df6['发行增益']=df6['Total number of license issued']-df6['mean_num_of_last_two_month']
print(df6.iloc[[df6['发行增益'].idxmax(),df6['发行增益'].idxmin()]])

#发行增益出现极大值的时间是2008.01,极小值的出现时间是2008.04

2.

import pandas as pd
import numpy as np
df = pd.read_csv('data/2007年-2019年俄罗斯货运航班运载量.csv')
print(df.head())
print(df.info())

a

df.groupby('Year')['Whole year'].sum()

b

df.groupby('Year')['Airport name'].apply(lambda x:str(set(sorted(list(x.values))))).value_counts()
df2=df.groupby('Year')['Airport name'].apply(lambda x:str(set(sorted(list(x.values))))).to_frame().reset_index()
df2['len']=df2['Airport name'].str.len()
print(df2)

c

df3=df[(df.Year<2016)&(df.Year>2009)]
print(df3.groupby('Year').apply(lambda x:len(x[x['Whole year']==0])/len(x)))

d

3

import pandas as pd
import numpy as np
df_death = pd.read_csv('data/美国死亡数.csv')
print(df_death.info)
df_diag  = pd.read_csv('data/美国确证数.csv')
print(df_diag.info)

a

print(df_death[['Population','2020/4/26']].corr())

b

df2 = df_diag[['Admin2','Province_State','2020/4/1']]
print((df2[df2['2020/4/1']==0].groupby('Province_State')['2020/4/1'].count()/df2.groupby('Province_State')['2020/4/1'].count()).fillna(0))

超棒的小苏总

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
pandas_6

1.aimport pandas as pdimport numpy as npdf_car = pd.read_csv('data/2002年-2018年上海机动车拍照拍卖.csv')print(df_car.shape)print(df_car.info())print(df_car.head())#(a) 找出哪轮拍卖的中标率首次小于 5%df = df_car.copy(...
复制链接

扫一扫