pandas常用方法函数实战
# -*- coding: utf-8 -*-
"""
Created on Thu Nov 11 11:37:55 2021
@author: Administrator
"""
import numpy as np
import pandas as pd
#####################################,1,导入数据,
pt = 'D:\\xxx.xlsx'
df1 = pd.read_excel(pt,sheet_name='数据源1')
df1['分数区间'].value_counts()
df1['分数区间'] = df1['分数区间'].replace("(0.817.1]","(0.817,1]")
df1.rename(columns={'分数区间':'old分数区间'},inplace=True)
df1.columns
df1.info()
fenshu_map = df1[['old分数区间','分数区间']].drop_duplicates(subset=['old分数区间','分数区间'], keep='last').set_index('分数区间').to_dict(orient='dict').get('old分数区间')
fenshu_map.to_dict().values()
sss=fenshu_map.values()
####################################,2,处理数据,只有大众的
#风险等级维度
var='风险等级'
var='分数区间'
#1)预授信
df1["只有大众预授信"] = df1.apply( lambda x:"是" if (x.大众资质==0 and x.瓜子资质==0) else "否" , axis = 1)
df1['分数区间']=df1['old分数区间'].str.split(",",1).map(lambda x:float(x[0][1:]))
#df1['分数区间排序']=df1['分数区间'].rank()
a1 = df1.groupby(var).apply( lambda x:(x['只有大众预授信']=='否').sum() )
a2 = df1.groupby(var).apply( lambda x:(x['只有大众预授信']=='是').sum() )
a3=pd.concat([a1,a2],axis=1)
a3['总计']=a3[0] + a3[1]
a3.loc['行累加']=a3.apply(lambda x: x.sum(),axis=0)
a3.sort_index(ascending=False,inplace=True)
a3.columns=['否','是','总计']
a3['是列累加']=a3['是'].cumsum()
a3['预授信累计占比']=a3['是列累加']/a3.loc['行累加','总计']
a3.drop(['是列累加'],axis=1,inplace=True)
dazong_risk_ysx = a3
#2)路由通过拒绝,
a1 = df1.groupby(var).apply( lambda x:(x['大众审核结果']==2).sum() )
a2 = df1.groupby(var).apply( lambda x:(x['大众审核结果']==3).sum() )
a3=pd.concat([a1,a2],axis=1)
a3['总计']=a3[0] + a3[1]
a3.sort_index(ascending=False,inplace=True)
a3.columns=['否','是','总计']
a3.loc['行累加']=a3.apply(lambda x: x.sum(),axis=0)
a3['是列累加']=a3['否'].cumsum()
a3['总计累加']=a3['总计'].cumsum()
a3['预授信累计占比']=a3['是列累加']/a3['总计累加']
a3.drop(['是列累加'],axis=1,inplace=True)
a3.drop(['总计累加'],axis=1,inplace=True)
dazong_risk_shenhe = a3
#3)真实通过拒绝
a1 = df1.groupby(var).apply( lambda x:(x['大众审核结果']==2).sum() )
a2 = df1.groupby(var).apply( lambda x:(x['大众真实拒绝']==1).sum() )
a3=pd.concat([a1,a2],axis=1)
a3['总计']=a3[0] + a3[1]
a3.sort_index(ascending=False,inplace=True)
a3.columns=['否','是','总计']
a3.loc['行累加']=a3.apply(lambda x: x.sum(),axis=0)
a3['是列累加']=a3['否'].cumsum()
a3['总计累加']=a3['总计'].cumsum()
a3['预授信累计占比']=a3['是列累加']/a3['总计累加']
a3.drop(['是列累加'],axis=1,inplace=True)
a3.drop(['总计累加'],axis=1,inplace=True)
dazong_risk_true = a3
risk_weidu = pd.concat([dazong_risk_ysx,dazong_risk_shenhe,dazong_risk_true],axis=1)
#分数维度
fenshu_weidu = pd.concat([dazong_risk_ysx,dazong_risk_shenhe,dazong_risk_true],axis=1).reset_index()
fenshu_weidu['yinshe'] = fenshu_weidu['index'].map(lambda x: fenshu_map.get(x) )
#####################################输出结果
with pd.ExcelWriter('D:\\xxx.xlsx') as writer:
risk_weidu.to_excel(writer,sheet_name='xx',index=True)
fenshu_weidu.to_excel(writer,sheet_name='xx')