生肖属相单变量分析

import pandas as pd
import numpy as np
# Load the zodiac feature table. The context manager closes the file handle
# as soon as pandas has finished parsing, instead of leaving it open for the
# rest of the session.
with open(r'ft_zodiac.txt', encoding='utf-8') as f:
    ft_zodiac = pd.read_csv(f)
print(ft_zodiac.shape)
ft_zodiac.head()
(23519, 4)
0order_idchinese_zodiaczodiac
00100000081567592448处女座
10100000467565182976双子座
20100000530945323008射手座
30100000556765458432摩羯座
40100000598171623424水瓶座

以 pd15 作为好坏的分割节点:逾期 >15 天为坏人,逾期 <15 天为好人?

逾期 15 天以上的人为坏,逾期 5 天以内的人为好。

# Load the per-order label table. The context manager closes the file handle
# once pandas has finished parsing.
# NOTE(review): no encoding is passed here, so the platform default is used,
# whereas ft_zodiac.txt is opened as UTF-8 — confirm this file's encoding.
with open(r'zodiac_label.txt') as l:
    zodiac_label = pd.read_csv(l)
zodiac_label.head()
order_idoverdue_daysrepay_timelabel
010000008156759244802018-07-090
110000046756518297612018-07-090
210000053094532300802018-07-090
310000055676545843202018-07-090
410000059817162342402018-07-090
# Distinct values found in the `label` column of the raw label table.
set(zodiac_label['label'])
{0, 1, 2}
# Remove the rows whose label equals 2; every other row is kept.
keep_mask = zodiac_label['label'] != 2
ft_label = zodiac_label[keep_mask]
ft_label.head()
order_idoverdue_daysrepay_timelabel
010000008156759244802018-07-090
110000046756518297612018-07-090
210000053094532300802018-07-090
310000055676545843202018-07-090
410000059817162342402018-07-090
# Distinct label values left after the filter above.
set(ft_label['label'])
{0, 1}
# Attach the zodiac features to the filtered labels; the inner join keeps
# only the order_id values present in both tables.
data = ft_label.merge(ft_zodiac, how='inner', on='order_id')
data.head()
order_idoverdue_daysrepay_timelabel0chinese_zodiaczodiac
010000008156759244802018-07-0900处女座
110000046756518297612018-07-0900双子座
210000053094532300802018-07-0900射手座
310000055676545843202018-07-0900摩羯座
410000059817162342402018-07-0900水瓶座

badrate = bad / total,其中 total = bad + good

# Unique Western zodiac signs in the merged data (a set, despite the name).
zodiac_list = set(data['zodiac'])
zodiac_list
{'双子座',
 '双鱼座',
 '处女座',
 '天秤座',
 '天蝎座',
 '射手座',
 '巨蟹座',
 '摩羯座',
 '水瓶座',
 '狮子座',
 '白羊座',
 '金牛座'}
# Unique Chinese zodiac animals in the merged data (a set, despite the name).
chinese_zodiac_list = set(data['chinese_zodiac'])
chinese_zodiac_list
{'兔', '牛', '狗', '猪', '猴', '羊', '虎', '蛇', '马', '鸡', '鼠', '龙'}
# Western zodiac: for each sign, the share of label-1 rows among that sign's
# label-0 and label-1 rows.
zodiac_badrate = {}
for sign in zodiac_list:
    labels = data.loc[data['zodiac'] == sign, 'label']
    n_bad = (labels == 1).sum()   # rows with label 1 (bad)
    n_good = (labels == 0).sum()  # rows with label 0 (good)
    zodiac_badrate[sign] = n_bad / (n_bad + n_good)
zodiac_badrate
{'双子座': 0.1312410841654779,
 '巨蟹座': 0.1408351026185421,
 '狮子座': 0.12760416666666666,
 '射手座': 0.14480286738351256,
 '水瓶座': 0.140117994100295,
 '白羊座': 0.13455414012738853,
 '双鱼座': 0.14873646209386282,
 '处女座': 0.13035143769968052,
 '天秤座': 0.12461252324860508,
 '天蝎座': 0.12005028284098052,
 '摩羯座': 0.12920489296636087,
 '金牛座': 0.12259059367771781}
# Order the (sign, bad rate) pairs from highest to lowest bad rate and turn
# them into a two-column table.
ranked = sorted(zodiac_badrate.items(), key=lambda pair: pair[1], reverse=True)
zodiac_badrate = pd.DataFrame(ranked)
zodiac_badrate.columns = ['星座', 'badrate']
zodiac_badrate
星座badrate
0双鱼座0.148736
1射手座0.144803
2巨蟹座0.140835
3水瓶座0.140118
4白羊座0.134554
5双子座0.131241
6处女座0.130351
7摩羯座0.129205
8狮子座0.127604
9天秤座0.124613
10金牛座0.122591
11天蝎座0.120050
# Draw the ranked per-sign bad rates as a pyecharts line chart.
# NOTE(review): `from pyecharts import Line` looks like the pre-1.0 pyecharts
# import layout — confirm the installed version before upgrading.
from pyecharts import Line
# Sign names and their bad rates, in the table's ranked order.
x = zodiac_badrate['星座']
y = zodiac_badrate['badrate']
line = Line('星座')
# NOTE(review): the first argument (1) is presumably the series name; a
# descriptive string may have been intended — confirm against the pyecharts
# documentation. This is the cell's trailing expression.
line.add(1, x, y)
<div id="c56416b4b8514d2780bb35f9e761fcf5" style="width:800px;height:400px;"></div>
# Chinese zodiac: for each animal, the share of label-1 rows among that
# animal's label-0 and label-1 rows.
chinese_zodiac_badrate = {}
for animal in chinese_zodiac_list:
    labels = data.loc[data['chinese_zodiac'] == animal, 'label']
    n_bad = (labels == 1).sum()   # rows with label 1 (bad)
    n_good = (labels == 0).sum()  # rows with label 0 (good)
    chinese_zodiac_badrate[animal] = n_bad / (n_bad + n_good)
chinese_zodiac_badrate
{'猪': 0.14269406392694065,
 '牛': 0.1578112609040444,
 '虎': 0.15165876777251186,
 '龙': 0.1439084219133279,
 '鼠': 0.1340602950609365,
 '兔': 0.1502843216896832,
 '鸡': 0.12846998063266624,
 '蛇': 0.12789827973074047,
 '羊': 0.11335403726708075,
 '猴': 0.12008141112618724,
 '马': 0.12053872053872054,
 '狗': 0.11052009456264776}
# Order the (animal, bad rate) pairs from highest to lowest bad rate and turn
# them into a two-column table.
ranked = sorted(
    chinese_zodiac_badrate.items(), key=lambda pair: pair[1], reverse=True
)
chinese_zodiac_badrate = pd.DataFrame(ranked)
chinese_zodiac_badrate.columns = ['生肖', 'badrate']
chinese_zodiac_badrate
生肖badrate
00.157811
10.151659
20.150284
30.143908
40.142694
50.134060
60.128470
70.127898
80.120539
90.120081
100.113354
110.110520
# Draw the ranked per-animal bad rates as a pyecharts line chart.
# NOTE(review): `from pyecharts import Line` looks like the pre-1.0 pyecharts
# import layout — confirm the installed version before upgrading.
from pyecharts import Line
# Animal names and their bad rates, in the table's ranked order.
x = chinese_zodiac_badrate['生肖']
y = chinese_zodiac_badrate['badrate']
line = Line('生肖')
# NOTE(review): the first argument (1) is presumably the series name; a
# descriptive string may have been intended — confirm against the pyecharts
# documentation. This is the cell's trailing expression.
line.add(1,x,y)
<div id="8801efc233e94477a9d56e1162e60a2b" style="width:800px;height:400px;"></div>

转载于:https://www.cnblogs.com/chenxiangzhen/p/10902219.html

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值