pandas数据分箱技术

第一种(把元素放进篮子里)

from random import randint
from pprint import pprint

# Fixed sample scores so the run is reproducible; for random data use
# [randint(50, 100) for _ in range(20)] instead.
score_list = [
    63, 67, 73, 84, 88, 97, 70, 85, 68, 96,
    95, 60, 83, 70, 77, 86, 83, 94, 100, 82,
]
print(score_list)

# One independent, initially empty bucket per score band.
low, mid, ok = [], [], []

def deal_with(v):
    """Append score ``v`` to the module-level bucket list it belongs to.

    Buckets: ``low`` receives 50 <= v < 70, ``mid`` receives 70 <= v < 90
    and ``ok`` receives 90 <= v <= 100. A value outside 50..100 matches no
    bucket and is ignored.

    Args:
        v: The score to classify.

    Returns:
        None. The result is the mutation of ``low`` / ``mid`` / ``ok``.
    """
    # The bucket lists are only mutated, never rebound, so no ``global``
    # declaration is required.
    # The lower bound is inclusive so that a score of exactly 50 lands in
    # ``low`` instead of falling through every branch and being dropped.
    if 50 <= v < 70:
        low.append(v)
    elif 70 <= v < 90:
        mid.append(v)
    elif 90 <= v <= 100:
        ok.append(v)

# map() is lazy on Python 3: a bare ``map(deal_with, score_list)`` that is
# never consumed does not call deal_with at all, so every bucket would stay
# empty. Iterate explicitly so each score is actually classified.
for score in score_list:
    deal_with(score)
pprint({"low":low,"mid":mid,"ok":ok})
###########################################
#[63, 67, 73, 84, 88, 97, 70, 85, 68, 96, 95, 60, 83, 70, 77, 86, 83, 94, 100, 82]
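# 注:下面这一行应该是 print(list(map(deal_with, score_list))) 的输出(deal_with 没有返回值,所以全是 None);上面的代码本身并不会打印这一行。
#[None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None]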
#{'low': [63, 67, 68, 60],'mid': [73, 84, 88, 70, 85, 83, 70, 77, 86, 83, 82],'ok': [97, 96, 95, 94, 100]}


第二种(把元素放进篮子里)

from random import randint
from pprint import pprint
from itertools import groupby

# Fixed sample scores so the run is reproducible; for random data use
# [randint(50, 100) for _ in range(20)] instead.
score_list = [
    63, 67, 73, 84, 88, 97, 70, 85, 68, 96,
    95, 60, 83, 70, 77, 86, 83, 94, 100, 82,
]
print(score_list)

# groupby only merges neighbouring items that share a key, so work on a
# sorted copy (the original list is left untouched).
data1 = list(score_list)
data1.sort()
print(data1)

def deal_with_simple_data(d):
    """Return the name of the score band that ``d`` falls into.

    Args:
        d: The score to classify.

    Returns:
        ``"low"`` for 50 <= d < 70, ``"mid"`` for 70 <= d < 90, ``"ok"``
        for 90 <= d <= 100, and ``None`` for any value outside 50..100.
    """
    # The lower bound is inclusive so that a score of exactly 50 is labelled
    # "low" rather than falling through to an implicit None.
    if 50 <= d < 70:
        return "low"
    if 70 <= d < 90:
        return "mid"
    if 90 <= d <= 100:
        return "ok"
    return None

# Group the sorted scores by band name; each group is a one-shot iterator,
# so it is materialised into a list while it is still current.
itor_data = groupby(data1, key=deal_with_simple_data)
grouped = []
for band, members in itor_data:
    grouped.append((band, list(members)))
pprint(grouped)
########################################################
#[63, 67, 73, 84, 88, 97, 70, 85, 68, 96, 95, 60, 83, 70, 77, 86, 83, 94, 100, 82]
#[60, 63, 67, 68, 70, 70, 73, 77, 82, 83, 83, 84, 85, 86, 88, 94, 95, 96, 97, 100]
#[('low', [60, 63, 67, 68]),
 #('mid', [70, 70, 73, 77, 82, 83, 83, 84, 85, 86, 88]),
 #('ok', [94, 95, 96, 97, 100])]


第三种(每个元素属于哪种篮子)

1.
import pandas as pd

# Sample scores to be binned.
score_list = [
    63, 67, 73, 84, 88, 97, 70, 85, 68, 96,
    95, 60, 83, 70, 77, 86, 83, 94, 100, 82,
]
print(score_list)

# Bin edges. pd.cut builds right-closed intervals by default, i.e.
# (50, 70], (70, 90] and (90, 100].
bins = [50, 70, 90, 100]

# res labels each score with its interval; res1 uses custom labels instead.
res = pd.cut(x=score_list, bins=bins)
res1 = pd.cut(x=score_list, bins=bins, labels=["及格","中等","优秀"])

# Print both results with a separator line between them.
print(res, "---"*35, res1, sep="\n")
#################################################
"""
[63, 67, 73, 84, 88, 97, 70, 85, 68, 96, 95, 60, 83, 70, 77, 86, 83, 94, 100, 82]
[(50, 70], (50, 70], (70, 90], (70, 90], (70, 90], ..., (70, 90], (70, 90], (90, 100], (90, 100], (70, 90]]
Length: 20
Categories (3, interval[int64]): [(50, 70] < (70, 90] < (90, 100]]
---------------------------------------------------------------------------------------------------------
[及格, 及格, 中等, 中等, 中等, ..., 中等, 中等, 优秀, 优秀, 中等]
Length: 20
Categories (3, object): [及格 < 中等 < 优秀]
"""
2.

  • 1
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 2
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值