# 类别处理及onehot练习 (categorical label handling and one-hot encoding practice)

import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

def func1(data):
    """Integer-encode and one-hot encode the ``label`` column by hand.

    The frame is modified in place: its columns are renamed to ``color``,
    ``size``, ``price`` and ``label``, and an integer-coded copy of ``label``
    is stored in a new column named ``lebel`` (spelling kept as-is, because
    callers that reuse the frame afterwards see this column).

    Args:
        data: pandas DataFrame with exactly four columns; the last one holds
            the class labels.

    Returns:
        A 2-D integer ``numpy.ndarray`` with one row per input row and one
        column per distinct label; each row has a single 1 at the position
        of that row's label code.
    """
    data.columns = ["color", "size", "price", "label"]
    print("加标题:\n", data, end="\n***************\n")

    # np.unique returns the distinct labels in sorted order; compute it once
    # and reuse it for both the printout and the label -> code mapping.
    classes = np.unique(data['label'])
    print("label 类别:", classes)
    lebel_mapping = {name: idx for idx, name in enumerate(classes)}
    print("lebel_mapping", lebel_mapping)
    data['lebel'] = data['label'].map(lebel_mapping)

    # Row i of the identity matrix is the one-hot vector for code i, so
    # indexing it with the code column yields the whole one-hot matrix.
    # A plain ndarray is used because np.mat no longer exists in NumPy 2.0;
    # the printed form of a 2-D array is the same as that of a matrix.
    codes = data['lebel'].to_numpy(dtype=int)
    onehot_label = np.eye(len(classes), dtype=int)[codes]
    print(onehot_label)
    return onehot_label

def func2(data):
    """Label-encode and one-hot encode ``data['label']`` with scikit-learn.

    The ``label`` column of ``data`` is overwritten in place with the integer
    codes produced by ``LabelEncoder``. The frame, the column-vector of codes
    and the dense one-hot matrix are printed in that order.

    Args:
        data: pandas DataFrame that already has a ``label`` column.

    Returns:
        The dense one-hot matrix as a 2-D ``numpy.ndarray`` (one row per
        input row, one column per distinct label).
    """
    le = LabelEncoder()
    label_data = le.fit_transform(data['label'])
    data['label'] = label_data
    print(data)

    # OneHotEncoder expects a 2-D input: one sample per row, one feature column.
    intege_encoded = label_data.reshape(-1, 1)
    print(intege_encoded)

    # The `sparse=False` keyword was renamed to `sparse_output` in
    # scikit-learn 1.2 and removed in 1.4. Using the default (sparse) output
    # and densifying it works on both old and new releases.
    onehot_encoder = OneHotEncoder()
    onehot_encoded = onehot_encoder.fit_transform(intege_encoded).toarray()
    print(onehot_encoded)
    return onehot_encoded


if __name__ == '__main__':
    # Demo data: one row per item as (color, size, price, label). The frame
    # is created without column names; func1 assigns them.
    rows = [
        ["green", "S", 100, "label1"],
        ["blue", "M", 110, "label2"],
        ["red", "X", 120, "label3"],
        ["black", "XL", 130, "label4"],
        ["black", "XL", 130, "label4"],
    ]
    data = pd.DataFrame(rows)
    print(data)

    # Hand-written encoding first; it names the columns of `data` in place,
    # so the same frame can then be handed to the scikit-learn version.
    func1(data)

    divider = "*" * 30
    print(divider, "fa2", divider)
    func2(data)
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值