import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
def func1(data):
    """One-hot encode the ``label`` column by hand, printing each step.

    The frame is modified in place: its columns are renamed to ``color``,
    ``size``, ``price`` and ``label``, and an integer-coded column is
    appended under the key ``'lebel'`` (spelling kept as-is because later
    printouts of the frame show it).

    Args:
        data: A four-column ``pandas.DataFrame`` whose last column holds the
            class labels.

    Returns:
        A 2-D integer ``numpy.ndarray`` with one row per record and one
        column per distinct label (columns follow the order returned by
        ``np.unique``); each row contains a single ``1``.
    """
    data.columns = ["color", "size", "price", "label"]
    print("加标题:\n", data, end="\n***************\n")

    # Compute the distinct labels once and reuse them for both the printout
    # and the label -> index mapping.
    classes = np.unique(data['label'])
    print("label 类别:", classes)

    label_mapping = {label: idx for idx, label in enumerate(classes)}
    print("lebel_mapping", label_mapping)
    data['lebel'] = data['label'].map(label_mapping)

    # Build the one-hot rows by setting position (row, code) to 1.
    # A plain ndarray is used instead of ``np.mat``, which is no longer
    # available in NumPy 2.x; the printed text is the same.
    codes = data['lebel'].to_numpy(dtype=int)
    onehot_label = np.zeros((len(codes), len(label_mapping)), dtype=int)
    onehot_label[np.arange(len(codes)), codes] = 1
    print(onehot_label)
    return onehot_label
def func2(data):
    """One-hot encode the ``label`` column with scikit-learn, printing each step.

    ``data['label']`` is overwritten in place with the integer codes
    produced by ``LabelEncoder``.

    Args:
        data: A ``pandas.DataFrame`` that has a ``label`` column.

    Returns:
        The dense one-hot encoded array, one row per record.
    """
    label_data = LabelEncoder().fit_transform(data['label'])
    data['label'] = label_data
    print(data)

    # OneHotEncoder expects a 2-D input: one column holding the integer codes.
    intege_encoded = label_data.reshape(-1, 1)
    print(intege_encoded)

    # The ``sparse`` keyword was renamed to ``sparse_output`` in
    # scikit-learn 1.2 and later removed, so passing either name breaks on
    # some releases. Use the default (sparse) encoder and densify the
    # result instead, which works regardless of version.
    onehot_encoded = OneHotEncoder().fit_transform(intege_encoded).toarray()
    print(onehot_encoded)
    return onehot_encoded
if __name__ == '__main__':
    # Demo driver: build a small unlabeled frame, then run the manual and
    # the scikit-learn encoding walkthroughs on it in turn.
    records = [
        ["green", "S", 100, "label1"],
        ["blue", "M", 110, "label2"],
        ["red", "X", 120, "label3"],
        ["black", "XL", 130, "label4"],
        ["black", "XL", 130, "label4"],
    ]
    data = pd.DataFrame(records)
    print(data)

    # func1 names the columns in place; func2 relies on that 'label' column.
    func1(data)

    banner = "*" * 30
    print(banner, "fa2", banner)
    func2(data)
# Category handling and one-hot encoding practice
# (Latest recommended article published 2021-10-21 22:29:17)