Python：One-hot-encoding 例子

最新推荐文章于 2023-06-02 16:39:17 发布

DeniuHe

最新推荐文章于 2023-06-02 16:39:17 发布

阅读量534

点赞数

分类专栏： Python学习

本文链接：https://blog.csdn.net/DeniuHe/article/details/112601723

版权

Python学习专栏收录该内容

239 篇文章 14 订阅

订阅专栏

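本文给出一个用Python做数据预处理的例子：读取ObesityDataSet_raw_and_data_sinthetic.csv，用pandas的get_dummies对Gender、family_history_with_overweight、CAEC、SMOKE、SCC、CALC、MTRANS等类别特征做One-hot编码，Age、Height、Weight等其余特征保持原值，并把NObeyesdad的7个类别合并为4个整数标签；最后将所有列拼接为DataFrame并保存为不带表头的CSV文件（Obesity.csv）。代码中虽然导入了OneHotEncoder和LabelEncoder，但实际并未使用。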

摘要由CSDN通过智能技术生成

import pandas as pd
import numpy as np
import os
# Switch to the folder that holds the dataset so the relative file name used
# when reading the CSV resolves. A raw string is required here: "\O" is not a
# valid escape sequence and newer Python versions warn about it.
# (The former bare os.getcwd() call discarded its result and was removed.)
os.chdir(r"D:\OCdata")
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelEncoder

# Load the dataset; the file name is resolved against the current working
# directory.
data = pd.read_csv("ObesityDataSet_raw_and_data_sinthetic.csv")


def _one_hot(column):
    """Return the get_dummies indicator matrix of ``data[column]`` as an ndarray."""
    return np.array(pd.get_dummies(data[column]))


def _as_column(column):
    """Return ``data[column]`` as an ndarray reshaped to a single column (n, 1)."""
    return np.array(data[column]).reshape(-1, 1)


# Each feature is turned into a 2-D array so that all of them can later be
# stacked side by side; the shape is printed after every step as a sanity check.
Gender = _one_hot("Gender")
print("Gender", Gender.shape)

Age = _as_column("Age")
print("Age", Age.shape)

Height = _as_column("Height")
print("height", Height.shape)

Weight = _as_column("Weight")
print("Weight", Weight.shape)

family = _one_hot("family_history_with_overweight")
print("family", family.shape)

FCVC = _as_column("FCVC")
print("FCVC", FCVC.shape)

NCP = _as_column("NCP")
print("NCP", NCP.shape)

CAEC = _one_hot("CAEC")
print("CAEC", CAEC.shape)

smoke = _one_hot("SMOKE")
print("smoke", smoke.shape)

CH2O = _as_column("CH2O")
print("CH2O", CH2O.shape)

SCC = _one_hot("SCC")
print("SCC", SCC.shape)

FAF = _as_column("FAF")
print("FAF ", FAF.shape)

TUE = _as_column("TUE")
print("TUE ", TUE.shape)

CALC = _one_hot("CALC")
print("CALC", CALC.shape)

MTRANS = _one_hot("MTRANS")
print("MTRANS", MTRANS.shape)


# Show how many rows fall into each original target category.
print(data["NObeyesdad"].value_counts())

NObeyesdad = np.array(data["NObeyesdad"])

# The seven original categories are merged into four integer classes:
# 0 = insufficient weight, 1 = normal weight,
# 2 = overweight (levels I and II), 3 = obesity (types I, II and III).
_LABEL_TO_CLASS = {
    "Insufficient_Weight": 0,
    "Normal_Weight": 1,
    "Overweight_Level_I": 2,
    "Overweight_Level_II": 2,
    "Obesity_Type_I": 3,
    "Obesity_Type_II": 3,
    "Obesity_Type_III": 3,
}

# Fail loudly on any label outside the table (including missing values).
# Silently skipping such rows would leave `decision` shorter than the feature
# arrays and only surface later as an obscure row-count mismatch.
_unknown_labels = sorted(
    {str(ele) for ele in NObeyesdad if ele not in _LABEL_TO_CLASS}
)
if _unknown_labels:
    raise ValueError(
        f"Unexpected NObeyesdad label(s): {', '.join(_unknown_labels)}"
    )

# One class id per row, shaped (n, 1) so it can be appended as the last column.
decision = np.array([_LABEL_TO_CLASS[ele] for ele in NObeyesdad]).reshape(-1, 1)
print("decision", decision.shape)

# Stack all feature arrays and the label column side by side (axis=1), with
# the label last. Every array must have the same number of rows.
# NOTE(review): np.concatenate yields a single common dtype, so indicator and
# label columns are presumably written as floats — confirm this is intended.
_columns = (
    Gender, Age, Height, Weight, family, FCVC, NCP, CAEC,
    smoke, CH2O, SCC, FAF, TUE, CALC, MTRANS, decision,
)
Data = np.concatenate(_columns, axis=1)
print(Data.shape)

# Write a plain value matrix: no index column and no header row. `index` and
# `header` are boolean options, so pass False explicitly instead of relying on
# None being falsy.
Data = pd.DataFrame(Data)
Data.to_csv(r"D:\OCdata\Obesity.csv", index=False, header=False)