import pandas as pd
import numpy as np
import os
# Switch the working directory to the dataset folder so that relative file
# names used later in the script resolve there.
# A raw string is required: "\O" is not a valid escape sequence, so the
# non-raw literal triggers a DeprecationWarning/SyntaxWarning on current
# Python versions.  The resulting path text is unchanged.
# (The previous bare os.getcwd() call discarded its result and was a no-op.)
os.chdir(r"D:\OCdata")
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelEncoder
# Feature columns in output order: (source column, is_categorical, label used
# in the printed shape report).  Categorical columns are expanded with
# pd.get_dummies (one indicator column per distinct value); the others are
# copied through as a single column.  The labels reproduce the script's
# historical console output verbatim (including "height" and the trailing
# spaces in "FAF " / "TUE ").
FEATURE_COLUMNS = (
    ("Gender", True, "Gender"),
    ("Age", False, "Age"),
    ("Height", False, "height"),
    ("Weight", False, "Weight"),
    ("family_history_with_overweight", True, "family"),
    ("FCVC", False, "FCVC"),
    ("NCP", False, "NCP"),
    ("CAEC", True, "CAEC"),
    ("SMOKE", True, "smoke"),
    ("CH2O", False, "CH2O"),
    ("SCC", True, "SCC"),
    ("FAF", False, "FAF "),
    ("TUE", False, "TUE "),
    ("CALC", True, "CALC"),
    ("MTRANS", True, "MTRANS"),
)

# Name of the column holding the class label.
TARGET_COLUMN = "NObeyesdad"

# Label -> integer class code.  Both overweight levels share code 2 and all
# three obesity types share code 3, collapsing seven labels into four classes.
WEIGHT_CLASS_CODES = {
    "Insufficient_Weight": 0,
    "Normal_Weight": 1,
    "Overweight_Level_I": 2,
    "Overweight_Level_II": 2,
    "Obesity_Type_I": 3,
    "Obesity_Type_II": 3,
    "Obesity_Type_III": 3,
}


def encode_weight_classes(labels):
    """Translate weight-class labels into an ``(n, 1)`` integer code column.

    Args:
        labels: Iterable (list, array or Series) of label strings that are
            keys of ``WEIGHT_CLASS_CODES``.

    Returns:
        ``numpy.ndarray`` of dtype int64 and shape ``(len(labels), 1)``.

    Raises:
        ValueError: If any label (including a missing value) has no entry in
            ``WEIGHT_CLASS_CODES``.  Skipping such rows would leave the code
            column shorter than the feature columns and only fail later, in
            ``np.concatenate``, with an unrelated-looking shape error.
    """
    series = pd.Series(labels, dtype=object)
    codes = series.map(WEIGHT_CLASS_CODES)  # unmapped labels become NaN
    unknown = series[codes.isna()]
    if not unknown.empty:
        names = sorted(map(str, unknown.unique()))
        raise ValueError(
            f"Unrecognised {TARGET_COLUMN} label(s): {', '.join(names)}"
        )
    return codes.to_numpy(dtype=np.int64).reshape(-1, 1)


def build_feature_matrix(data):
    """Assemble the encoded features plus the class code into one 2-D array.

    The columns listed in ``FEATURE_COLUMNS`` are encoded in order and
    concatenated side by side, followed by the class code derived from
    ``TARGET_COLUMN`` as the last column.  The shape of every piece, the
    label frequency table and the final shape are printed as a progress
    report.

    Args:
        data: ``pandas.DataFrame`` containing every column named in
            ``FEATURE_COLUMNS`` as well as ``TARGET_COLUMN``.

    Returns:
        ``numpy.ndarray`` with one row per input row.  The dtype is whatever
        ``np.concatenate`` promotes the individual pieces to.

    Raises:
        KeyError: If a required column is missing from ``data``.
        ValueError: If ``TARGET_COLUMN`` contains an unrecognised label.
    """
    pieces = []
    for column, is_categorical, label in FEATURE_COLUMNS:
        if is_categorical:
            piece = pd.get_dummies(data[column]).to_numpy()
        else:
            piece = data[column].to_numpy().reshape(-1, 1)
        print(label, piece.shape)
        pieces.append(piece)

    print(data[TARGET_COLUMN].value_counts())
    decision = encode_weight_classes(data[TARGET_COLUMN])
    print("decision", decision.shape)
    pieces.append(decision)

    matrix = np.concatenate(pieces, axis=1)
    print(matrix.shape)
    return matrix


if __name__ == "__main__":
    # Read the raw dataset (resolved against the current working directory),
    # encode it, and write the result without index or header row.
    data = pd.read_csv("ObesityDataSet_raw_and_data_sinthetic.csv")
    Data = pd.DataFrame(build_feature_matrix(data), index=None)
    Data.to_csv(r"D:\OCdata\Obesity.csv", index=None, header=None)