"""Classify gender from height and weight and plot the decision regions.

Reads ``hw.csv`` (columns Gender, Age, Height, Weight), trains a classifier on
the Height/Weight columns, prints its hold-out accuracy and one sample
prediction, then draws the predicted class over a height/weight grid with the
training samples scattered on top.
"""
import importlib

import numpy as np
import pandas as pd

# scikit-learn and matplotlib are imported inside the functions that need them
# so the pure pandas/numpy helpers below stay importable (and testable) on
# machines without the ML / plotting stack.

# Classifier name -> (module path, class name).  These are the alternatives
# the script was written to experiment with; 'svm' is the one it runs.
MODEL_CHOICES = {
    'logistic': ('sklearn.linear_model', 'LogisticRegression'),
    'naive_bayes': ('sklearn.naive_bayes', 'MultinomialNB'),
    'tree': ('sklearn.tree', 'DecisionTreeClassifier'),
    'svm': ('sklearn.svm', 'SVC'),
}


def clean_dataset(df):
    """Return a copy of *df* without incomplete or duplicated rows.

    ``dropna`` and ``drop_duplicates`` return new frames rather than
    modifying in place, so their results must be kept; *df* itself is left
    untouched.
    """
    return df.dropna().drop_duplicates()


def load_dataset(path='hw.csv'):
    """Read the CSV at *path*, clean it and label-encode the Gender column.

    Returns a DataFrame whose ``Gender`` column holds the integer codes
    produced by ``LabelEncoder``.
    """
    from sklearn.preprocessing import LabelEncoder

    cleaned = clean_dataset(pd.read_csv(path))
    # assign() builds a new frame, avoiding chained-assignment warnings on
    # the filtered result.
    return cleaned.assign(
        Gender=LabelEncoder().fit_transform(cleaned['Gender'])
    )


def build_model(kind='svm'):
    """Instantiate the classifier registered under *kind* with defaults.

    Raises:
        ValueError: if *kind* is not a key of ``MODEL_CHOICES``.
    """
    try:
        module_name, class_name = MODEL_CHOICES[kind]
    except KeyError:
        raise ValueError(
            f"unknown model kind {kind!r}; "
            f"expected one of {sorted(MODEL_CHOICES)}"
        ) from None
    return getattr(importlib.import_module(module_name), class_name)()


def make_grid(h_range=(150, 200), w_range=(30, 90), step=0.1):
    """Return ``(heights, weights)`` coordinate matrices covering the ranges.

    Each range is a half-open ``(start, stop)`` pair sampled every *step*.
    """
    heights = np.arange(h_range[0], h_range[1], step)
    weights = np.arange(w_range[0], w_range[1], step)
    grid_h, grid_w = np.meshgrid(heights, weights)
    return grid_h, grid_w


def plot_decision_regions(model, df):
    """Colour the height/weight plane by *model*'s prediction and overlay *df*."""
    import matplotlib.pyplot as plt

    grid_h, grid_w = make_grid()
    # Predict every grid point at once, then fold back to the grid's shape.
    grid_points = np.c_[grid_h.ravel(), grid_w.ravel()]
    predicted = model.predict(grid_points).reshape(grid_h.shape)
    plt.pcolormesh(grid_h, grid_w, predicted)
    plt.scatter(df['Height'], df['Weight'], c=df['Gender'])
    plt.show()


def main():
    """Run the full pipeline: load, train, evaluate, predict and plot."""
    from sklearn.model_selection import train_test_split

    df = load_dataset()
    print(df)

    # Plain arrays (no column names) keep fit/predict consistent with the
    # list and grid inputs used below; a 1-D target avoids scikit-learn's
    # column-vector warning.
    X = df[['Height', 'Weight']].to_numpy()
    Y = df['Gender'].to_numpy()
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, train_size=0.8)

    model = build_model('svm')
    model.fit(X_train, Y_train)
    print(model.score(X_test, Y_test))
    print(model.predict([[165, 58]]))

    plot_decision_regions(model, df)


if __name__ == '__main__':
    main()
hw.csv数据:
Gender,Age,Height,Weight
M,21,163,60
M,22,164,56
M,21,165,60
M,23,168,55
M,21,169,60
M,21,170,54
M,23,170,80
M,23,170,64
M,22,171,67
M,22,172,65
M,23,172,60
M,21,172,60
M,23,173,60
M,22,173,62
M,21,174,65
M,22,175,70
M,22,175,70
M,22,175,65
M,23,175,60
M,21,175,62
M,21,176,58
M,21,178,70
M,23,178,75
M,23,180,63
M,23,180,71
M,23,183,75
F,20,153,42
F,20,156,44
F,21,156,38
F,21,157,48
F,21,158,52
F,23,158,45
F,22,159,43
F,22,160,50
F,21,160,45
F,21,160,52
F,23,160,50
F,22,161,50
F,21,161,45
F,21,162,55
F,20,162,60
F,20,163,56
F,20,163,56
F,21,163,59
F,22,164,55
F,23,164,47
F,21,165,45
F,21,165,45
F,20,165,60
F,20,168,58
F,21,168,49