import numpy as np
from sklearn.neighbors import KNeighborsClassifier
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
%matplotlib inline
from sklearn import datasets
import pandas as pd
from sklearn.linear_model import LogisticRegression
# Absolute path of the adults.txt data file on the author's machine.
path = r"C:\Users\Tsinghua-yincheng\Desktop\SZday93\adults.txt"

# Parse the file with pandas' default CSV settings.
data = pd.read_csv(path)

# Show which type read_csv returned, then peek at the "age" column.
print(type(data))
data["age"]
![在这里插入图片描述](https://i-blog.csdnimg.cn/blog_migrate/d7263943e41d986719d05ee5a8c73df6.png)
# Inspect the frame: dimensions, row index and column labels.
data.shape
data.index
data.columns

# Select five columns of `data` as a feature table and display it.
X = data[["age", "education", "marital_status", "sex", "hours_per_week"]]
X
![在这里插入图片描述](https://i-blog.csdnimg.cn/blog_migrate/2fdb993d13b60bd8c2c17fbc9b7df5fd.png)
# The "salary" column is the prediction target; display it.
Y = data["salary"]
Y
![在这里插入图片描述](https://i-blog.csdnimg.cn/blog_migrate/22e0322b477e2e39a654dd3c88f12ab6.png)
# Feature table used for the model: four columns of `data`.
# An explicit .copy() makes X an independent frame, so assigning new values
# into its columns later neither raises pandas' SettingWithCopyWarning nor
# leaves it ambiguous whether `data` itself is being modified.
X = data[["age", "education", "occupation", "hours_per_week"]].copy()
X
![在这里插入图片描述](https://i-blog.csdnimg.cn/blog_migrate/6723c5942cadfe6bf3d39929ac3e76b7.png)
# Distinct labels of each categorical column, in order of first appearance.
# They are captured once, before the columns are overwritten below: looking
# them up inside the encoders would rescan the whole column on every call
# and would stop working once the columns hold integer codes.
education_levels = X["education"].unique()
occupation_levels = X["occupation"].unique()
education_levels.size
occupation_levels


def edu2float(edu):
    """Return the integer code of an education label.

    The code is the position of ``edu`` in ``education_levels``. Despite the
    function name, the result is an integer index, not a float.

    Raises:
        IndexError: if ``edu`` is not one of the known labels.
    """
    return np.argwhere(education_levels == edu)[0, 0]


def occupation2float(occupation):
    """Return the integer code of an occupation label.

    The code is the position of ``occupation`` in ``occupation_levels``.
    Despite the function name, the result is an integer index, not a float.

    Raises:
        IndexError: if ``occupation`` is not one of the known labels.
    """
    return np.argwhere(occupation_levels == occupation)[0, 0]


# Replace the label columns with their integer codes.
X["occupation"] = X["occupation"].map(occupation2float)
X["education"] = X["education"].map(edu2float)
![在这里插入图片描述](https://i-blog.csdnimg.cn/blog_migrate/ab29191cef6d6f046795f16cd22fa48b.png)
# Hold out the last `test_size` rows for evaluation and train on the rest.
# NOTE(review): this is a positional tail split, not a shuffled one; it
# assumes the rows are not ordered in a way that biases the tail - confirm.
test_size = 1000
X_train = X.iloc[:-test_size]
X_test = X.iloc[-test_size:]
Y_train = Y.iloc[:-test_size]
Y_test = Y.iloc[-test_size:]

# Fit a logistic-regression classifier with default settings and predict the
# held-out rows.
logic = LogisticRegression()
logic.fit(X_train, Y_train)
Ynew = logic.predict(X_test)

# Fraction of held-out rows predicted correctly. Taking the mean of the
# boolean matches divides by the real number of test rows, so the figure
# stays correct if the hold-out size changes or the data has fewer rows.
result = (Y_test == Ynew)
accuracy = result.mean()
accuracy
![在这里插入图片描述](https://i-blog.csdnimg.cn/blog_migrate/baa292f87747bb8aabddec356145766a.png)