import numpy as np
from sklearn.neighbors import KNeighborsClassifier
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
%matplotlib inline
from sklearn import datasets
import pandas as pd
from sklearn.linear_model import LogisticRegression
# Load the CSV file and pick the feature/target columns used further down.
path = r"C:\Users\Tsinghua-yincheng\Desktop\SZday93\adults.txt"
data = pd.read_csv(path)
print(type(data))

# Exploratory peeks. These bare expressions only show output when evaluated
# interactively (e.g. as the last line of a notebook cell); run as a plain
# script they are evaluated and the result is discarded.
data["age"]
data.shape
data.index
data.columns

# First feature selection; it is superseded by the second selection below.
X = data[['age', 'education', 'marital_status', 'sex', 'hours_per_week']]
X

# Target column.
Y = data['salary']
Y

# Feature selection actually used by the rest of the file. An explicit copy is
# taken because columns of X are overwritten later; assigning into a bare
# selection of `data` triggers pandas' SettingWithCopyWarning.
X = data[['age', 'education', 'occupation', 'hours_per_week']].copy()
X
X["education"].unique().size
X["occupation"].unique()
def edu2float(edu):
    """Return the position of ``edu`` among the unique values of X["education"].

    Despite the name, the result is an integer index taken from
    ``np.argwhere``, not a float. The unique values are read from the
    module-level ``X`` on every call.

    Raises:
        IndexError: if ``edu`` does not match any unique value.
    """
    levels = X["education"].unique()
    hits = np.argwhere(levels == edu)
    # First (and, for unique values, only) match; row 0, column 0.
    return hits[0, 0]
def occupation2float(occupation):
    """Return the position of ``occupation`` among the unique values of X["occupation"].

    Despite the name, the result is an integer index taken from
    ``np.argwhere``, not a float. The unique values are read from the
    module-level ``X`` on every call.

    Raises:
        IndexError: if ``occupation`` does not match any unique value.
    """
    kinds = X["occupation"].unique()
    hits = np.argwhere(kinds == occupation)
    # First (and, for unique values, only) match; row 0, column 0.
    return hits[0, 0]
# Number of trailing rows held out for evaluation.
TEST_SIZE = 1000

# Encode the two categorical columns as integer codes numbered by order of
# first appearance. This is the same "position within unique()" value that
# occupation2float / edu2float compute, but done in one vectorized pass per
# column instead of recomputing unique() for every row.
# NOTE(review): pd.factorize codes missing values as -1, whereas the per-row
# helpers would raise IndexError on them -- confirm these columns have no NaN.
X = X.assign(
    occupation=pd.factorize(X["occupation"])[0],
    education=pd.factorize(X["education"])[0],
)

# Positional hold-out split: everything except the last TEST_SIZE rows is used
# for fitting, the last TEST_SIZE rows for evaluation.
X_train, X_test = X.iloc[:-TEST_SIZE], X.iloc[-TEST_SIZE:]
Y_train, Y_test = Y.iloc[:-TEST_SIZE], Y.iloc[-TEST_SIZE:]

logic = LogisticRegression()
logic.fit(X_train, Y_train)
Ynew = logic.predict(X_test)

# Fraction of held-out rows predicted correctly. Using the mean of the boolean
# mask divides by the actual number of test rows rather than a hard-coded 1000.
result = (Y_test == Ynew)
accuracy = result.mean()
accuracy