import pands as pd
import numpy as np
data_train=pd.read_csv("文件地址")
x_train=data_train.loc[:,'t']
y_train=data_train.loc[:,'rate']
%matplotlib inline
from matplotlib import pyplot as plt
fig1=plt.figure(figsize=(5,5))
plt.scatter(x_train,y_train)
plt.title('raw data')
plt.xlable('x_train')
plt.ylable('y_train')
plt.show()
# --- Linear regression, part 2: fit, evaluate (R^2), and plot the fitted line ---
# sklearn expects a 2-D feature matrix, hence reshape(-1, 1).
x_train = np.array(x_train).reshape(-1, 1)

from sklearn.linear_model import LinearRegression
lr1 = LinearRegression()
lr1.fit(x_train, y_train)

# Load the test set and reshape its feature column the same way.
data_test = pd.read_csv("文件地址test")
x_test = data_test.loc[:, 't']
y_test = data_test.loc[:, 'rate']
x_test = np.array(x_test).reshape(-1, 1)

y_train_predict = lr1.predict(x_train)
y_test_predict = lr1.predict(x_test)

# R^2 on both sets to gauge fit quality.
from sklearn.metrics import r2_score
r2_train = r2_score(y_train, y_train_predict)
r2_test = r2_score(y_test, y_test_predict)  # removed stray trailing "\" that glued this line to the next (SyntaxError)

# Dense grid over the observed x range to draw a smooth prediction line.
x_range = np.linspace(40, 90, 300).reshape(-1, 1)
y_range_predict = lr1.predict(x_range)

fig2 = plt.figure(figsize=(5, 5))
plt.plot(x_range, y_range_predict)
plt.title('raw data')
plt.xlabel('x_train')  # fixed typo: xlable -> xlabel
plt.ylabel('y_train')  # fixed typo: ylable -> ylabel
plt.show()
# --- Polynomial (degree-2) regression on the same data ---
from sklearn.preprocessing import PolynomialFeatures

poly2 = PolynomialFeatures(degree=2)
x_2_train = poly2.fit_transform(x_train)  # fixed typo: fit_transfrom -> fit_transform
x_2_test = poly2.transform(x_test)

lr2 = LinearRegression()
lr2.fit(x_2_train, y_train)

y_2_train_predict = lr2.predict(x_2_train)
y_2_test_predict = lr2.predict(x_2_test)
r2_2_train = r2_score(y_train, y_2_train_predict)
r2_2_test = r2_score(y_test, y_2_test_predict)

# Smooth curve: keep the raw 1-D grid for plotting, and transform it (with the
# already-fitted poly2, so transform, not fit_transform) for predicting.
x_2_range_raw = np.linspace(40, 90, 300).reshape(-1, 1)
x_2_range = poly2.transform(x_2_range_raw)
# bug fix: was lr1.predict(...) — lr1 was trained on 1 feature, the poly
# features have 3 columns; the curve must come from lr2.
y_2_range_predict = lr2.predict(x_2_range)

fig3 = plt.figure(figsize=(5, 5))
# bug fix: plot against the raw x values, not the 3-column feature matrix.
plt.plot(x_2_range_raw, y_2_range_predict)
plt.scatter(x_train, y_train)
plt.scatter(x_test, y_test)
plt.title('raw data')
plt.xlabel('x_train')  # fixed typo: xlable -> xlabel
plt.ylabel('y_train')  # fixed typo: ylable -> ylabel
plt.show()
import pands as pd
import numpy as np
data=pd.read_csv("文件地址")
data.head()
x=data.dorp(['y'],axis=1)
y=data.loc[:,'y']
%matplotlib inline
from matplotlib import pyplot as plt
fig1=plt.figure(figsize=(5,5))
bad=plt.scatter(x.loc[:,'x1'][y==0],x.loc[:,'x2'][y==0])
good=plt.scatter(x.loc[:,'x1'][y==1],x.loc[:,'x2'][y==1])
plt.legend((good,bad),('good','bad'))
plt.title('xy data')
plt.xlable('x1')
plt.ylable('x2')
plt.show()
# --- Fit an elliptic-envelope outlier detector on the y==0 class only ---
from sklearn.covariance import EllipticEnvelope

ad_model = EllipticEnvelope(contamination=0.02)  # fixed typo: contamiantion -> contamination
ad_model.fit(x[y == 0])                          # fixed typo: ad_modle -> ad_model (NameError)
y_predict_bad = ad_model.predict(x[y == 0])      # EllipticEnvelope: -1 = outlier, 1 = inlier
print(y_predict_bad)

fig2 = plt.figure(figsize=(5, 5))
bad = plt.scatter(x.loc[:, 'x1'][y == 0], x.loc[:, 'x2'][y == 0])
good = plt.scatter(x.loc[:, 'x1'][y == 1], x.loc[:, 'x2'][y == 1])
# Mark the points the detector flags as outliers with a large 'x'.
plt.scatter(x.loc[:, 'x1'][y == 0][y_predict_bad == -1],
            x.loc[:, 'x2'][y == 0][y_predict_bad == -1],
            marker='x', s=150)
plt.legend((good, bad), ('good', 'bad'))
plt.title('xy data')
plt.xlabel('x1')  # fixed typo: xlable -> xlabel
plt.ylabel('x2')  # fixed typo: ylable -> ylabel
plt.show()
# --- PCA: standardize, reduce to 2 components, inspect explained variance ---
data = pd.read_csv("文件地址")
data.head()

x = data.drop(['y'], axis=1)  # fixed typo: dorp -> drop
y = data.loc[:, 'y']

from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

x_norm = StandardScaler().fit_transform(x)  # fixed typo: fit_transfrom -> fit_transform
pca = PCA(n_components=2)                   # fixed typo: n_compoents -> n_components
x_reduced = pca.fit_transform(x_norm)       # fixed typo: fit_transfrom -> fit_transform

# Fraction of total variance captured by each principal component.
var_ratio = pca.explained_variance_ratio_
print(var_ratio)

fig4 = plt.figure(figsize=(5, 5))  # fixed: figsize(5,5) was missing the '=' (TypeError)
plt.bar([1, 2], var_ratio)         # fixed typo: plt.bat -> plt.bar
plt.show()
# --- KNN classification: split, fit k=10, plot decision regions, compute metrics ---
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=4, test_size=0.4)

from sklearn.neighbors import KNeighborsClassifier
knn_10 = KNeighborsClassifier(n_neighbors=10)
knn_10.fit(x_train, y_train)
y_train_predict = knn_10.predict(x_train)
y_test_predict = knn_10.predict(x_test)

from sklearn.metrics import accuracy_score  # fixed typo: accracy_score does not exist (ImportError)
accuracy_train = accuracy_score(y_train, y_train_predict)
accuracy_test = accuracy_score(y_test, y_test_predict)

# Dense grid covering the feature plane; predict a class for every grid point
# to visualize the decision regions.
xx, yy = np.meshgrid(np.arange(0, 10, 0.05), np.arange(0, 10, 0.05))
print(yy.shape)
x_range = np.c_[xx.ravel(), yy.ravel()]
print(x_range.shape)
y_range_predict = knn_10.predict(x_range)

fig4 = plt.figure(figsize=(5, 5))
# bug fix: x_range is a plain numpy array — it has no .loc and no 'x1'/'x2'
# string keys; index it positionally with [:, col] instead.
knn_bad = plt.scatter(x_range[:, 0][y_range_predict == 0], x_range[:, 1][y_range_predict == 0])
knn_good = plt.scatter(x_range[:, 0][y_range_predict == 1], x_range[:, 1][y_range_predict == 1])
bad = plt.scatter(x.loc[:, 'x1'][y == 0], x.loc[:, 'x2'][y == 0])
good = plt.scatter(x.loc[:, 'x1'][y == 1], x.loc[:, 'x2'][y == 1])
plt.legend((knn_good, knn_bad), ('knn_good', 'knn_bad'))
plt.title('xy data')
plt.xlabel('x1')  # fixed typo: xlable -> xlabel
plt.ylabel('x2')  # fixed typo: ylable -> ylabel
plt.show()

# Confusion matrix and the standard derived metrics.
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_test_predict)
print(cm)
TP = cm[1, 1]  # true positives
TN = cm[0, 0]  # true negatives
FP = cm[0, 1]  # false positives
FN = cm[1, 0]  # false negatives
accuracy = (TP + TN) / (TP + TN + FP + FN)
sensitivity = recall = TP / (TP + FN)
specificity = TN / (TN + FP)
precision = TP / (TP + FP)
f1 = 2 * precision * recall / (precision + recall)
# --- Sweep n_neighbors from 1 to 20 and plot train/test accuracy curves ---
# Imported locally so this cell works even though the earlier import line is broken.
from sklearn.metrics import accuracy_score  # fixed typo: accracy_score does not exist

n = [i for i in range(1, 21)]
accuracy_train = []
accuracy_test = []
for i in n:
    # bug fix: the loop body was not indented (SyntaxError in the original).
    knn = KNeighborsClassifier(n_neighbors=i)
    knn.fit(x_train, y_train)
    y_train_predict = knn.predict(x_train)
    y_test_predict = knn.predict(x_test)
    accuracy_train.append(accuracy_score(y_train, y_train_predict))
    accuracy_test.append(accuracy_score(y_test, y_test_predict))
print(accuracy_train, accuracy_test)

fig4 = plt.figure(figsize=(12, 5))
plt.subplot(121)
# bug fix: was plt.plot(n.accuracy_train, ...) — a list has no such attribute;
# pass x values and y values as two arguments.
plt.plot(n, accuracy_train, marker='o')
plt.title('accuracy vs n_neighbors')
plt.xlabel('n_neighbors')  # fixed typo: xlable -> xlabel
plt.ylabel('accuracy')     # fixed typo: ylable -> ylabel
plt.subplot(122)
plt.plot(n, accuracy_test, marker='x')
plt.title('test_accuracy vs n_neighbors')
plt.xlabel('n_neighbors')
plt.ylabel('accuracy')
plt.show()