导入相关库
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
导入数据
# Load the training data.
# NOTE(review): the path below looks like a placeholder -- point it at the
# actual location of ex2data1.txt before running.
fpath = r'.../ex2data1.txt'
# The file is read as comma-separated text without a header row, so pandas
# labels the columns 0, 1, 2; they are renamed to descriptive names right away.
df = pd.read_table(fpath, sep=',', header=None, engine='python').rename(
    columns={0: 'Exam_1', 1: 'Exam_2', 2: 'Admitted'},
)
数据预处理
# Min-max scale the data into the [0, 1] interval.
# Every column of df is scaled independently (the 'Admitted' column included).
column_min = df.min()
column_range = df.max() - column_min
df_norm = (df - column_min) / column_range
数据可视化
# Plot the raw (unscaled) training data, one scatter series per outcome.
not_admitted = df[df['Admitted'] == 0]
admitted = df[df['Admitted'] == 1]

# Rows with Admitted == 0: yellow markers with a black edge.
plt.scatter(
    not_admitted['Exam_1'],
    not_admitted['Exam_2'],
    edgecolors='k',
    color='y',
    label='Not Admitted',
)
# Rows with Admitted == 1: black '+' markers.
plt.scatter(
    admitted['Exam_1'],
    admitted['Exam_2'],
    marker='+',
    color='k',
    label='Admitted',
)

plt.legend(loc='upper right')
plt.xlabel('Exam 1 Score')
plt.ylabel('Exam 2 Score')
plt.title('Figure 1:Scatter plot of training data')
Sigmoid Function
- hypothesis: $h_{\theta}(x)=g(\theta^{T}x)$
- sigmoid function: $g(z)=\frac{1}{1+e^{-z}}$