day2

最新推荐文章于 2024-05-11 16:00:47 发布

qq_34116299

最新推荐文章于 2024-05-11 16:00:47 发布

阅读量104

点赞数

分类专栏：机器学习实战

本文链接：https://blog.csdn.net/qq_34116299/article/details/89225178

版权

机器学习实战专栏收录该内容

2 篇文章 0 订阅

订阅专栏

画出柱状图（各表情类别的样本数量）

import matplotlib.pyplot as plt

# Emotion categories and the number of samples counted for each of them.
name_list = ['angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'contempt']
num_list = [135, 177, 75, 207, 84, 988, 54]

# One x position per category; reused for the bars and for the tick labels.
index = list(range(len(num_list)))

# Give every bar an explicit colour, cycling through red/green/blue/yellow.
# The colours are spelled out as a list instead of the single string 'rgby',
# which older Matplotlib expanded into a colour sequence but which is not a
# valid colour specification on its own.
palette = ['r', 'g', 'b', 'y']
bar_colors = [palette[i % len(palette)] for i in index]
rects = plt.bar(index, num_list, color=bar_colors)

# Fixed y range so the tallest bar (988) still leaves room for its label.
plt.ylim(0, 1000)

# Category names as x-axis tick labels.
plt.xticks(index, name_list)

plt.ylabel("number")  # y-axis label

# Write each bar's value centred just above the bar.
for rect in rects:
    height = rect.get_height()
    plt.text(rect.get_x() + rect.get_width() / 2, height, str(height), ha='center', va='bottom')

plt.show()

吴恩达线性回归

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# Two-column text file (population, profit) without a header row.
path = r"C:\Users\29117\Desktop\ex1-linear regression\ex1data1.txt"
data = pd.read_csv(path, header=None, names=["Population", 'Profit'])

# Handy checks while exploring the data:
#   data.head()      -> preview of the first 5 rows
#   data.describe()  -> summary statistics (count, mean, std, min, max, ...)
#   data.plot(kind='scatter', x='Population', y='Profit', figsize=(12, 8))
# Optional feature normalisation (disabled):
#   data = (data - data.mean()) / data.std()

# Prepend a constant column so the first parameter acts as the intercept.
data.insert(0, 'Ones', 1)

# After the insert there are three columns: two feature columns
# ('Ones', 'Population') followed by the label column ('Profit').
cols = data.shape[1]

# Everything except the last column is a feature; the last column is the label.
X = np.matrix(data.iloc[:, 0:cols - 1].values)
y = np.matrix(data.iloc[:, cols - 1:cols].values)

# Initial parameters: a 1 x 2 row of zeros (intercept, slope).
theta = np.matrix(np.zeros((1, 2)))
# Cost function
def computeCost(X, y, theta):
    """Return the half mean-squared-error cost of the linear model.

    The prediction is ``X . theta^T``; the cost is the sum of squared
    prediction errors divided by twice the number of rows in ``y``.

    :param X: feature matrix, one sample per row (used in this file as m x 2)
    :param y: target values, one row per sample (used in this file as m x 1)
    :param theta: parameter row vector (used in this file as 1 x 2)
    :return: the scalar cost
    """
    # Prediction error h(x) - y, computed once and reused.
    residual = np.dot(X, theta.T) - y
    squared_error_total = np.sum(np.square(residual))
    return squared_error_total / (2 * y.shape[0])
# Batch gradient descent
# NOTE: the triple-quoted block below is a bare string expression, so the
# function written inside it is never defined or executed. It is an earlier
# variant of gradientDescent kept for reference only; the active definition
# is the `def gradientDescent` that follows the string.
'''
def gradientDescent(X,y,theta,alpha,iters):
    temp=np.matrix(np.zeros(theta.shape))
    parameters = int(theta.ravel().shape[1])  #需要学习的参数的数目，本次有两个
    cost=np.zeros(iters)
    """
    :param X: 输入数据(97,2)
    :param y: 输出数据(97,1)
    :param theta: 需要找到的参数(1,2)
    :param alpha: 学习率 人为指定
    :param iters: 训练次数 人为指定
    :return: 返回最终的学习率 和损失
    """
    for i in range(iters):
        # J求导
        error=np.dot(X,theta.T)-y # h(x)-y
        #更新参数 两个参数
        for j in range(parameters):
            trem=np.multiply(error,X[:,j])
            temp[0,j] = temp[0,j]-(alpha/len(y))*np.sum(trem)
        theta=temp
        cost[i]=computeCost(X,y,theta)
    return theta,cost

'''
def gradientDescent(X, y, theta, alpha, iters):
    """Fit linear-regression parameters with batch gradient descent.

    Each iteration evaluates the gradient of the half mean-squared-error
    cost (the quantity ``computeCost`` returns) over all samples at the
    current parameters, then updates every parameter simultaneously. The
    gradient is recomputed from scratch on every iteration and the descent
    starts from the ``theta`` that was passed in.

    :param X: feature matrix, one sample per row, shape (m, n); include a
        column of ones if an intercept term is wanted
    :param y: m target values (column vector or flat sequence)
    :param theta: n initial parameter values; the argument is not modified
    :param alpha: learning rate (step size)
    :param iters: number of gradient-descent iterations to run
    :return: ``(theta, cost)`` where ``theta`` is a 1 x n ``np.matrix`` with
        the final parameters and ``cost`` is a 1-D array of length ``iters``
        holding the cost measured after each update
    :raises ValueError: if ``X`` contains no samples
    """
    # Work on plain float arrays so the arithmetic is the same whether the
    # caller passes np.matrix or ndarray inputs; np.array copies theta so
    # the caller's object is left untouched.
    features = np.asarray(X, dtype=float)
    targets = np.asarray(y, dtype=float).reshape(-1, 1)
    weights = np.array(theta, dtype=float).reshape(1, -1)

    sample_count = features.shape[0]
    if sample_count == 0:
        raise ValueError("gradientDescent needs at least one training sample")

    cost = np.zeros(iters)
    # residual = h(x) - y for the current parameters.
    residual = np.dot(features, weights.T) - targets
    for i in range(iters):
        # Mean gradient over all samples: (1/m) * residual^T . X  -> shape (1, n).
        gradient = np.dot(residual.T, features) / sample_count
        weights = weights - alpha * gradient
        # The fresh residual gives this iteration's cost and is reused as
        # the starting point of the next gradient.
        residual = np.dot(features, weights.T) - targets
        cost[i] = np.sum(np.square(residual)) / (2 * sample_count)

    # Callers index the result as g[0, j], so hand back a 1 x n matrix.
    return np.matrix(weights), cost
# Training run: learning rate and number of gradient-descent iterations.
alpha = 0.01
iters = 1000
g, cost = gradientDescent(X, y, theta, alpha, iters)
print(g)

# Figure 1: the fitted line drawn over the training data.
# Evaluate the model on 100 evenly spaced population values spanning the data.
x = np.linspace(data.Population.min(), data.Population.max(), 100)
f = g[0, 0] + g[0, 1] * x  # intercept + slope * population
fig, ax = plt.subplots(figsize=(12, 8))
ax.plot(x, f, 'r', label='Prediction')
ax.scatter(data.Population, data.Profit, label='Training Data')
ax.legend(loc=2)  # location code 2 places the legend in the upper left
ax.set_xlabel('Population')
ax.set_ylabel('Profit')
ax.set_title('Predicted Profit vs Population Size')
plt.show()

# Figure 2: the cost recorded after every iteration.
fig, ax = plt.subplots(figsize=(12, 8))
ax.plot(np.arange(iters), cost, 'r')
ax.set_xlabel('Iterations')
ax.set_ylabel('Cost')
ax.set_title('Error vs Training Epoch')
plt.show()