我做了一个导航站(域名是挂路灯的全拼gualudeng.com),里面精选了各种影视,动漫,黑科技,实用工具,搞笑有趣的站点,动动大家可爱的小手,点进来看看吧,良心站点。
#!/usr/bin/python
# -*- coding:utf-8 -*-
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Lasso, Ridge
from sklearn.model_selection import GridSearchCV
import matplotlib as mpl
# Global matplotlib text settings, applied once at import time:
#   * 'font.sans-serif' -> SimHei; presumably so the Chinese legend labels
#     used by this script render instead of missing-glyph boxes (confirm the
#     font is installed on the target machine).
#   * 'axes.unicode_minus' -> False; draw minus signs with an ASCII hyphen
#     rather than the Unicode minus character.
mpl.rcParams.update({
    'font.sans-serif': ['simhei'],
    'axes.unicode_minus': False,
})
def regression_errors(y_true, y_pred):
    """Return the mean squared error and its square root for two sequences.

    Args:
        y_true: Observed target values (anything ``np.asarray`` accepts).
        y_pred: Predicted values, element-wise aligned with ``y_true``.

    Returns:
        A ``(mse, rmse)`` tuple.
    """
    residuals = np.asarray(y_pred) - np.asarray(y_true)
    mse = np.average(residuals ** 2)  # Mean Squared Error
    rmse = np.sqrt(mse)  # Root Mean Squared Error
    return mse, rmse


def main():
    """Fit a cross-validated Ridge model on the advertising data and plot it.

    Reads ``Advertising.csv`` from the current working directory, shows a
    scatter plot of each advertising channel against sales, selects the Ridge
    ``alpha`` by 5-fold grid search on the training split, prints the best
    parameters and the test MSE/RMSE, and finally plots the true versus
    predicted sales for the test split.
    """
    # Load the data with pandas.
    data = pd.read_csv('Advertising.csv')  # TV, Radio, Newspaper, Sales
    x = data[['TV', 'Radio', 'Newspaper']]
    y = data['Sales']

    # Plot the raw relationship between each channel and sales.
    plt.scatter(x['TV'], y, marker="^", label="tv")
    plt.scatter(x['Radio'], y, marker="v", label="radio")
    plt.scatter(x['Newspaper'], y, marker="*", label="newspaper")
    plt.legend(loc='upper right')
    plt.show()

    # Split into training and test sets (fixed seed for reproducibility).
    x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=1)

    # Ridge is the estimator being tuned; Lasso() can be swapped in here.
    model = Ridge()

    # Cross-validated grid search over the candidate alpha values.
    alpha_can = np.logspace(-3, 2, 10)
    ridge_model = GridSearchCV(model, param_grid={'alpha': alpha_can}, cv=5)
    ridge_model.fit(x_train, y_train)
    # Single-argument print() behaves the same on Python 2 and Python 3.
    print('最优参数: %s' % (ridge_model.best_params_,))

    # Predict on the DataFrame itself so the column names match the ones seen
    # during fit (recent scikit-learn versions warn when they are dropped).
    pre_y = ridge_model.predict(x_test)
    y_true = np.asarray(y_test)
    mse, rmse = regression_errors(y_true, pre_y)
    print('%s %s' % (mse, rmse))

    # Compare true and predicted sales, sample by sample, on the test split.
    t = np.arange(len(x_test))
    plt.plot(t, y_true, 'r-', linewidth=2, label=u'原值')
    plt.plot(t, pre_y, 'g-', linewidth=2, label=u'预测值')
    plt.legend(loc='upper right')
    plt.grid()
    plt.show()


if __name__ == "__main__":
    main()
这个例子分析的是在 TV、Radio、Newspaper 三个渠道上的广告投入对销量的影响。(需要数据的请留言)