import os

from sklearn import datasets  # bundled sample datasets and synthetic data generators
from sklearn import preprocessing  # normalization (feature scaling) utilities
from sklearn.linear_model import LinearRegression  # linear regression model

try:
    # Current location of train_test_split.
    from sklearn.model_selection import train_test_split
except ImportError:
    # Very old scikit-learn releases only ship the legacy module.
    from sklearn.cross_validation import train_test_split
# Optional: load a user-supplied dataset from disk. "xxxx" is a placeholder
# path, so only attempt the load when the directory actually exists instead of
# aborting the whole script; load_mydata is None when nothing was loaded.
custom_data_dir = "xxxx"
load_mydata = (
    datasets.load_files(custom_data_dir) if os.path.isdir(custom_data_dir) else None
)

# Load a bundled regression dataset. load_boston is unavailable in recent
# scikit-learn releases, so fall back to the diabetes dataset, which exposes
# the same .data / .target attributes.
try:
    load_data = datasets.load_boston()
except (ImportError, AttributeError):
    load_data = datasets.load_diabetes()
data_X = load_data.data  # sample features
data_y = load_data.target  # sample targets

# Fit on the full dataset and compare the first four predictions with the
# corresponding true values.
model = LinearRegression()
model.fit(data_X, data_y)  # train
print(model.predict(data_X[:4, :]))  # predicted values
print(data_y[:4])  # true values

# Generate a synthetic single-feature regression problem.
X, y = datasets.make_regression(n_samples=100, n_features=1, n_targets=1, noise=1)

# Hold out 30% of the samples for testing (70% for training) so the model is
# checked on data it has not seen.
X_train, X_test, y_train, y_test = train_test_split(data_X, data_y, test_size=0.3)
model.fit(X_train, y_train)  # retrain on the training split only
print(model.predict(X_test))  # predictions for the held-out samples
print(y_test)  # true values to compare against

print(model.coef_)  # fitted coefficient of each feature
print(model.intercept_)  # fitted intercept (value at the y-axis)
print(model.get_params())  # estimator hyper-parameters

# R^2 (coefficient of determination), measured on the held-out split so the
# score is not inflated by samples the model was trained on.
print(model.score(X_test, y_test))