结合 Pandas、Seaborn 以及 scikit-learn，对一个联合发电厂的发电效能数据进行处理。
import pandas as pd
import seaborn as sb
import matplotlib.pyplot as plt
# Load the power-plant generation dataset. The columns used in this script
# are AT, V, AP and RH (inputs) and PE (the quantity being predicted).
data = pd.read_csv("ccpp.csv")

# Draw one scatter figure per input column against PE.
# pairplot's `size` argument was renamed to `height` in seaborn 0.9, so
# `height` is used here to avoid the deprecation warning.
for feature in ("AT", "V", "AP", "RH"):
    sb.pairplot(data, x_vars=[feature], y_vars=["PE"], height=8, aspect=1.3)

# Display all four figures at once.
plt.show()
# Libraries needed for the regression step.
import numpy as np
from sklearn import metrics
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Input columns (AT, V, AP, RH) and the regression target (PE).
feature_columns = ["AT", "V", "AP", "RH"]
x = data[feature_columns]
y = data["PE"]

# Split into a training part (x_1, y_1) and a held-out part (x_2, y_2).
# The fixed random_state makes the split reproducible between runs.
x_1, x_2, y_1, y_2 = train_test_split(x, y, random_state=1)

# Fit a linear regression on the training part and print its parameters:
# first the intercept, then the per-column coefficients.
lr = LinearRegression().fit(x_1, y_1)
print(lr.intercept_)
print(lr.coef_)

# Predict on the held-out part and print the root-mean-squared error.
y_3 = lr.predict(x_2)
mse = metrics.mean_squared_error(y_2, y_3)
print(np.sqrt(mse))
AT与PE的关系图:
V与PE的关系图:
AP与PE的关系图:
RH与PE的关系图: