# Import modules
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import model_selection
from sklearn import metrics
from sklearn.linear_model import LinearRegression
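# The random forest model is not used here: from sklearn.ensemble import RandomForestRegressor
# StandardScaler is a feature-scaling class that standardizes each feature to mean 0 and variance 1
# (it rescales the data; it does not turn it into a normal distribution)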
from sklearn.preprocessing import StandardScaler
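# mean_absolute_error, mean_squared_error and r2_score evaluate model performance:
# lower is better for the two error metrics, while a higher r2_score (closer to 1) is better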
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
# Load the salary dataset (absolute path on the author's machine).
df = pd.read_csv(r"C:\Users\Ziyin\Desktop\Salary Data.csv")

# Select the feature and the target.
# x is kept as a one-column DataFrame (double brackets) so the estimator
# receives 2-D input; y is the Series of salaries.
x = df[["YearsExperience"]]
y = df["Salary"]

# Split into training and test sets: 30% held out, fixed seed for reproducibility.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=0
)

# Fit an ordinary least-squares linear regression on the training split.
model = LinearRegression()
model.fit(x_train, y_train)

# Compare the held-out targets with the model's predictions for them.
expected = y_test
excepted = expected  # original (misspelled) name, kept in case other code refers to it
predicted = model.predict(x_test)

# Report test-set quality: lower is better for MAE/MSE, higher is better for R^2.
print(f"MAE: {mean_absolute_error(expected, predicted):.2f}")
print(f"MSE: {mean_squared_error(expected, predicted):.2f}")
print(f"R^2: {r2_score(expected, predicted):.4f}")

# Predict the salary for 15 years of experience.
# The model was fitted on a DataFrame with a named column, so the query is
# wrapped in a frame with the same column; passing a bare list makes
# scikit-learn warn that X does not have valid feature names.
years_exp = [[15]]
years_exp_frame = pd.DataFrame(years_exp, columns=x.columns)
salary_pred = model.predict(years_exp_frame)

# Plot the observed data as a line and the single prediction as a red point.
plt.figure()
plt.plot(x, y, c="g", label="Actual", linewidth=2)
plt.scatter(years_exp, salary_pred, c="r", label="Predicted")
plt.xlabel("Years of Experience")
plt.ylabel("Salary")
plt.title("Salary vs Years of Experience")
plt.legend()
plt.show()