# 线性回归 (linear regression)
import matplotlib.pyplot as plt
import numpy as np
from sklearn import linear_model
# Observed series; its positional index 0..n-1 is used as the independent
# variable.
y = [8450.0, 8061.0, 7524.0, 7180.0, 8247.0, 8929.0, 8896.0, 9736.0, 9658.0, 9592.0]
x = range(len(y))

# Degree-1 least-squares fit with NumPy, wrapped in poly1d so it can be both
# printed as an equation and evaluated like a function.
equation = np.poly1d(np.polyfit(x, y, deg=1))
print(equation)

# Fitted values at every x position.
best_fit_line = equation(x)
print(best_fit_line)

# The same fit through scikit-learn, fed (n, 1) column arrays.
x_column = np.array(x).reshape(-1, 1)
y_column = np.array(y).reshape(-1, 1)
reg = linear_model.LinearRegression()
reg.fit(x_column, y_column)
print('斜率: ', reg.coef_[0][0])  # label reads "slope"
print('截距: ', reg.intercept_[0])  # label reads "intercept"

# Slope of the straight line joining the first and last observations (this is
# not the regression slope), and its arctangent in radians.
rise = y[-1] - y[0]
run = x[-1] - x[0]
slope = rise / run
angle = np.arctan(slope)
print('slope: ' + str(slope))
print('angle: ' + str(angle))

# Raw series as a solid line, fitted line as a red dashed overlay.
plt.figure(figsize=(8, 6))
plt.plot(y)
plt.plot(best_fit_line, '--', color='r')
plt.show()
# 聚类分析 (cluster analysis)
'''
聚类分析
'''
import openpyxl
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from sklearn.cluster import k_means
from sklearn.metrics import silhouette_score
# Load the input table: GBK-encoded CSV whose first row holds the column names.
data = pd.read_csv("test.csv", encoding='gbk', header=0)

# Features used for clustering; the column names translate to
# "average purchase cycle (days)" and "average amount per purchase".
x = data[['平均消费周期(天)', '平均每次消费金额']]

# Candidate cluster counts (2 through 11). The same range drives both the
# scoring loop and the x-axis of the score plot so the two cannot drift apart.
cluster_counts = range(2, 12)

# Silhouette score of a k-means clustering for each candidate count.
score = []
for n_clusters in cluster_counts:
    # k_means returns a tuple whose element at index 1 is the label array.
    model = k_means(x, n_clusters=n_clusters)
    score.append(silhouette_score(x, model[1]))

# Final two-cluster model, used to colour the scatter plot.
model = k_means(x, n_clusters=2)
print(model)

# Left panel: samples coloured by cluster label.
plt.subplot(1, 2, 1)
plt.scatter(data['平均消费周期(天)'], data['平均每次消费金额'], c=model[1])

# Right panel: silhouette score against the number of clusters.
plt.subplot(1, 2, 2)
plt.plot(cluster_counts, score)
plt.show()