pandas数据分析建模简要

最新推荐文章于 2023-09-30 23:58:20 发布

zk仔的博客

最新推荐文章于 2023-09-30 23:58:20 发布

阅读量669

点赞数

分类专栏： python_数据分析

本文链接：https://blog.csdn.net/weixin_39532362/article/details/104207859

版权

python_数据分析专栏收录该内容

15 篇文章 0 订阅

订阅专栏

pandas数据分析简要

线性回归
聚类分析

线性回归

# -*- coding:utf-8 -*-
import matplotlib.pyplot as plt
import numpy as np
from sklearn import linear_model


# Sample series to fit; x is simply the 0-based position of each observation.
y = [8450.0, 8061.0, 7524.0, 7180.0, 8247.0, 8929.0, 8896.0, 9736.0, 9658.0, 9592.0]
x = range(len(y))


# Least-squares straight line (degree-1 polynomial) through (x, y).
# poly1d wraps the fitted coefficients so the line can be printed and
# evaluated like a function.
equation = np.poly1d(np.polyfit(x, y, 1))
print(equation)


# Fitted values at every x. The polynomial fitted above is reused here
# rather than running polyfit a second time on identical input.
best_fit_line = equation(x)
print(best_fit_line)


# Fit the same line with scikit-learn's LinearRegression.
# Both inputs are reshaped into single-column 2-D arrays, which is why the
# slope is read from coef_[0][0] and the intercept from intercept_[0].
features = np.asarray(x).reshape(-1, 1)
targets = np.asarray(y).reshape(-1, 1)
reg = linear_model.LinearRegression()
reg.fit(features, targets)
# Printed labels: 斜率 = slope, 截距 = intercept.
print('斜率: ', reg.coef_[0][0])
print('截距: ', reg.intercept_[0])


# Slope of the straight line joining the first and last observations
# (this is the endpoint slope, not the regression slope fitted above),
# and its inclination angle from np.arctan, which returns radians.
rise = y[-1] - y[0]
run = x[-1] - x[0]
slope = rise / run
angle = np.arctan(slope)
print('slope: ', slope, sep='')
print('angle: ', angle, sep='')


# Plot the raw series, then overlay the fitted line as a red dashed line,
# both on the same axes of an 8x6 figure, and display the result.
fig = plt.figure(figsize=(8, 6))
axes = fig.gca()
axes.plot(y)
axes.plot(best_fit_line, linestyle='--', color='r')
plt.show()

聚类分析

# -*- coding:utf-8 -*-
'''
聚类分析
'''
import openpyxl
import pandas as pd 
import numpy as np 
from matplotlib import pyplot as plt

from sklearn.cluster import k_means
from sklearn.metrics import silhouette_score



# --- Load data
# test.csv is read as GBK text with the first row used as the header.
# The two columns selected for clustering are, by their names, the average
# purchase interval in days and the average amount per purchase.
feature_columns = ['平均消费周期（天）', '平均每次消费金额']
data = pd.read_csv("test.csv", header=0, encoding='gbk')
x = data[feature_columns]


# --- Silhouette score for each candidate number of clusters
# Candidates are 2..11 inclusive (ten values). The same range object is
# reused as the x-axis of the line chart at the end of this section, so the
# scores and their x positions always have matching lengths.
cluster_counts = range(2, 12)
score = []
for n_clusters in cluster_counts:
    # k_means returns (centroids, labels, inertia); only the labels are
    # needed to compute the silhouette score.
    _, labels, _ = k_means(x, n_clusters=n_clusters)
    score.append(silhouette_score(x, labels))


# --- Build the final model with 2 clusters
model = k_means(x, n_clusters=2)
print(model)


# --- Plots
# Left panel: scatter of the two features, coloured by cluster label
# (model[1] is the label array returned by k_means).
plt.subplot(1, 2, 1)
plt.scatter(data['平均消费周期（天）'], data['平均每次消费金额'], c=model[1])

# Right panel: silhouette score against the number of clusters.
plt.subplot(1, 2, 2)
plt.plot(cluster_counts, score)
plt.show()