Python 编码实现指数平滑法与移动平均法

最新推荐文章于 2024-07-30 09:09:41 发布

机智的小陈今天学习了吗

最新推荐文章于 2024-07-30 09:09:41 发布

阅读量6k

点赞数 7

分类专栏：信息分析与预测　文章标签：python 数据分析

本文链接：https://blog.csdn.net/weixin_43890415/article/details/116452775

版权

信息分析与预测专栏收录该内容

4 篇文章 0 订阅

订阅专栏

文章目录

1.实现移动平均

a)一次移动平均，取多个n值，计算标准差；
b)二次移动平均，计算参数a,b，进行预测；

代码：

# -*- coding=utf-8 -*-
# name: nan chen
# date: 2021/4/8 14:28

import csv
import matplotlib.pyplot as plt

# Moving-average forecasting.
#
# Reads the ID (column 0) and Count (column 2) series from a CSV file, scores
# simple moving averages for window sizes 2..200 by their one-step-ahead MSE,
# and then produces a double-moving-average forecast using the best window.

DEFAULT_CSV_PATH = r"D:\Downloads\train.csv"
MAX_WINDOW = 200


def single_moving_average(series, window):
    """Return the simple moving average of *series*, truncated to integers.

    Element ``k`` of the result is ``int(sum(series[k:k + window]) / window)``.
    The list is empty when *series* has fewer than *window* elements.

    Raises:
        ValueError: if *window* is smaller than 1.
    """
    if window < 1:
        raise ValueError("window must be at least 1, got %s" % window)
    if len(series) < window:
        return []
    # Slide the window instead of re-adding all of its elements each step.
    window_sum = sum(series[:window])
    # NOTE: averages are truncated with int() on purpose; this keeps the
    # printed forecasts identical to the integer-valued ones this script has
    # always reported.
    averages = [int(window_sum / window)]
    for idx in range(window, len(series)):
        window_sum += series[idx] - series[idx - window]
        averages.append(int(window_sum / window))
    return averages


def one_step_mse(series, averages, window):
    """Return the mean squared one-step-ahead forecast error.

    ``averages[k]`` (the mean of ``series[k:k + window]``) is treated as the
    forecast for ``series[k + window]``.

    Raises:
        ValueError: if *series* is not longer than *window*, because then
            there is no observation left to compare a forecast against.
    """
    pairs = len(series) - window
    if pairs <= 0:
        raise ValueError(
            "need more than %s observations to score window %s" % (window, window)
        )
    squared_error = sum(
        (series[k + window] - averages[k]) ** 2 for k in range(pairs)
    )
    return squared_error / pairs


def candidate_windows(length, largest=MAX_WINDOW):
    """Return the window sizes that can be scored on a series of *length*.

    Sizes start at 2 and stop at *largest* or ``length - 1``, whichever is
    smaller, so every candidate leaves at least one value to forecast.
    """
    return range(2, min(largest, length - 1) + 1)


def evaluate_windows(series, windows):
    """Score each window; return ``(window, mse, last_average)`` tuples."""
    results = []
    for window in windows:
        averages = single_moving_average(series, window)
        mse = one_step_mse(series, averages, window)
        results.append((window, mse, averages[-1]))
    return results


def best_window(results):
    """Return the window with the lowest MSE (the first one wins a tie).

    Raises:
        ValueError: if *results* is empty.
    """
    if not results:
        raise ValueError("no window size could be evaluated")
    # No magic upper bound on the MSE: the true minimum is always selected.
    return min(results, key=lambda item: item[1])[0]


def double_moving_forecast(series, window):
    """Return the one-step-ahead double-moving-average forecast.

    With M1 and M2 the latest first- and second-order moving averages:
    ``a = 2 * M1 - M2``, ``b = 2 / (window - 1) * (M1 - M2)`` and the
    forecast is ``a + b``.

    Raises:
        ValueError: if *window* is smaller than 2 or *series* has fewer than
            ``2 * window - 1`` elements.
    """
    if window < 2:
        raise ValueError("window must be at least 2, got %s" % window)
    first = single_moving_average(series, window)
    second = single_moving_average(first, window)
    if not second:
        raise ValueError(
            "need at least %s observations for window %s"
            % (2 * window - 1, window)
        )
    level = first[-1] * 2 - second[-1]
    trend = (2 / (window - 1)) * (first[-1] - second[-1])
    return level + trend


def run_moving_average_forecast(csv_path=DEFAULT_CSV_PATH):
    """Load the series from *csv_path* and print every forecast."""
    ids = []
    counts = []
    with open(csv_path, encoding="utf-8") as f:
        reader = csv.reader(f)
        next(reader)  # skip the header row
        for row in reader:
            ids.append(int(row[0]))
            counts.append(int(row[2]))
    plt.plot(ids, counts, color="blue", linewidth=1, linestyle=':', marker=',')

    # Single moving average: report the error and forecast of every window.
    results = evaluate_windows(counts, candidate_windows(len(counts)))
    for n, mse, last_average in results:
        sq_mse = mse ** 0.5
        print("n=%s 标准差=%s mse=%s" % (n, sq_mse, mse))
        print("n=%s 一次移动平均法的预测值为：%s" % (n, last_average))

    # Double moving average, using the window with the smallest MSE.
    n = best_window(results)
    x = double_moving_forecast(counts, n)
    print("n=%s 二次移动平均法的预测值为：%s" % (n, x))


if __name__ == "__main__":
    run_moving_average_forecast()

2.实现指数平滑

a)一次指数平滑，取多个a值；
b)二次指数平滑（可选）；

代码：

# -*- coding=utf-8 -*-
# name: nan chen
# date: 2021/4/9 10:56

import csv
import matplotlib.pyplot as plt

# Exponential-smoothing forecasting.
#
# Reads the Count series (column 2) from a CSV file and prints the single and
# double (Brown's linear) exponential-smoothing forecasts for several
# smoothing factors.

SMOOTHING_CSV_PATH = r"D:\Downloads\train.csv"


def exponential_smoothing(values, alpha, initial):
    """Return the exponentially smoothed sequence of *values*.

    The result starts with *initial* and then holds one smoothed value per
    input value: ``s[k + 1] = alpha * values[k] + (1 - alpha) * s[k]``.
    It therefore has ``len(values) + 1`` elements.
    """
    smoothed = [initial]
    for value in values:
        smoothed.append(alpha * value + (1 - alpha) * smoothed[-1])
    return smoothed


def single_exponential_forecast(series, alpha):
    """Return the single-exponential-smoothing forecast for the next value.

    The first observation is used as the initial smoothed value.

    Raises:
        ValueError: if *series* is empty.
    """
    if not series:
        raise ValueError("series must not be empty")
    return exponential_smoothing(series, alpha, series[0])[-1]


def double_exponential_forecast(series, alpha):
    """Return the one-step-ahead double-exponential-smoothing forecast.

    With S1 and S2 the latest first- and second-order smoothed values:
    ``a = 2 * S1 - S2``, ``b = alpha / (1 - alpha) * (S1 - S2)`` and the
    forecast is ``a + b``.

    Raises:
        ValueError: if *series* is empty or *alpha* is not strictly between
            0 and 1 (``alpha == 1`` would divide by zero in the trend term).
    """
    if not series:
        raise ValueError("series must not be empty")
    if not 0 < alpha < 1:
        raise ValueError("alpha must be strictly between 0 and 1, got %s" % alpha)
    first = exponential_smoothing(series, alpha, series[0])
    # Smooth every first-order value (first[0] is only the seed) so that the
    # last elements of both sequences describe the same time step.
    second = exponential_smoothing(first[1:], alpha, series[0])
    level = 2 * first[-1] - second[-1]
    trend = alpha / (1 - alpha) * (first[-1] - second[-1])
    return level + trend


def run_exponential_smoothing(csv_path=SMOOTHING_CSV_PATH):
    """Load the series from *csv_path* and print every forecast."""
    with open(csv_path, encoding="utf-8") as f:
        reader = csv.reader(f)
        next(reader)  # skip the header row
        counts = [int(row[2]) for row in reader]
    if not counts:
        raise ValueError("no data rows found in %s" % csv_path)

    # The first factor is derived from the series length; the rest are fixed.
    list_a = [2 / (len(counts) + 1), 0.1, 0.2, 0.3, 0.4, 0.5, 0.8, 0.9]

    # Single exponential smoothing.
    for a in list_a:
        print("a=%s 一次指数平滑法的预测值为：%s"
              % (a, single_exponential_forecast(counts, a)))

    # Double exponential smoothing.
    for a in list_a:
        x = double_exponential_forecast(counts, a)
        print("a=%s 二次指数平滑法的预测值为%s" % (a, x))


if __name__ == "__main__":
    run_exponential_smoothing()

3.数据集的检查

代码：

# -*- coding=utf-8 -*-
# name: nan chen
# date: 2021/4/10 9:52
import pandas as pd
import matplotlib.pyplot as plt

# Missing-value audit: count the nulls in each column and print the totals,
# largest first.
frame = pd.read_csv(r"D:\Downloads\train.csv")
null_counts = frame.isnull().sum()
print(null_counts.sort_values(ascending=False))

# Outlier inspection: scatter Count against ID.
x_column = 'ID'
count_column = frame['Count']
scatter_frame = pd.concat([count_column, frame[x_column]], axis=1)
scatter_frame.plot.scatter(x=x_column, y='Count', ylim=(0, 4000), s=1)
plt.show()

# Frequency of each distinct Count value.
print(count_column.value_counts())

# Skewness of the Count column.
print("Count列的偏斜度%s " % count_column.skew())

# Correlation coefficient between Count and ID.
print("Count列与ID列的相关系数为%s " % count_column.corr(frame['ID']))