利用熵和条件熵预测图书借阅

信息熵的基本了解
熵与空间位置预测

样本数量

对某人借书的种类及借此种类书的数量进行统计

import pandas as pd
import numpy as np
# Console display settings for pandas:
#   display.width       - maximum characters per output line; 80 is too
#                         narrow for a landscape screen (200 is the usual
#                         pick, 500 is used here).
#   display.max_columns - None means every column is shown.
for _option_name, _option_value in (
    ('display.width', 500),
    ('display.max_columns', None),
):
    pd.set_option(_option_name, _option_value)

#图书类别
Book_category = ['A','B','C','D','E','F','G','H','I','J','K','N','O','P','Q','R','S','T','U','V','X','Z']
# # 读取数据
# Person_data = pd.read_excel(r'Person_8748847336.xlsx').iloc[0: , 1:]
LOAN_DATE = ['2013/1/1', '2013/1/1', '2013/1/1', '2013/1/1', '2013/1/1', '2013/2/20', '2013/4/19', '2013/4/19', '2013/4/19', '2013/4/24', '2013/4/24', '2013/4/24', '2013/4/24', '2013/4/24', '2013/4/24', '2013/4/24', '2013/4/27', '2013/5/8', '2013/5/15', '2013/5/15', '2013/5/17', '2013/6/6', '2013/6/8', '2013/6/8', '2013/10/9', '2013/10/29', '2013/10/29', '2013/11/7', '2013/11/7', '2013/11/14', '2013/11/14', '2013/11/14', '2013/11/25', '2013/12/2', '2013/12/2', '2013/12/4', '2013/12/6', '2013/12/11', '2013/12/11', '2013/12/18', '2013/12/18', '2014/1/1', '2014/1/2', '2014/2/17', '2014/2/17', '2014/3/13', '2014/3/13', '2014/4/15', '2014/4/29', '2014/4/29', '2014/4/29', '2014/5/5', '2014/9/23', '2014/10/10', '2014/10/10', '2014/10/10', '2014/10/10', '2014/10/10', '2014/10/10', '2014/10/10', '2014/10/10', '2014/10/10', '2014/10/10', '2014/12/2', '2014/12/2', '2014/12/2', '2014/12/4', '2014/12/4', '2015/1/14', '2015/2/28', '2015/2/28', '2015/3/5', '2015/3/5', '2015/3/5', '2015/3/5', '2015/3/11', '2015/3/11', '2015/3/11', '2015/3/12', '2015/3/12', '2015/3/12', '2015/3/12', '2015/3/12', '2015/3/19', '2015/3/19']
time_number = ['2013/1/1', '2013/2/20', '2013/4/19', '2013/4/24', '2013/4/27', '2013/5/8', '2013/5/15', '2013/5/17', '2013/6/6', '2013/6/8', '2013/10/9', '2013/10/29', '2013/11/7', '2013/11/14', '2013/11/25', '2013/12/2', '2013/12/4', '2013/12/6', '2013/12/11', '2013/12/18', '2014/1/1', '2014/1/2', '2014/2/17', '2014/3/13', '2014/4/15', '2014/4/29', '2014/5/5', '2014/9/23', '2014/10/10', '2014/12/2', '2014/12/4', '2015/1/14', '2015/2/28', '2015/3/5', '2015/3/11', '2015/3/12', '2015/3/19']
ITEM_CALLNO = ['H', 'H', 'H', 'H', 'H', 'D', 'F', 'F', 'F', 'D', 'E', 'E', 'D', 'D', 'D', 'F', 'F', 'H', 'B', 'B', 'B', 'C', 'D', 'D', 'I', 'F', 'I', 'F', 'D', 'I', 'F', 'F', 'D', 'F', 'F', 'H', 'F', 'F', 'F', 'F', 'D', 'F', 'D', 'D', 'D', 'K', 'K', 'D', 'I', 'I', 'I', 'D', 'H', 'D', 'D', 'D', 'D', 'D', 'D', 'D', 'D', 'D', 'D', 'F', 'K', 'D', 'D', 'D', 'D', 'D', 'D', 'D', 'D', 'D', 'D', 'D', 'D', 'D', 'D', 'D', 'D', 'D', 'D', 'J', 'D']

# N   - sample size (one call-number entry per borrowed book)
# t_n - number of entries in time_number (the borrowing dates)
# B_N - number of book categories
N, t_n, B_N = (len(seq) for seq in (ITEM_CALLNO, time_number, Book_category))
print("样本数量:", N)
#列表转化为字典
def Book_list_to_dic(categories=None, call_numbers=None):
    """Count how many borrowed books fall into each category.

    Args:
        categories: Iterable of category codes to report on. Defaults to
            the module-level ``Book_category`` list.
        call_numbers: Iterable holding one category code per borrowed
            book. Defaults to the module-level ``ITEM_CALLNO`` list.

    Returns:
        A dict mapping every code in ``categories`` (in that order) to the
        number of times it occurs in ``call_numbers``; categories that
        were never borrowed map to 0.

    Raises:
        KeyError: If ``call_numbers`` contains a code that is not listed
            in ``categories``.
    """
    # Resolve the defaults at call time so the zero-argument call keeps
    # reading whatever the module-level lists currently hold.
    if categories is None:
        categories = Book_category
    if call_numbers is None:
        call_numbers = ITEM_CALLNO

    # Start every category at zero so unborrowed categories still appear.
    counts = dict.fromkeys(categories, 0)
    for code in call_numbers:
        counts[code] += 1
    return counts

# Tally the loans per category and show the resulting table.
Book_data_dic = Book_list_to_dic()
print(
    "借书种类及数量统计:\n",
    Book_data_dic,
)

结果:

样本数量: 85
借书种类及数量统计:
{'A': 0, 'B': 3, 'C': 1, 'D': 44, 'E': 2, 'F': 17, 'G': 0, 'H': 8, 'I': 6, 'J': 1, 'K': 3, 'N': 0, 'O': 0, 'P': 0, 'Q': 0, 'R': 0, 'S': 0, 'T': 0, 'U': 0, 'V': 0, 'X': 0, 'Z': 0}

借某种类书的概率

计算借某种类书的概率,即先计算熵公式中的 $P(x_i)$:

$$P(x_i) = \frac{n_i}{N}$$

其中 $n_i$ 为借阅第 $i$ 类图书的数量,$N$ 为借书总数(样本数量)。

import pandas as pd
from sympy import *
# Console display settings for pandas:
#   display.width       - maximum characters per output line; 80 is too
#                         narrow for a landscape screen (200 is the usual
#                         pick, 500 is used here).
#   display.max_columns - None means every column is shown.
for _option_name, _option_value in (
    ('display.width', 500),
    ('display.max_columns', None),
):
    pd.set_option(_option_name, _option_value)

#图书类别
Book_category = ['A','B','C','D','E','F','G','H','I','J','K','N','O','P','Q','R','S','T','U','V','X','Z']
# # 读取数据
# Person_data = pd.read_excel(r'Person_8748847336.xlsx').iloc[0: , 1:]
LOAN_DATE = ['2013/1/1', '2013/1/1', '2013/1/1', '2013/1/1', '2013/1/1', '2013/2/20', '2013/4/19', '2013/4/19', '2013/4/19', '2013/4/24', '2013/4/24', '2013/4/24', '2013/4/24', '2013/4/24', '2013/4/24', '2013/4/24', '2013/4/27', '2013/5/8', '2013/5/15', '2013/5/15', '2013/5/17', '2013/6/6', '2013/6/8', '2013/6/8', '2013/10/9', '2013/10/29', '2013/10/29', '2013/11/7', '2013/11/7', '2013/11/14', '2013/11/14', '2013/11/14', '2013/11/25', '2013/12/2', '2013/12/2', '2013/12/4', '2013/12/6', '2013/12/11', '2013/12/11', '2013/12/18', '2013/12/18', '2014/1/1', '2014/1/2', '2014/2/17', '2014/2/17', '2014/3/13', '2014/3/13', '2014/4/15', '2014/4/29', '2014/4/29', '2014/4/29', '2014/5/5', '2014/9/23', '2014/10/10', '2014/10/10', '2014/10/10', '2014/10/10', '2014/10/10', '2014/10/10', '2014/10/10', '2014/10/10', '2014/10/10', '2014/10/10', '2014/12/2', '2014/12/2', '2014/12/2', '2014/12/4', '2014/12/4', '2015/1/14', '2015/2/28', '2015/2/28', '2015/3/5', '2015/3/5', '2015/3/5', '2015/3/5', '2015/3/11', '2015/3/11', '2015/3/11', '2015/3/12', '2015/3/12', '2015/3/12', '2015/3/12', '2015/3/12', '2015/3/19', '2015/3/19']
time_number = ['2013/1/1', '2013/2/20', '2013/4/19', '2013/4/24', '2013/4/27', '2013/5/8', '2013/5/15', '2013/5/17', '2013/6/6', '2013/6/8', '2013/10/9', '2013/10/29', '2013/11/7', '2013/11/14', '2013/11/25', '2013/12/2', '2013/12/4', '2013/12/6', '2013/12/11', '2013/12/18', '2014/1/1', '2014/1/2', '2014/2/17', '2014/3/13', '2014/4/15', '2014/4/29', '2014/5/5', '2014/9/23', '2014/10/10', '2014/12/2', '2014/12/4', '2015/1/14', '2015/2/28', '2015/3/5', '2015/3/11', '2015/3/12', '2015/3/19']
ITEM_CALLNO = ['H', 'H', 'H', 'H', 'H', 'D', 'F', 'F', 'F', 'D', 'E', 'E', 'D', 'D', 'D', 'F', 'F', 'H', 'B', 'B', 'B', 'C', 'D', 'D', 'I', 'F', 'I', 'F', 'D', 'I', 'F', 'F', 'D', 'F', 'F', 'H', 'F', 'F', 'F', 'F', 'D', 'F', 'D', 'D', 'D', 'K', 'K', 'D', 'I', 'I', 'I', 'D', 'H', 'D', 'D', 'D', 'D', 'D', 'D', 'D', 'D', 'D', 'D', 'F', 'K', 'D', 'D', 'D', 'D', 'D', 'D', 'D', 'D', 'D', 'D', 'D', 'D', 'D', 'D', 'D', 'D', 'D', 'D', 'J', 'D']

# N   - sample size (one call-number entry per borrowed book)
# t_n - number of entries in time_number (the borrowing dates)
# B_N - number of book categories
N, t_n, B_N = (len(seq) for seq in (ITEM_CALLNO, time_number, Book_category))
print("样本数量:", N)
#列表转化为字典
def Book_list_to_dic(categories=None, call_numbers=None):
    """Count how many borrowed books fall into each category.

    Args:
        categories: Iterable of category codes to report on. Defaults to
            the module-level ``Book_category`` list.
        call_numbers: Iterable holding one category code per borrowed
            book. Defaults to the module-level ``ITEM_CALLNO`` list.

    Returns:
        A dict mapping every code in ``categories`` (in that order) to the
        number of times it occurs in ``call_numbers``; categories that
        were never borrowed map to 0.

    Raises:
        KeyError: If ``call_numbers`` contains a code that is not listed
            in ``categories``.
    """
    # Resolve the defaults at call time so the zero-argument call keeps
    # reading whatever the module-level lists currently hold.
    if categories is None:
        categories = Book_category
    if call_numbers is None:
        call_numbers = ITEM_CALLNO

    # Start every category at zero so unborrowed categories still appear.
    counts = dict.fromkeys(categories, 0)
    for code in call_numbers:
        counts[code] += 1
    return counts

# Tally the loans per category.
Book_data_dic = Book_list_to_dic()
print("借书种类及数量统计:\n",Book_data_dic)

# Probability of borrowing each category: P(x_i) = count_i / N.
# The probabilities go into their own dict. A plain
# `Book_Probablity_dic = Book_data_dic` only binds a second name to the same
# object, so filling in the probabilities would overwrite the raw counts.
Book_Probablity_dic = {}
for B in Book_category:
    # Rational keeps the value as an exact, reduced fraction.
    Book_Probablity_dic[B] = Rational(Book_data_dic[B], N)
print("借书种类概率:\n",Book_Probablity_dic)

结果:

样本数量: 85
借书种类及数量统计:
 {'A': 0, 'B': 3, 'C': 1, 'D': 44, 'E': 2, 'F': 17, 'G': 0, 'H': 8, 'I': 6, 'J': 1, 'K': 3, 'N': 0, 'O': 0, 'P': 0, 'Q': 0, 'R': 0, 'S': 0, 'T': 0, 'U': 0, 'V': 0, 'X': 0, 'Z': 0}
借书种类概率:
 {'A': 0, 'B': 3/85, 'C': 1/85, 'D': 44/85, 'E': 2/85, 'F': 1/5, 'G': 0, 'H': 8/85, 'I': 6/85, 'J': 1/85, 'K': 3/85, 'N': 0, 'O': 0, 'P': 0, 'Q': 0, 'R': 0, 'S': 0, 'T': 0, 'U': 0, 'V': 0, 'X': 0, 'Z': 0}

计算借书种类的熵

$$H(X) = -\sum_{i} P(x_i)\,\log_2 P(x_i)$$

并且规定 $0 \log 0 = 0$。

import pandas as pd
from sympy import *
# Console display settings for pandas:
#   display.width       - maximum characters per output line; 80 is too
#                         narrow for a landscape screen (200 is the usual
#                         pick, 500 is used here).
#   display.max_columns - None means every column is shown.
for _option_name, _option_value in (
    ('display.width', 500),
    ('display.max_columns', None),
):
    pd.set_option(_option_name, _option_value)

#图书类别
Book_category = ['A','B','C','D','E','F','G','H','I','J','K','N','O','P','Q','R','S','T','U','V','X','Z']
# # 读取数据
# Person_data = pd.read_excel(r'Person_8748847336.xlsx').iloc[0: , 1:]
LOAN_DATE = ['2013/1/1', '2013/1/1', '2013/1/1', '2013/1/1', '2013/1/1', '2013/2/20', '2013/4/19', '2013/4/19', '2013/4/19', '2013/4/24', '2013/4/24', '2013/4/24', '2013/4/24', '2013/4/24', '2013/4/24', '2013/4/24', '2013/4/27', '2013/5/8', '2013/5/15', '2013/5/15', '2013/5/17', '2013/6/6', '2013/6/8', '2013/6/8', '2013/10/9', '2013/10/29', '2013/10/29', '2013/11/7', '2013/11/7', '2013/11/14', '2013/11/14', '2013/11/14', '2013/11/25', '2013/12/2', '2013/12/2', '2013/12/4', '2013/12/6', '2013/12/11', '2013/12/11', '2013/12/18', '2013/12/18', '2014/1/1', '2014/1/2', '2014/2/17', '2014/2/17', '2014/3/13', '2014/3/13', '2014/4/15', '2014/4/29', '2014/4/29', '2014/4/29', '2014/5/5', '2014/9/23', '2014/10/10', '2014/10/10', '2014/10/10', '2014/10/10', '2014/10/10', '2014/10/10', '2014/10/10', '2014/10/10', '2014/10/10', '2014/10/10', '2014/12/2', '2014/12/2', '2014/12/2', '2014/12/4', '2014/12/4', '2015/1/14', '2015/2/28', '2015/2/28', '2015/3/5', '2015/3/5', '2015/3/5', '2015/3/5', '2015/3/11', '2015/3/11', '2015/3/11', '2015/3/12', '2015/3/12', '2015/3/12', '2015/3/12', '2015/3/12', '2015/3/19', '2015/3/19']
time_number = ['2013/1/1', '2013/2/20', '2013/4/19', '2013/4/24', '2013/4/27', '2013/5/8', '2013/5/15', '2013/5/17', '2013/6/6', '2013/6/8', '2013/10/9', '2013/10/29', '2013/11/7', '2013/11/14', '2013/11/25', '2013/12/2', '2013/12/4', '2013/12/6', '2013/12/11', '2013/12/18', '2014/1/1', '2014/1/2', '2014/2/17', '2014/3/13', '2014/4/15', '2014/4/29', '2014/5/5', '2014/9/23', '2014/10/10', '2014/12/2', '2014/12/4', '2015/1/14', '2015/2/28', '2015/3/5', '2015/3/11', '2015/3/12', '2015/3/19']
ITEM_CALLNO = ['H', 'H', 'H', 'H', 'H', 'D', 'F', 'F', 'F', 'D', 'E', 'E', 'D', 'D', 'D', 'F', 'F', 'H', 'B', 'B', 'B', 'C', 'D', 'D', 'I', 'F', 'I', 'F', 'D', 'I', 'F', 'F', 'D', 'F', 'F', 'H', 'F', 'F', 'F', 'F', 'D', 'F', 'D', 'D', 'D', 'K', 'K', 'D', 'I', 'I', 'I', 'D', 'H', 'D', 'D', 'D', 'D', 'D', 'D', 'D', 'D', 'D', 'D', 'F', 'K', 'D', 'D', 'D', 'D', 'D', 'D', 'D', 'D', 'D', 'D', 'D', 'D', 'D', 'D', 'D', 'D', 'D', 'D', 'J', 'D']

# N   - sample size (one call-number entry per borrowed book)
# t_n - number of entries in time_number (the borrowing dates)
# B_N - number of book categories
N, t_n, B_N = (len(seq) for seq in (ITEM_CALLNO, time_number, Book_category))
print("样本数量:", N)
#列表转化为字典
def Book_list_to_dic(categories=None, call_numbers=None):
    """Count how many borrowed books fall into each category.

    Args:
        categories: Iterable of category codes to report on. Defaults to
            the module-level ``Book_category`` list.
        call_numbers: Iterable holding one category code per borrowed
            book. Defaults to the module-level ``ITEM_CALLNO`` list.

    Returns:
        A dict mapping every code in ``categories`` (in that order) to the
        number of times it occurs in ``call_numbers``; categories that
        were never borrowed map to 0.

    Raises:
        KeyError: If ``call_numbers`` contains a code that is not listed
            in ``categories``.
    """
    # Resolve the defaults at call time so the zero-argument call keeps
    # reading whatever the module-level lists currently hold.
    if categories is None:
        categories = Book_category
    if call_numbers is None:
        call_numbers = ITEM_CALLNO

    # Start every category at zero so unborrowed categories still appear.
    counts = dict.fromkeys(categories, 0)
    for code in call_numbers:
        counts[code] += 1
    return counts

# Tally the loans per category.
Book_data_dic = Book_list_to_dic()
print("借书种类及数量统计:\n",Book_data_dic)

# Probability of borrowing each category: P(x_i) = count_i / N.
# The probabilities go into their own dict. A plain
# `Book_Probablity_dic = Book_data_dic` only binds a second name to the same
# object, so filling in the probabilities would overwrite the raw counts.
Book_Probablity_dic = {}
for B in Book_category:
    # Rational keeps the value as an exact, reduced fraction.
    Book_Probablity_dic[B] = Rational(Book_data_dic[B], N)
print("借书种类概率P:\n",Book_Probablity_dic)

# Entropy of the category distribution, in bits:
#   H(X) = -sum_i P(x_i) * log2(P(x_i))
Book_Entrop = 0
for B in Book_category:
    probability = Book_Probablity_dic[B]
    # Zero-probability categories contribute nothing, following the
    # convention 0 * log(0) = 0, so they are skipped.
    if probability != 0:
        # evalf() turns the exact symbolic term into a floating-point number.
        Book_Entrop += (-probability * log(probability, 2)).evalf()
print("借书种类的熵H(X):",Book_Entrop)

结果:

样本数量: 85
借书种类及数量统计:
 {'A': 0, 'B': 3, 'C': 1, 'D': 44, 'E': 2, 'F': 17, 'G': 0, 'H': 8, 'I': 6, 'J': 1, 'K': 3, 'N': 0, 'O': 0, 'P': 0, 'Q': 0, 'R': 0, 'S': 0, 'T': 0, 'U': 0, 'V': 0, 'X': 0, 'Z': 0}
借书种类概率P:
 {'A': 0, 'B': 3/85, 'C': 1/85, 'D': 44/85, 'E': 2/85, 'F': 1/5, 'G': 0, 'H': 8/85, 'I': 6/85, 'J': 1/85, 'K': 3/85, 'N': 0, 'O': 0, 'P': 0, 'Q': 0, 'R': 0, 'S': 0, 'T': 0, 'U': 0, 'V': 0, 'X': 0, 'Z': 0}
借书种类的熵H(X): 2.16560964755005
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值