redis模拟数据流(python)

根据数据分析那篇文章可知,若实现强化推荐,必须模拟真实数据流。

首先明确,买方和卖方的数据流应包括哪些特征:

买方数据流:买方ID、物品ID、操作(浏览、加购、购买)、操作时间

卖方数据流(只针对物品属性的改变):物品ID、属性(为了简单,只选择价格、类别,商品信息暂不考虑)、属性值、操作时间

用户信息和商品信息的维护使用redis数据库。

按照流程,首先下载Retailrocket数据集,通过python 实现分行读取:

import redis
import pandas as pd
def buy_stream(path="data/events.csv", limit=10):
    """Yield buyer events in chronological order.

    The events file is read with pandas, sorted by its ``timestamp`` column
    and emitted row by row as named tuples (``DataFrame.itertuples``).

    Args:
        path: CSV file holding the events; must contain a ``timestamp``
            column. Defaults to the location used by the original script.
        limit: Maximum number of rows to emit, or ``None`` for all rows.
            Defaults to 10, the cap used by the original script.

    Yields:
        One named tuple per event row, earliest timestamp first.
    """
    buy_data = pd.read_csv(path).sort_values(by="timestamp")
    if limit is not None:
        buy_data = buy_data[:limit]
    yield from buy_data.itertuples()
def sell_stream(
    paths=("data/item_properties_part1.csv", "data/item_properties_part2.csv"),
    limit=10,
):
    """Yield seller-side item property changes in chronological order.

    All property files are read with pandas, concatenated, sorted by their
    ``timestamp`` column and emitted row by row as named tuples.

    Args:
        paths: CSV files to concatenate; each must contain a ``timestamp``
            column. Defaults to the two files used by the original script.
        limit: Maximum number of rows to emit, or ``None`` for all rows.
            Defaults to 10, the cap used by the original script.

    Yields:
        One named tuple per property-change row, earliest timestamp first.
    """
    parts = [pd.read_csv(p) for p in paths]
    sell_data = pd.concat(parts).sort_values(by="timestamp")
    if limit is not None:
        sell_data = sell_data[:limit]
    yield from sell_data.itertuples()

# Merge the buyer and seller streams by timestamp, always emitting the older
# pending row first. The run ends when the buyer stream is exhausted.
i = 0
buy = buy_stream()
sell = sell_stream()
# next(it, None) returns a sentinel instead of raising StopIteration, so an
# exhausted stream is detected explicitly rather than via a bare ``except``.
buy_row = next(buy, None)
sell_row = next(sell, None)

while buy_row is not None:
    # With no seller rows left, the remaining buyer rows are simply drained.
    if sell_row is None or int(buy_row.timestamp) < int(sell_row.timestamp):
        print(buy_row)  # step 3: handle a buyer event
        buy_row = next(buy, None)
    else:
        print(sell_row)  # step 4: handle a seller property change
        # The original called next(cell): a NameError that the bare except
        # swallowed, so the seller row never advanced and the loop never ended.
        sell_row = next(sell, None)

使用redis作为中间件存储流:

import redis
import pandas as pd


# Connection settings shared by every client below; only the db index differs.
_REDIS_SETTINGS = dict(host='......', port=6380, decode_responses=True, password='123456')

# NOTE(review): this pool is created but not used by any client in the
# visible code.
pool = redis.ConnectionPool(**_REDIS_SETTINGS)

# db 0-2: per-visitor lists of viewed items, their categories and prices.
view_item, view_cate, view_price = (
    redis.Redis(db=index, **_REDIS_SETTINGS) for index in (0, 1, 2)
)

# db 3-5 and db 6-8: presumably the add-to-cart and purchase counterparts of
# the view_* stores; they are not written to in the visible code.
add_item, add_cate, add_price = (
    redis.Redis(db=index, **_REDIS_SETTINGS) for index in (3, 4, 5)
)
buy_item, buy_cate, buy_price = (
    redis.Redis(db=index, **_REDIS_SETTINGS) for index in (6, 7, 8)
)

# db 9: one hash per item id holding its current property values.
item_db = redis.Redis(db=9, **_REDIS_SETTINGS)

# Start from a clean slate. FLUSHALL clears every database on the server,
# not only db 9.
item_db.flushall()
# view_db.set('name','test')
# # print(view_db.get('name'))
def _initial_property(frame, prop, column):
    """Return an (itemid, column) table with each item's first value of ``prop``.

    ``frame`` is expected to be sorted by timestamp already, so keeping the
    first row per item selects the earliest recorded value.
    """
    matching = frame[frame.property == prop]
    earliest = matching.drop_duplicates(subset=['itemid'], keep='first', inplace=False)
    return earliest.rename(columns={'value': column})[['itemid', column]]


# Buyer events, oldest first.
buy_data = pd.read_csv("data/events.csv").sort_values(by="timestamp")

# Seller property changes from both parts, oldest first.
sell_data_1 = pd.read_csv("data/item_properties_part1.csv")
sell_data_2 = pd.read_csv("data/item_properties_part2.csv")
sell_data = pd.concat([sell_data_1, sell_data_2]).sort_values(by="timestamp")

# Initial category, price (stored under property "790") and availability
# of every item.
item_cate = _initial_property(sell_data, "categoryid", 'categoryid')
item_price = _initial_property(sell_data, "790", 'price')
item_available = _initial_property(sell_data, "available", 'available')

# Attach the initial attributes to each buyer event. pd.merge defaults to an
# inner join, so events for items lacking any of the three are dropped.
con_data = pd.merge(buy_data, item_cate, on="itemid")
con_data = pd.merge(con_data, item_price, on="itemid")
con_data = pd.merge(con_data, item_available, on="itemid").sort_values(by="timestamp")


# print(con_data)
def buy_stream(con_data):
    """Yield the first 10000 rows of ``con_data`` one at a time as named tuples."""
    head = con_data[:10000]
    yield from head.itertuples()

def sell_stream(sell_data):
    """Yield the first 10000 rows of ``sell_data`` one at a time as named tuples."""
    head = sell_data[:10000]
    yield from head.itertuples()

# Replay the buyer and seller streams in timestamp order. Seller rows update
# the per-item hashes in item_db; buyer 'view' events are appended to the
# visitor's lists in view_item / view_cate / view_price.
i = 0
buy = buy_stream(con_data)
sell = sell_stream(sell_data)
# next(it, None) returns a sentinel instead of raising StopIteration, so an
# exhausted stream is detected explicitly rather than via a bare ``except``.
buy_row = next(buy, None)
sell_row = next(sell, None)

E = 0  # set to 1 once any tracked item property has been stored in item_db

# The run ends when the buyer stream is exhausted.
while buy_row is not None:
    i += 1
    if i % 10 == 0:
        # Periodic debug output: the pending visitor's view history so far.
        print(buy_row.visitorid, view_item.lrange(buy_row.visitorid, 0, -1))

    # With no seller rows left, the remaining buyer rows are simply drained.
    if sell_row is None or int(buy_row.timestamp) < int(sell_row.timestamp):
        # Step 3: buyer event. Only 'view' events are recorded.
        if buy_row.event == 'view':
            # Default to the item's initial attributes carried on the row;
            # the price string has a one-character prefix that is stripped.
            categoryid = buy_row.categoryid
            price = float(buy_row.price[1:])

            if E == 1 and view_item.exists(buy_row.visitorid):
                # Returning visitor: prefer the current values in item_db.
                # TODO: this is where the recommender should run (take user
                # features and candidates, recommend, collect feedback,
                # store the action, update the policy).
                # hget returns None for items or fields not stored yet;
                # keep the initial attribute then instead of pushing None.
                current_cate = item_db.hget(buy_row.itemid, "categoryid")
                current_price = item_db.hget(buy_row.itemid, "price")
                if current_cate is not None:
                    categoryid = current_cate
                if current_price is not None:
                    price = current_price

            view_item.rpush(buy_row.visitorid, buy_row.itemid)
            view_cate.rpush(buy_row.visitorid, categoryid)
            view_price.rpush(buy_row.visitorid, price)
        buy_row = next(buy, None)

    else:
        # Step 4: seller property change. Only the tracked properties are
        # stored; "790" is the price property and is kept as a float.
        if sell_row.property in ("categoryid", "available", "790"):
            E = 1
            if sell_row.property == "790":
                item_db.hset(sell_row.itemid, "price", float(sell_row.value[1:]))
            else:
                item_db.hset(sell_row.itemid, sell_row.property, sell_row.value)

        # Advance on every seller row, tracked or not. The original advanced
        # only inside the branch above, and via next(cell): a NameError that
        # the bare except swallowed, so the loop could never move on.
        sell_row = next(sell, None)

测试完毕,接下来设计可交互的推荐模型。 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值