Matrix Factorization

 

import numpy as np
import random
#import numba.cuda
from itertools import islice


# Module-level state shared by pSum() and matrix(); matrix() fills all three.
cc = {}  # keyed by user id; holds that user's weight array
dd = {}  # keyed by item id; holds that item's weight array
ee = {}  # keyed by row index; holds a per-row value written by matrix()

def norm(w):
    """Return the softmax of *w* as a list with one entry per element of *w*.

    Each entry is ``exp(w[i]) / sum_j exp(w[j])``.  When *w* is a 2-D array
    the normalisation is applied independently per column, and every entry
    of the result keeps the shape of ``w[i]``.

    The maximum along the first axis is subtracted before exponentiating.
    This leaves the mathematical result unchanged but prevents ``exp`` from
    overflowing to ``inf`` (which would produce ``nan`` after the division)
    for large inputs.

    An empty *w* yields an empty list.
    """
    if len(w) == 0:
        return []
    # Column-wise maximum; a scalar when *w* is a flat sequence of numbers.
    shift = np.max(np.asarray(w), axis=0)
    exps = [np.exp(w[i] - shift) for i in range(len(w))]
    total = 0
    for e in exps:
        # Non in-place add so the entries of ``exps`` are never mutated.
        total = total + e
    return [e / total for e in exps]
 
def dotSum(l, v):
    """Return the sum of ``l[i] * v[i]`` over every index ``i`` of *l*.

    Only the length of *l* drives the iteration: extra trailing entries of
    *v* are ignored, and a *v* shorter than *l* raises ``IndexError``.
    An empty *l* gives ``0``.
    """
    acc = 0
    for idx, left in enumerate(l):
        acc += left * v[idx]
    return acc
    
def pSum(ll):
    """Accumulate one (128, 1) vector per key in *ll* onto a random start.

    Two random (128, 1) arrays are drawn: the first is the running total,
    the second is the fallback added for any key that is missing from the
    module-level ``cc`` dict.  Keys present in ``cc`` contribute ``cc[key]``.
    The running total is returned.
    """
    acc = np.random.rand(128, 1)
    fallback = np.random.rand(128, 1)
    for key in ll:
        acc += cc[key] if key in cc else fallback
    return acc
    
#@numba.cuda.jit
def matrix(user,item,click,uiCon):
      """Fill the module-level ``cc``/``dd``/``ee`` dicts from interaction rows.

      ``user``, ``item`` and ``click`` are indexed in parallel by row number
      (``user[it]``, ``item[it]``, ``click[it]``).  ``uiCon`` is accepted but
      is only referenced from commented-out code, so it is currently unused.

      The function runs in two phases:

      1. One pass over the rows that updates two shared random (128, 1)
         weight vectors, accumulates per-item / per-user products in local
         dicts, and stores softmax-normalised copies in ``cc`` (keyed by
         user id) and ``dd`` (keyed by item id).
      2. Thirty sweeps over the rows that rescale the shared weight vectors
         and re-store them in ``cc`` / ``dd``.

      Returns the module-level ``(cc, dd)`` dicts themselves (not copies).

      NOTE(review): the two ``print`` calls index ``ee[90]`` and
      ``user[12]``, so inputs with fewer than 91 rows raise ``KeyError``
      and, once that is addressed, fewer than 13 rows raise ``IndexError``.
      """
      num_=len(user)
    #2384384 2420902
      # NOTE(review): the numbers above are presumably the largest user and
      # item ids in the data set (they are one less than the sizes of ``v``
      # and ``u`` below) -- confirm.
      weight_x=np.random.rand(128,1)
      weight_y=np.random.rand(128,1)

 #   x=np.random.rand((2320895,128))
  #  y=np.random.rand((2000000,128))
  #  r=np.zeros((2320,6096),dtype=float) r[user[i]][item[i]]
      delta=0.01
      # Per-item / per-user running sums of weight_y[t]*weight_x[t].
      ppS={}
      ppT={}
      for it in range(num_):
            # This value is overwritten by the tuple stored at the end of
            # this iteration.
            ee[it]=dotSum(weight_x,weight_y)
            for t in range(128):
              # pSum(uiCon[user[it]]) *(ee[it]-click[it])
                # First time an item is seen: scale by click * weight_y
                # (a click of 0 zeroes the component); afterwards nudge
                # towards the stored item vector.
                if item[it] not in dd:
                    weight_x[t]*=click[it]*weight_y[t]
                else:
           
                    weight_x[t]+=delta*dd[item[it]][t]
                # Same pattern for the user side, using cc.
                if user[it] not in cc:                   
                    weight_y[t]+=delta*(weight_x[t])
                else:
                    weight_y[t]+=delta*(cc[user[it]][t])
                if item[it] not in ppS:
                    ppS[item[it]]=weight_y[t]*weight_x[t]
                else:
                    ppS[item[it]]+=weight_y[t]*weight_x[t]
                if user[it] not in ppT:
                    ppT[user[it]]=weight_y[t]*weight_x[t]
                else:
                    ppT[user[it]]+=weight_y[t]*weight_x[t]
    
                
            # norm() returns lists; np.array() makes fresh arrays, so the
            # entries stored here do not alias weight_x / weight_y.
            weight_xx=norm(weight_x)
            weight_yy=norm(weight_y)
            cc[user[it]]=np.array(weight_xx)
            dd[item[it]]=np.array(weight_yy)
            
            ee[it]=(weight_xx,weight_yy)
  #    print(ee[80]) 
      # NOTE(review): ``iter`` (and ``id`` below) shadow Python builtins.
      iter=0
      # Visited flags: ``u`` is indexed by item id, ``v`` by user id.  Ids
      # outside these fixed sizes raise IndexError.
      u=np.zeros((2420903,1))
      v=np.zeros((2384385,1))
      temptS=0
      temptT=0
      print(ee[90])
      while iter<30:

        for id in range(num_):    
            # The flags are never reset, so temptS / temptT only grow the
            # first time each item / user is encountered; later passes of
            # this inner loop add nothing.
            for idd in range(num_): 
                if u[item[idd]]==1:
                   # Also skips the user check below for this row.
                   continue
                u[item[idd]]=1
                temptS+=dotSum(ppS[item[idd]],dd[item[idd]])
                if v[user[idd]]==1:
                   continue
                v[user[idd]]=1
                temptT+=dotSum(ppT[user[idd]],cc[user[idd]])
            for t in range(128):
           #+pSum(uiCon[user[id]])
                weight_y[t]*=(cc[user[id]][t]/temptS)
                weight_x[t]*=(dd[item[id]][t]/temptT)
#            weight_xx=norm(weight_x)
#            weight_yy=norm(weight_y)
            # NOTE(review): the same weight_x / weight_y array objects are
            # stored for every row (no copy), so later in-place updates are
            # visible through every entry assigned here.
            cc[user[id]]=weight_x
            dd[item[id]]=weight_y
            ee[id]=dotSum(weight_x,weight_y)
        
        iter+=1
      print(cc[user[12]])  

      return cc,dd
    
    


if __name__=="__main__":
    # Only the first MAX_ROWS lines of each input file are consumed.
    MAX_ROWS=300

    # Interaction log: tab-separated; the first three fields are read as
    # user id, item id and a result flag.  Any further fields are ignored.
    userId=[]
    itemId=[]
    clickId=[]
    with open('rec_log_train.txt','r') as f:
        for line in islice(f,MAX_ROWS):
            if not line.strip():
                # Blank lines carry no record.
                continue
            fields=line.rstrip("\r\n").split("\t")
            userId.append(int(fields[0]))
            itemId.append(int(fields[1]))
            # NOTE(review): a result of -1 is mapped to click=1 and every
            # other value to 0 -- confirm this polarity is intended.
            clickId.append(1 if int(fields[2])==-1 else 0)

    # User connections: tab-separated integer pairs.  Maps the first id of
    # each pair to the list of second ids seen for it, in file order.
    userCon=dict()
    with open('user_sns.txt','rb') as e:
        for raw in islice(e,MAX_ROWS):
            lin=str(raw, encoding = "utf-8")
            if not lin.strip():
                continue
            fields=lin.rstrip("\r\n").split("\t")
            userCon.setdefault(int(fields[0]),[]).append(int(fields[1]))

    x_,y_= matrix(userId,itemId,clickId,userCon)
   
   
 #   print(x_)

#from sklearn.decomposition import NMF

 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值