推荐系统——SVD/SVD++

1,SVD

源代码:svd.py

#Ver1.0
#Zero @2012.5.2
#

import math
import random
import cPickle as pickle


#calculate the overall average
def Average(fileName):
    """Return the mean rating stored in a whitespace-separated rating file.

    Every non-blank line must have at least three fields; the third field
    is parsed as an integer rating. Blank lines are skipped.

    Args:
        fileName: path of the rating file to read.

    Returns:
        The arithmetic mean of all ratings, as a float.

    Raises:
        ValueError: if the file contains no rating lines, or a rating field
            is not an integer.
        IndexError: if a non-blank line has fewer than three fields.
    """
    total = 0.0
    cnt = 0
    # the with-block closes the file even if a line fails to parse
    with open(fileName, 'r') as fi:
        for line in fi:
            arr = line.split()
            if not arr:
                # tolerate blank lines, e.g. a trailing newline at end of file
                continue
            total += int(arr[2])
            cnt += 1
    if cnt == 0:
        # an average over zero ratings is undefined; say so explicitly
        raise ValueError("no ratings found in %s" % fileName)
    return total / cnt



def InerProduct(v1, v2):
    """Return the inner (dot) product of two vectors.

    The loop runs over the indices of v1, so v2 must be at least as long
    as v1; any extra trailing elements of v2 are ignored.
    """
    return sum(v1[k] * v2[k] for k in range(len(v1)))


def PredictScore(av, bu, bi, pu, qi):
    """Return the predicted rating, clamped to the range [1, 5].

    The raw estimate is av + bu + bi + <pu, qi>: the global average, the
    user bias, the item bias and the inner product of the user and item
    factor vectors.
    """
    estimate = av + bu + bi + InerProduct(pu, qi)
    # clamp out-of-range estimates to the nearest bound
    if estimate > 5:
        return 5
    if estimate < 1:
        return 1
    return estimate


def SVD(configureFile, testDataFile, trainDataFile, modelSaveFile):
    """Train a biased matrix-factorization model with SGD and pickle it.

    The first line of configureFile holds six whitespace-separated values:
    averageScore userNum itemNum factorNum learnRate regularization.
    Each non-blank line of the train/test files holds a 1-based user id,
    a 1-based item id and an integer score, separated by whitespace.

    Training makes at most 100 passes over trainDataFile and stops as soon
    as the RMSE on testDataFile does not improve on the previous pass.
    The parameters are saved as they are after the last pass that ran,
    including a pass whose RMSE got worse.

    Args:
        configureFile: path of the configuration file.
        testDataFile: path of the rating file used to compute the RMSE.
        trainDataFile: path of the rating file used for the SGD updates.
        modelSaveFile: path the model (bu, bi, qi, pu) is pickled to.
    """
    # read the hyper-parameters from the first line of the configuration
    with open(configureFile, 'r') as fi:
        arr = fi.readline().split()
    averageScore = float(arr[0])
    userNum = int(arr[1])
    itemNum = int(arr[2])
    factorNum = int(arr[3])
    learnRate = float(arr[4])
    regularization = float(arr[5])

    # biases start at zero; factors start as small random values scaled
    # by 1/sqrt(factorNum)
    bi = [0.0 for i in range(itemNum)]
    bu = [0.0 for i in range(userNum)]
    scale = math.sqrt(factorNum)
    qi = [[(0.1 * random.random() / scale) for j in range(factorNum)] for i in range(itemNum)]
    pu = [[(0.1 * random.random() / scale) for j in range(factorNum)] for i in range(userNum)]
    print("initialization end\nstart training\n")

    # train model
    preRmse = 1000000.0
    for step in range(100):
        # the with-block closes the file even if a line fails to parse
        with open(trainDataFile, 'r') as fi:
            for line in fi:
                arr = line.split()
                if not arr:
                    # tolerate blank lines, e.g. a trailing newline
                    continue
                uid = int(arr[0]) - 1   # ids in the file are 1-based
                iid = int(arr[1]) - 1
                score = int(arr[2])
                prediction = PredictScore(averageScore, bu[uid], bi[iid], pu[uid], qi[iid])

                eui = score - prediction

                # update parameters
                bu[uid] += learnRate * (eui - regularization * bu[uid])
                bi[iid] += learnRate * (eui - regularization * bi[iid])
                for k in range(factorNum):
                    # qi must be updated with the value pu had before its
                    # own update, so save it first
                    oldPu = pu[uid][k]
                    pu[uid][k] += learnRate * (eui * qi[iid][k] - regularization * pu[uid][k])
                    qi[iid][k] += learnRate * (eui * oldPu - regularization * qi[iid][k])
        curRmse = Validate(testDataFile, averageScore, bu, bi, pu, qi)
        print("test_RMSE in step %d: %f" %(step, curRmse))
        if curRmse >= preRmse:
            # no improvement over the previous pass: stop early
            break
        preRmse = curRmse

    # write the model to file; open() replaces the Python-2-only file()
    # builtin, and protocol 1 is what the original True argument meant
    with open(modelSaveFile, 'wb') as fo:
        pickle.dump(bu, fo, 1)
        pickle.dump(bi, fo, 1)
        pickle.dump(qi, fo, 1)
        pickle.dump(pu, fo, 1)
    print("model generation over")

#validate the model
def Validate(testDataFile, av, bu, bi, pu, qi):
    """Return the RMSE of the model's predictions on a rating file.

    Each non-blank line of testDataFile holds a 1-based user id, a 1-based
    item id and an integer score, separated by whitespace. Blank lines are
    skipped.

    Args:
        testDataFile: path of the rating file to evaluate on.
        av: global average score.
        bu: list of user biases, indexed by 0-based user id.
        bi: list of item biases, indexed by 0-based item id.
        pu: list of user factor vectors, indexed by 0-based user id.
        qi: list of item factor vectors, indexed by 0-based item id.

    Returns:
        The root-mean-square error over all ratings in the file.

    Raises:
        ValueError: if the file contains no rating lines.
    """
    cnt = 0
    squaredError = 0.0
    # the with-block closes the file even if a line fails to parse
    with open(testDataFile, 'r') as fi:
        for line in fi:
            arr = line.split()
            if not arr:
                # tolerate blank lines, e.g. a trailing newline
                continue
            cnt += 1
            uid = int(arr[0]) - 1   # ids in the file are 1-based
            iid = int(arr[1]) - 1
            pScore = PredictScore(av, bu[uid], bi[iid], pu[uid], qi[iid])

            tScore = int(arr[2])
            squaredError += (tScore - pScore) * (tScore - pScore)
    if cnt == 0:
        # an RMSE over zero ratings is undefined; say so explicitly
        raise ValueError("no ratings found in %s" % testDataFile)
    return math.sqrt(squaredError / cnt)




#use the model to make predictions
def Predict(configureFile, modelSaveFile, testDataFile, resultSaveFile):
    """Load a saved model and write one predicted score per rating line.

    Only the first field of the configuration file's first line (the
    global average score) is used. The model file must contain four
    consecutive pickles in the order bu, bi, qi, pu. For every non-blank
    line of testDataFile (1-based user id, 1-based item id, ...) one line
    with the predicted score is written to resultSaveFile; blank input
    lines are skipped and produce no output line.

    Args:
        configureFile: path of the configuration file.
        modelSaveFile: path of the pickled model.
        testDataFile: path of the file listing the (user, item) pairs.
        resultSaveFile: path the predictions are written to.
    """
    # get parameter
    with open(configureFile, 'r') as fi:
        arr = fi.readline().split()
    averageScore = float(arr[0])

    # get model; open() replaces the Python-2-only file() builtin.
    # NOTE: unpickling can execute arbitrary code, so only load model
    # files that come from a trusted source.
    with open(modelSaveFile, 'rb') as fi:
        bu = pickle.load(fi)
        bi = pickle.load(fi)
        qi = pickle.load(fi)
        pu = pickle.load(fi)

    # predict; opening the input first means a missing test file does not
    # leave an empty result file behind, and both handles are closed even
    # if a line fails to parse
    with open(testDataFile, 'r') as fi:
        with open(resultSaveFile, 'w') as fo:
            for line in fi:
                arr = line.split()
                if not arr:
                    # tolerate blank lines, e.g. a trailing newline
                    continue
                uid = int(arr[0]) - 1   # ids in the file are 1-based
                iid = int(arr[1]) - 1
                pScore = PredictScore(averageScore, bu[uid], bi[iid], pu[uid], qi[iid])
                fo.write("%f\n" %pScore)
    print("predict over")


if __name__ == '__main__':
    # input files: hyper-parameters plus the train/test rating splits
    configureFile = 'svd.conf'
    trainDataFile = 'u1.base'
    testDataFile = 'u1.test'
    # output files: the pickled model and the predicted scores
    modelSaveFile = 'svd_model.pkl'
    resultSaveFile = 'prediction'

    # uncomment to print the average score that goes into svd.conf:
    #print("%f" %Average("u1.base"))

    # train the model and save it to modelSaveFile
    SVD(
        configureFile=configureFile,
        testDataFile=testDataFile,
        trainDataFile=trainDataFile,
        modelSaveFile=modelSaveFile,
    )

    # uncomment to write predictions from the saved model:
    #Predict(configureFile, modelSaveFile, testDataFile, resultSaveFile)

配置文件svd.conf:


3.528350 6040 3900 10 0.01 0.05
averageScore userNum itemNum factorNum learnRate regularization 

实验结果:
数据集一:movielen(u1.base/u1.test)

这里写图片描述

迭代了39次,最终的RMSE=0.919047

数据集二:movielen(u2.base/u2.test)

这里写图片描述

迭代了38次,最终的RMSE=0.916170

注:跑程序之前,记得在svd.conf中按所用的数据集修改相应的参数(averageScore、userNum、itemNum等)。

在Matlab中,SVD表示奇异值分解(Singular Value Decomposition)。奇异值分解可以将一个矩阵X分解为三个矩阵相乘,即X = U*S*V',三个矩阵分别为U、S和V。对于m×n的矩阵X,U是一个m×m的正交矩阵,S是一个m×n的对角矩阵,而V是一个n×n的正交矩阵。 在Matlab中,通过使用svd函数进行SVD分解,可以获得矩阵X的奇异值分解。函数的使用方式为[U,S,V] = svd(X)。其中,U是包含X的左奇异向量的矩阵,S是包含X的奇异值的对角矩阵,V是包含X的右奇异向量的矩阵。 特别地,当使用svd(X,'econ')时,会产生"经济规模"的分解。如果X是m×n的矩阵,并且m大于等于n,则只计算U的前n列,S是一个n×n的对角矩阵。如果m小于n,则只计算V的前m列,S是一个m×m的对角矩阵。 总结来说,在Matlab中,SVD分解是一种用于将一个矩阵分解成三个矩阵相乘的方法,通过使用svd函数可以得到矩阵的奇异值分解,其中U、S和V分别代表左奇异向量、奇异值和右奇异向量。在使用svd函数时,还可以选择是否进行"经济规模"的分解。[1][2][3]

#### 引用

- [1][2][3] [奇异值分解——matlab中svd函数用法总结](https://blog.csdn.net/weixin_44567900/article/details/108831329)
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值