大数据之路【第十五篇】:数据挖掘--推荐算法

import web
import sys

# Python 2 only: reload() brings back sys.setdefaultencoding, which is removed
# from the sys module during interpreter start-up.
reload(sys)
# Make implicit str <-> unicode conversions use UTF-8 instead of ASCII.
sys.setdefaultencoding('utf-8')

# Let the jieba imports below also be resolved from a local ./jieba/ directory.
sys.path.append("./jieba/")

import jieba
import jieba.posseg
import jieba.analyse

# Routing table handed to web.py: a flat tuple alternating a URL pattern with
# the name of the handler class (looked up in this module's globals).
urls = ('/', 'index', '/test', 'test')

app = web.application(urls, globals())


# Separators used inside the second column of inverted.data.
# NOTE(review): the published listing called split('') for both levels, which
# always raises "ValueError: empty separator". The real delimiters appear to
# have been non-printable control characters lost in publication; \x01 / \x02
# are an assumption -- TODO confirm against the job that writes inverted.data.
ITEM_SEP = '\x01'
FIELD_SEP = '\x02'


def _load_rec_map(path, item_sep=ITEM_SEP, field_sep=FIELD_SEP):
    """Load the inverted recommendation index from *path*.

    Each useful line has exactly two tab-separated columns: a token and a
    list of recommendations. The list is split on *item_sep* into entries and
    every entry is split on *field_sep* into ``name`` and ``score``.
    Lines that do not have exactly two columns are skipped.

    Returns:
        dict mapping token -> list of ``(name, score)`` tuples, with the
        score converted to float and rounded to two decimals.

    Raises:
        IOError: if *path* cannot be opened.
        ValueError: if an entry does not split into exactly two fields or
            the score is not a number.
    """
    table = {}
    with open(path, 'r') as fd:
        for line in fd:
            ss = line.strip().split('\t')
            if len(ss) != 2:
                continue
            # Kept from the original: on Python 2 this is a round-trip through
            # the default encoding and leaves the key a UTF-8 byte string.
            token = ss[0].strip().encode('utf8')
            music_rec_list_str = ss[1].strip()

            for music_score in music_rec_list_str.split(item_sep):
                name, score = music_score.strip().split(field_sep)
                table.setdefault(token, []).append(
                    (name, round(float(score), 2)))
    return table


rec_map = _load_rec_map('inverted.data')

# Start-up sanity output: number of tokens loaded.
print(len(rec_map))

class index:
    """Handler for ``/``: recommends items for the ``content`` query parameter."""

    def GET(self):
        """Segment ``content`` with jieba and merge the matching recommendations.

        Every segment that is a key of the module-level ``rec_map``
        contributes its ``(name, score)`` pairs. When the same name is reached
        through several segments, its scores are summed.

        Returns:
            One ``name<TAB>score`` line per recommended name, joined with
            CRLF; an empty string when no segment matches.
        """
        params = web.input()
        content = params.get('content', '')
        # Same text the Python 2 statement `print 'content: ', content` emits.
        print('content:  %s' % content)

        result_map = {}
        for seg in jieba.cut(content, cut_all=False):
            print('seg:  %s' % seg)
            # rec_map keys are UTF-8 encoded tokens, so encode the segment
            # once and do a hashed lookup instead of scanning rec_map.keys().
            key = seg.encode('utf8')
            for tmp_name, score in rec_map.get(key, ()):
                name = tmp_name.encode('utf8')
                # Accumulate the score across all segments that recommend it.
                result_map[name] = result_map.get(name, 0) + score

        rec_list = ['\t'.join([name, str(total)])
                    for name, total in result_map.items()]

        return "\r\n".join(rec_list)

class test:
    """Handler for ``/test``: a debugging endpoint."""

    def GET(self):
        """Print the parsed request input and return the fixed body ``'222'``."""
        print(web.input())
        return '222'

# Start the web.py application only when this file is executed as a script.
if __name__ == "__main__":
    app.run()

搜索MV推荐

 搜索周杰伦

 

转载于:https://www.cnblogs.com/hackerer/p/11482159.html

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值