基于随机梯度下降分类算法的活动推荐系统

最新推荐文章于 2024-01-08 01:24:39 发布

littleadams

最新推荐文章于 2024-01-08 01:24:39 发布

阅读量644

点赞数 2

文章标签：机器学习 数据分析

本文链接：https://blog.csdn.net/weixin_40362097/article/details/81624796

版权

本文基于随机梯度下降算法，构建了针对用户的活动推荐系统。

!pip install pycountry
!pip install kaggle-cli

# Optional: fetch events.csv.gz with kaggle-cli and unpack it.
# Credentials are redacted here; never publish a real account name or password in a notebook.
#!kg download -u '<KAGGLE_USERNAME>' -p '<KAGGLE_PASSWORD>' -c event-recommendation-engine-challenge -f events.csv.gz
#!gzip -d events.csv.gz

0. 加载库

from __future__ import division, print_function
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

import datetime
import itertools
import hashlib
import locale
import pickle
import pycountry

import scipy.io as sio
import scipy.sparse as ss
import scipy.spatial.distance as ssd

from collections import defaultdict
from sklearn.preprocessing import normalize

# 授权登录
# 安装 PyDrive 操作库，该操作每个 notebook 只需要执行一次
!pip install -U -q PyDrive
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

def login_google_drive():
    """Authenticate the Colab user and return a PyDrive ``GoogleDrive`` client.

    The client is built from the application-default credentials obtained
    after ``auth.authenticate_user()`` has run.
    """
    # NOTE(review): the original comment says the interactive prompt only
    # appears on the first call in a session - confirm against Colab behaviour.
    auth.authenticate_user()
    google_auth = GoogleAuth()
    google_auth.credentials = GoogleCredentials.get_application_default()
    return GoogleDrive(google_auth)

# List the files directly inside a Google Drive folder (the Drive root by default).
def list_file(drive, parent_id='root'):
    """Print title, id and MIME type of every non-trashed file in a folder.

    Args:
        drive: client object exposing ``ListFile({...}).GetList()``
            (a PyDrive ``GoogleDrive`` instance in this notebook).
        parent_id: id of the folder to list. The default ``'root'`` keeps
            the original behaviour of listing the top level of the Drive.

    Returns:
        None. One line per file is written to stdout.
    """
    # Drive search query: children of the given folder that are not trashed.
    query = "'%s' in parents and trashed=false" % parent_id
    file_list = drive.ListFile({'q': query}).GetList()
    for file1 in file_list:
        print('title: %s, id: %s, mimeType: %s' % (file1['title'], file1['id'], file1["mimeType"]))

# Authenticate and keep the Drive client in a global used by the cells below.
drive = login_google_drive()

# Print the entries found at the top level of the Drive.
list_file(drive)

title: Colab, id: 1SUtiFjgLwfR1nyRu-80mKnRRhIa6mJHC, mimeType: application/vnd.google-apps.folder
title: card2014.pdf, id: 0B6B4WgkpMXnrc3lYVkQybFZ4WkpCSnp1SW5DRGdxMnNSblhB, mimeType: application/pdf

# List the children of the folder whose id was printed above with title "Colab".
file_list = drive.ListFile({
  'q': "'1SUtiFjgLwfR1nyRu-80mKnRRhIa6mJHC' in parents and trashed=false"}).GetList()
for file1 in file_list:
  print('title: %s, id: %s, mimeType: %s' % (file1['title'], file1['id'], file1["mimeType"]))

title: Kaggle-event recommendation, id: 1Icj4Bcc6OxMTOQ1oqp2A084146ch5jKG, mimeType: application/vnd.google-apps.folder
title: Credit Fraud Detection, id: 1WOmeGF8Muq1em9ZbeLFa-Ml221ErrBJJ, mimeType: application/vnd.google-apps.folder
title: LSTM, id: 1XhRk76UkPExDooz8oinsFlsS8e8qt8m2, mimeType: application/vnd.google-apps.folder

# List the children of the folder whose id was printed above with title
# "Kaggle-event recommendation"; its output supplies the CSV file ids.
file_list = drive.ListFile({
  'q': "'1Icj4Bcc6OxMTOQ1oqp2A084146ch5jKG' in parents and trashed=false"}).GetList()
for file1 in file_list:
  print('title: %s, id: %s, mimeType: %s' % (file1['title'], file1['id'], file1["mimeType"]))

title: kaggle_event_recommendation, id: 13D1FY3MQU548QVqC8ooZAvvy4BgG0SSI, mimeType: application/vnd.google.colaboratory
title: test.csv, id: 1Y7YXncoQuvv6z2J4_JP-hbtVVmyaJvIl, mimeType: text/csv
title: train.csv, id: 1i8FtD9G3H5B1QcmWG8yZ4w3aq12ARQcT, mimeType: text/csv

# 缓存数据到工作环境

def cache_data():
    """Download train.csv and test.csv from Drive into the working directory.

    Uses the module-level ``drive`` client. The file ids are the ones printed
    by the folder listing earlier in the notebook; replace them with your own.
    """
    # (Drive file id, local file name) for each dataset file.
    sources = (
        ("1i8FtD9G3H5B1QcmWG8yZ4w3aq12ARQcT", 'train.csv'),
        ("1Y7YXncoQuvv6z2J4_JP-hbtVVmyaJvIl", 'test.csv'),
    )

    # Create every file handle first, then download, in the same order as before.
    handles = [(drive.CreateFile({'id': file_id}), local_name)
               for file_id, local_name in sources]

    # NOTE(review): per the original author this only caches a local copy and
    # does not add a file to the Drive folder - confirm against PyDrive docs.
    for handle, local_name in handles:
        handle.GetContentFile(local_name, "text/csv")

    print("缓存成功")

# Fetch both CSV files into the local working directory.
cache_data()

缓存成功

1. 定义数据清洗类

class DataCleaner(object):
    '''
    Common utilities for converting strings to equivalent numbers or number buckets
    '''
    def __init__(self):
        """Build the string -> integer lookup tables used by the getters.

        Every map is a ``defaultdict(int)``, so an unknown key yields 0.

        Attributes set:
            localeIdMap: locale alias -> 1-based position in
                ``locale.locale_alias``.
            countryIdMap: lower-cased country name -> 1-based position in
                ``pycountry.countries``; US and Canadian subdivision names
                are added and mapped to their country's id.
            ctryIdx: ISO alpha-2 code ('US', 'CA') -> 0-based position of
                that country in ``pycountry.countries``.
            genderIdMap: 'male' -> 1, 'female' -> 2.
        """
        # Load locales.
        self.localeIdMap = defaultdict(int)
        for i, l in enumerate(locale.locale_alias.keys()):
            self.localeIdMap[l] = i + 1

        # Load countries.
        self.countryIdMap = defaultdict(int)
        self.ctryIdx = defaultdict(int)
        # pycountry names the country "United States", so the original
        # comparison against 'usa' never matched and no US state was mapped.
        # 'usa' is kept in the set for backward compatibility.
        us_names = ('united states', 'united states of america', 'usa')
        for i, c in enumerate(pycountry.countries):
            name = c.name.lower()
            self.countryIdMap[name] = i + 1
            if name in us_names:
                self.ctryIdx['US'] = i
            elif name == 'canada':
                self.ctryIdx['CA'] = i

        # Map state/province names to the id of their country. As in the
        # original, a subdivision name overwrites an identical country name.
        for cc, idx in list(self.ctryIdx.items()):
            # Some pycountry versions return None when nothing is found.
            for s in pycountry.subdivisions.get(country_code=cc) or ():
                self.countryIdMap[s.name.lower()] = idx + 1

        # Load gender ids.
        self.genderIdMap = defaultdict(int, {'male': 1, 'female': 2})

    def getLocaleId(self, locstr):
        return self.localeIdMap[locstr.lower()]

    def getGenderId(self, genderStr):
        return self.genderIdMap[genderStr.lower()]

    def getJoinedYearMonth(self, dateString):
        """Return year and month of an ISO-like timestamp as one string.

        ``dateString`` must match ``%Y-%m-%dT%H:%M:%S.%fZ``. The month is
        not zero-padded, so January 2012 gives "20121".
        """
        parsed = datetime.datetime.strptime(dateString, "%Y-%m-%dT%H:%M:%S.%fZ")
        return str(parsed.year) + str(parsed.month)

    def getCountryId(self, location):
        if (isinstance(location, str)
            and len(location.strip()) > 0
            and location.rfind("  ") > -1):
            return self.countryIdMap[location[location.rfind(

最低0.47元/天解锁文章

littleadams

关注

2
点赞
踩
4

收藏

觉得还不错? 一键收藏
0
评论
基于随机梯度下降分类算法的活动推荐系统

本文基于随机梯度下降算法，构建了针对用户的活动推荐系统。!pip install pycountry !pip install kaggle-cli #!kg download -u '<KAGGLE_USERNAME>' -p '<KAGGLE_PASSWORD>' -c event-recommendation-engine-challenge -f events.csv.g...
复制链接

扫一扫