代码1

本文深入探讨了编程中的一些关键代码实现,通过实例解析了其工作原理和应用场景,旨在提升读者的代码理解和应用能力。
摘要由CSDN通过智能技术生成
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
import os
import pickle
import re
from tensorflow.python.ops import math_ops

# Load the user table (first 150 rows only) and encode Gender and Age as ints.
users_title = ['UserID', 'Gender', 'Age', 'JobID', 'Zip-code']
users = pd.read_csv(
    './ml-1m/users.dat',
    sep='::',
    header=None,
    names=users_title,
    engine='python',
    nrows=150,
)
# Keep only the columns whose names match the regex; Zip-code is dropped.
users = users.filter(regex='UserID|Gender|Age|JobID')  # pandas.DataFrame
# Raw values, taken before the Gender and Age columns are reassigned below.
users_orig = users.values  # numpy.ndarray

# Gender: 'F' -> 0, 'M' -> 1.
gender_map = {'F': 0, 'M': 1}
users['Gender'] = users['Gender'].map(gender_map)  # pandas.Series

# Age: each distinct value gets its position in the enumeration of the set.
age_map = {age: idx for idx, age in enumerate(set(users['Age']))}
users['Age'] = users['Age'].map(age_map)

# Load the movie table (first 150 rows only) and encode Title and Genres as
# fixed-length lists of ints.
movies_title = ['MovieID', 'Title', 'Genres']
movies = pd.read_csv('./ml-1m/movies.dat', sep='::', header=None, names=movies_title, engine='python', nrows=150)
# Raw values, taken before the Title and Genres columns are reassigned below.
movies_orig = movies.values

# Strip the trailing "(digits)" from every title: group 1 is the text before
# the parenthesis. A title that does not match the pattern makes
# pattern.match() return None, so .group() raises AttributeError.
pattern = re.compile(r'^(.*)\((\d+)\)$')
title_map = {val: pattern.match(val).group(1) for val in set(movies['Title'])}
movies['Title'] = movies['Title'].map(title_map)

# Genre vocabulary: every '|'-separated token seen in the column, plus '<PAD>'.
genres_set = set()
for val in movies['Genres'].str.split('|'):
    genres_set.update(val)
genres_set.add('<PAD>')
# Enumerate in sorted order: iteration order of a set of strings changes
# between interpreter runs (hash randomization), which made the ids
# non-reproducible.
genres2int = {val: ii for ii, val in enumerate(sorted(genres_set))}

# Encode each distinct Genres string as a list of ids, then right-pad every
# list with the '<PAD>' id up to max(genres2int.values()) entries.
genres_map = {val: [genres2int[row] for row in val.split('|')] for val in set(movies['Genres'])}
genres_count = max(genres2int.values())
for key in genres_map:
    # A non-positive difference yields an empty list, so nothing is appended.
    genres_map[key].extend([genres2int['<PAD>']] * (genres_count - len(genres_map[key])))
movies['Genres'] = movies['Genres'].map(genres_map)

# Title vocabulary: every whitespace-separated word seen in the (year-stripped)
# titles, plus '<PAD>'.
title_set = set()
for val in movies['Title'].str.split():
    title_set.update(val)
title_set.add('<PAD>')
# Sorted for the same reproducibility reason as genres2int.
title2int = {val: ii for ii, val in enumerate(sorted(title_set))}

# Encode each distinct title as a list of word ids, right-padded with the
# '<PAD>' id up to title_count entries. Titles with more than title_count
# words are left at their full length (no truncation).
title_count = 15
title_map = {val: [title2int[row] for row in val.split()] for val in set(movies['Title'])}
for key in title_map:
    title_map[key].extend([title2int['<PAD>']] * (title_count - len(title_map[key])))
movies['Title'] = movies['Title'].map(title_map)
# Load the ratings table (first 150 rows only); the timestamps column is
# dropped by the regex filter.
ratings_title = ['UserID','MovieID', 'ratings', 'timestamps']
ratings = pd.read_csv('./ml-1m/ratings.dat', sep='::', header=None, names=ratings_title, engine = 'python',nrows=150)
ratings = ratings.filter(regex='UserID|MovieID|ratings')

# Join the three tables. pd.merge is called without `on`/`how`, so it uses its
# defaults for choosing the join keys and the join type.
data = pd.merge(pd.merge(ratings, users), movies)

# Split into features (every column except 'ratings') and targets ('ratings').
target_fields = ['ratings']
features_pd, targets_pd = data.drop(target_fields, axis=1), data[target_fields]  # pandas.DataFrame
features = features_pd.values  # numpy.ndarray
targets_values = targets_pd.values  # numpy.ndarray

# Persist everything to preprocess.p. The `with` block closes the file once
# the dump finishes; the original passed a bare open() and never closed it.
with open('preprocess.p', 'wb') as preprocess_file:
    pickle.dump((title_count, title_set, genres2int, features, targets_values, ratings, users, movies, data, movies_orig, users_orig), preprocess_file)
title_count, title_set, genres2int, features, targets_values, ratings, users, movies, data, movies_orig, users_orig = pickle.load(open('preprocess
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值