# PySpark implementation of the ALS recommendation algorithm
# Usage: spark-submit movie_rec.py
from pyspark import SparkConf, SparkContext
from pyspark.mllib.recommendation import ALS, Rating
from math import sqrt
# Build the mapping between every movie id and its movie name
def movie_dict(file):
    """Build a lookup table from movie id to movie name.

    Reads a pipe-delimited text file, decoded as ISO-8859-1, in which the
    first field of each line is an integer movie id and the second field is
    the movie name. Any further fields on the line are ignored.

    Args:
        file: Path of the movie file to read.

    Returns:
        A dict mapping each int movie id to its str movie name. If an id
        occurs more than once, the last occurrence wins.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the first field of a non-blank line is not an integer.
        IndexError: If a non-blank line contains no ``|`` separator.
    """
    movies = {}
    with open(file, 'r', encoding="ISO-8859-1") as f:
        for line in f:
            # Tolerate blank lines (e.g. an empty line at the end of the
            # file) instead of failing on int('').
            if not line.strip():
                continue
            # Drop the line terminator first so it cannot leak into the
            # name when the name is the last field on the line.
            fields = line.rstrip('\r\n').split('|')
            movies[int(fields[0])] = fields[1]
    return movies
# Convert a raw user-rating record into the Rating format
def get_rating(str):
    """Parse one tab-separated rating record into a ``Rating``.

    Args:
        str: One text record whose first three tab-separated fields are the
            user id, the movie id and the rating, in that order. Any
            further fields are ignored. (The parameter name shadows the
            builtin ``str``; it is kept unchanged so existing callers that
            pass it by keyword keep working.)

    Returns:
        ``Rating(user_id, movie_id, user_rating)`` where both ids are
        converted with ``int`` and the rating with ``float``.

    Raises:
        IndexError: If the record has fewer than three fields.
        ValueError: If a field cannot be converted to a number.
    """
    fields = str.split('\t')
    user_id = int(fields[0])
    movie_id = int(fields[1])
    user_rating = float(fields[2])
    return Rating(user_id, movie_id, user_rating)
conf = SparkConf().setMaster(‘lo