电影推荐系统-整体总结(五)实时推荐
一、Scala代码实现
1.自定义数据类--Model.scala
package streamingRecommender
/**
 * One record from the movie dataset (one field per dataset column).
 *
 * Example row:
 *   mid      : 1
 *   name     : Toy Story (1995)
 *   descri   : (plot summary text)
 *   timelong : 81 minutes
 *   cal_issue: March 20, 2001
 *   shoot    : 1995
 *   language : English
 *   genres   : Adventure|Animation|Children|Comedy|Fantasy
 *   actors   : Tom Hanks|Tim Allen|...
 *   directors: John Lasseter
 *
 * NOTE(review): multi-valued columns (genres/actors) are kept as the raw
 * "|"-separated strings from the source file — presumably split downstream; verify.
 * The explicit `val` modifiers were removed: case-class parameters are
 * public vals by default, so the interface is unchanged.
 */
case class Movie(mid: Int, name: String, descri: String,
                 timelong: String, cal_issue: String, shoot: String,
                 language: String, genres: String, actors: String, directors: String)
/**
 * One user-to-movie rating record.
 * Example row: 1,31,2.5,1260759144
 *
 * @param uid       id of the user who rated
 * @param mid       id of the rated movie
 * @param score     rating value
 * @param timastamp rating time in epoch seconds (sic — the field name keeps
 *                  the original "timastamp" typo so existing callers that use
 *                  named access still compile)
 */
case class MovieRating(uid: Int, mid: Int, score: Double, timastamp: Int)
/**
 * One user-to-movie tag record.
 * Example row: 15,339,sandra 'boring' bullock,1138537770
 *
 * @param uid       id of the tagging user
 * @param mid       id of the tagged movie
 * @param tag       free-text tag
 * @param timestamp tag time in epoch seconds
 */
case class Tag(uid: Int, mid: Int, tag: String, timestamp: Int)
/**
 * MongoDB connection settings.
 *
 * @param uri full MongoDB connection URI (e.g. "mongodb://host:27017/recom")
 * @param db  database name
 */
case class MongoConfig(uri: String, db: String)
/**
 * Elasticsearch connection settings.
 *
 * @param httpHosts      HTTP endpoints of the ES nodes
 * @param transportHosts transport endpoints for all ES nodes
 * @param index          index name to read/write (was undocumented in the
 *                       original scaladoc)
 * @param clusterName    ES cluster name
 */
case class EsConfig(httpHosts: String, transportHosts: String, index: String, clusterName: String)
/**
 * A single recommendation entry: one movie id paired with its score.
 *
 * @param mid movie id being recommended
 * @param res predicted rating / similarity score for that movie
 */
case class Recommendation(mid: Int, res: Double)
/**
 * Key-value pairing of a genre with its ranked recommendation list.
 *
 * @param genres genre name used as the key
 * @param recs   ranked [[Recommendation]] entries for that genre
 *               (Seq is a trait — think of it as an ordered list)
 */
case class GenresRecommendation(genres: String, recs: Seq[Recommendation])
/**
 * Ranked recommendation list computed for a single user.
 *
 * @param uid  user id
 * @param recs ranked [[Recommendation]] entries for that user
 */
case class UserRecs(uid: Int, recs: Seq[Recommendation])
/**
 * Movie-similarity list: the movies most similar to `mid`.
 *
 * @param mid  source movie id
 * @param recs ranked [[Recommendation]] entries — each one a similar movie
 *             plus its similarity score (Seq is a trait — an ordered list)
 */
case class MoviesRecs(mid: Int, recs: Seq[Recommendation])
2.StreamingRecommender类
package streamingRecommender
import com.mongodb.casbah
import com.mongodb.casbah.MongoClient
import com.mongodb.casbah.commons.MongoDBObject
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession
import org.apache.spark.streaming.kafka010.{ConsumerStrategies, KafkaUtils, LocationStrategies}
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.kafka.common.serialization.StringDeserializer
import redis.clients.jedis.Jedis
import scala.collection.JavaConversions._
import scala.collection.mutable
import scala.collection.mutable.ArrayBuffer
/**
* @Author : ASUS and xinrong
* @Version : 2020/9/25
* 实时推荐部分
*/
// Lazily-initialised, process-wide connection handles for the streaming job.
// NOTE(review): the Redis/Mongo host is hard-coded — consider moving it to config.
object ConnHelper{
// Redis client, created on first use (Jedis default port applies since none is given).
lazy val jedis=new Jedis("192.168.212.21")
// MongoDB client for the "recom" database, created on first use.
lazy val mongoClient=MongoClient(casbah.MongoClientURI("mongodb://192.168.212.21:27017/recom"))
}
object StreamingRecommender {
// Tuning constants and MongoDB collection names used by the streaming job.
val MAX_USER_RATINGS_NUM=20 // how many of the user's recent ratings to fetch from Redis
val MAX_SIM_MOVIES_NUM=20 // how many candidate movies to take from the similarity table
// NOTE(review): "RECES" looks like a typo for "RECS"; renaming the val is safe only after updating all usages.
val MONGODB_MOVIE_RECES_COLLECTION="MovieRecs"
val MONGODB_RATING_COLLECTION="Rating"
val MONGODB_STREAM_RECS_COLLECTION="StreamRecs" // collection the real-time recommendations are written to
def main(args: Array[String]): Unit = {
//一、声明Spark的环境、Kafka和MongoDB的相关信息--------------------------------------------------------------------
val config = Map(
"spark.core" -> "local[3]",
"kafka.topic" -> "recom",
"mongo.uri" -> "mongodb://192.168.212.21:27017/recom",
"mongo.db" -> "recom"
)
val sparkConf = new SparkConf().setAppName("StreamingRecommender").setMaster(config("spark.core"))
val sparkSession = SparkSession.builder().config(sparkConf).getOrCreate()
val sc = sparkSession.sparkContext
//使用SparkStreaming将连续的数据转化成不连续的RDD
//指定采样时间:2秒
val ssc = new StreamingContext(sc, Seconds(2))
//定义隐式参数用于连接MongoDB
implicit val mongoConfig = Mon