Machine learning
------------
Supervised learning:    // has labeled training data
    // classification / prediction (classify)
    // Naive Bayes classifier: a probability-based approach.
    // It is a generative model (a minimal sketch follows this list).
Unsupervised learning   // no labeled training data
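Since these notes run on Spark, here is a minimal sketch of a supervised classifier using MLlib's NaiveBayes. The tiny hand-made training set and the object name NaiveBayesSketch are assumptions for illustration, not part of the original notes.

import org.apache.spark.mllib.classification.NaiveBayes
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.{SparkConf, SparkContext}

object NaiveBayesSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("nb").setMaster("local[4]"))
    // Hand-made labeled training data (invented for illustration):
    // label 0.0 when the first feature dominates, 1.0 when the second does.
    val training = sc.parallelize(Seq(
      LabeledPoint(0.0, Vectors.dense(1.0, 0.0)),
      LabeledPoint(0.0, Vectors.dense(2.0, 0.0)),
      LabeledPoint(1.0, Vectors.dense(0.0, 1.0)),
      LabeledPoint(1.0, Vectors.dense(0.0, 2.0))
    ))
    // lambda is the additive (Laplace) smoothing applied to the estimated probabilities.
    val model = NaiveBayes.train(training, lambda = 1.0)
    // Predict the class of a new point; here we expect 1.0.
    println(model.predict(Vectors.dense(0.0, 3.0)))
    sc.stop()
  }
}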
Linear regression
-------------
Regression: fit the training data and predict a continuous value (see the sketch below).
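A minimal sketch of linear regression with spark.ml, assuming Spark 2.x; the tiny in-memory dataset (points roughly on y = 2x + 1) and the object name LinearRegSketch are invented for illustration.

import org.apache.spark.ml.linalg.Vectors
import org.apache.spark.ml.regression.LinearRegression
import org.apache.spark.sql.SparkSession

object LinearRegSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("linreg").master("local[4]").getOrCreate()
    import spark.implicits._
    // Points that roughly follow y = 2x + 1 (made up for illustration).
    val training = Seq(
      (3.1, Vectors.dense(1.0)),
      (5.0, Vectors.dense(2.0)),
      (6.9, Vectors.dense(3.0)),
      (9.1, Vectors.dense(4.0))
    ).toDF("label", "features")
    // Ordinary least squares; setRegParam(0.0) means no regularization.
    val lr = new LinearRegression().setMaxIter(10).setRegParam(0.0)
    val model = lr.fit(training)
    // Expect a coefficient near 2.0 and an intercept near 1.0.
    println(s"coefficients=${model.coefficients} intercept=${model.intercept}")
    spark.stop()
  }
}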
ALS
-------------
Alternating Least Squares: matrix factorization fitted by the least-squares method.
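For reference (standard textbook form, not stated in the original notes): regularized ALS learns user factors x_u and item factors y_i of length `rank` by minimizing

    \min_{X,Y} \sum_{(u,i)\in K} \left( r_{ui} - x_u^{\top} y_i \right)^2 + \lambda \left( \sum_u \lVert x_u \rVert^2 + \sum_i \lVert y_i \rVert^2 \right)

where K is the set of observed ratings and \lambda is the regularization weight (the 0.01 passed to ALS.train below). Each iteration fixes Y and solves a least-squares problem for X, then fixes X and solves for Y, hence "alternating" least squares.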
/**
 * Created by Administrator on 2017/4/9.
 */
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.mllib.recommendation.ALS
import org.apache.spark.mllib.recommendation.MatrixFactorizationModel
import org.apache.spark.mllib.recommendation.Rating

object RecommDemo {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("Recommend").setMaster("local[4]")
    val sc = new SparkContext(conf)

    // Load and parse the data (one "user,item,rate" line per rating).
    val data = sc.textFile("file:///d:/scala/ml/recomm/data2.txt")
    // Transform each line into a Rating.
    val ratings = data.map(_.split(',') match { case Array(user, item, rate) =>
      Rating(user.toInt, item.toInt, rate.toDouble)
    })

    // Build the recommendation model with the alternating least squares algorithm.
    val rank = 10
    val numIterations = 10
    val model = ALS.train(ratings, rank, numIterations, 0.01)

    // Extract the (user, product) pairs from the rating data
    // (built here for reference; the prediction below uses a small explicit RDD instead).
    val usersProducts = ratings.map { case Rating(user, product, rate) =>
      (user, product)
    }

    // Use the model to predict ratings for (user, product) pairs,
    // producing ((user, product), rate).
    val ug2 = sc.makeRDD(Array((2, 3), (2, 4)))
    val predictions =
      model.predict(ug2).map { case Rating(user, product, rate) =>
        ((user, product), rate)
      }
    predictions.collect().foreach(println)

    // Map the training data to ((user, product), rate), join it with the
    // predictions, and compute the mean squared error.
    // val ratesAndPreds = ratings.map { case Rating(user, product, rate) =>
    //   ((user, product), rate)
    // }.join(predictions)
    //
    // ratesAndPreds.collect().foreach(println)
    //
    // val MSE = ratesAndPreds.map { case ((user, product), (r1, r2)) =>
    //   val err = (r1 - r2)
    //   err * err
    // }.mean()
    // println("Mean Squared Error = " + MSE)

    // Save and load the model.
    // model.save(sc, "target/tmp/myCollaborativeFilter")
    // val sameModel = MatrixFactorizationModel.load(sc, "target/tmp/myCollaborativeFilter")
  }
}
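As a hypothetical follow-up to the example above (assuming the same model and sc are still in scope), the MLlib MatrixFactorizationModel can also produce top-N recommendations directly and be persisted, as the commented-out save/load lines suggest:

// Top-5 products for user 2, returned as an Array[Rating] sorted by predicted rating.
val top5 = model.recommendProducts(2, 5)
top5.foreach(println)

// Persist the factor matrices and load them back later.
model.save(sc, "target/tmp/myCollaborativeFilter")
val sameModel = MatrixFactorizationModel.load(sc, "target/tmp/myCollaborativeFilter")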
Movie recommendation
----------------------
/**
 * Created by Administrator on 2017/4/9.
 */
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.ml.evaluation.RegressionEvaluator
import org.apache.spark.ml.recommendation.ALS
import org.apache.spark.sql.SparkSession

/**
 * Movie recommendation
 */
object MovieRecommDemo {
  // Case class for one rating record.
  case class Rating0(userId: Int, movieId: Int, rating: Float, timestamp: Long)

  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
    conf.setAppName("movieRecomm")
    conf.setMaster("local[4]")
    val spark = SparkSession.builder().config(conf).getOrCreate()
    import spark.implicits._

    // Parse one "userId::movieId::rating::timestamp" line into a Rating0.
    def parseRating(str: String): Rating0 = {
      val fields = str.split("::")
      assert(fields.size == 4)
      Rating0(fields(0).toInt, fields(1).toInt, fields(2).toFloat, fields(3).toLong)
    }

    // Load the ratings and convert them into a DataFrame.
    val ratings = spark.sparkContext.textFile("file:///D:\\scala\\ml\\recomm\\sample_movielens_ratings.txt")
    val ratings0 = ratings.map(parseRating)
    val df = ratings0.toDF()

    // Randomly split the data into an array of two DataFrames:
    // the first element is training, the second is test.
    val Array(training, test) = df.randomSplit(Array(0.99, 0.01))

    // Create the ALS estimator and set its parameters.
    val als = new ALS().setMaxIter(5)
      .setRegParam(0.01)
      .setUserCol("userId")
      .setItemCol("movieId")
      .setRatingCol("rating")

    // Fit the ALS estimator on the training data to produce the recommendation model.
    val model = als.fit(training)

    /******* Recommend n products to one user (mllib MatrixFactorizationModel API; not available on this ml ALSModel) ********/
    //val res = model.recommendProducts(5, 8)
    /******* Recommend one product to n users (mllib MatrixFactorizationModel API; not available on this ml ALSModel) ********/
    //val res = model.recommendUsers(3, 5)
    /******* Recommend the top 3 products to every user (spark.ml API, requires Spark 2.2+) ********/
    val res = model.recommendForAllUsers(3)

    // Transform the test data with the model to produce predictions.
    val predictions = model.transform(test)
    predictions.collect().foreach(println)
  }
}
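The RegressionEvaluator imported above is never used. As a hedged sketch of how it is typically applied to these predictions (assuming Spark 2.x): users or movies that ended up only in the test split get NaN predictions, so those rows must be dropped before computing RMSE.

// Hypothetical continuation, assuming `predictions` from the example above.
import org.apache.spark.sql.functions.col

val evaluator = new RegressionEvaluator()
  .setMetricName("rmse")
  .setLabelCol("rating")
  .setPredictionCol("prediction")

// Drop NaN predictions caused by cold-start users/items in the test split.
val rmse = evaluator.evaluate(predictions.where(!col("prediction").isNaN))
println(s"Root-mean-square error = $rmse")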
Python
------------------
1. Install (Windows)
   c:\...
2. Add Python to PATH automatically (installer option)
3. Verify the installation (e.g., run python --version)

Install the Python plugin (in IDEA)
---------------
1. Open IDEA
2. Settings -> Plugins -> Install plugin from disk... -> select python-community-163.125.zip -> OK
Create a Python module in IDEA
-----------------------
# List: similar to an array; elements can be reassigned.
# (Named lst here so the built-in list() stays usable below.)
lst = []
# Tuple: immutable, elements cannot be reassigned.
t = (1, 2, 3)
# Dict: equivalent to a Java Map; values can be reassigned.
d = {100: "tom", 200: "tomas", 300: "tomasLee"}
# Type conversion.
a = 100
str(a)
# eval() evaluates a string as an expression.
s = "(100 + 200) * 3 / (2 + 5)"
print(eval(s))
# Define a sequence and convert it.
seq = 1, 2, 3, 4, 5
t = tuple(seq)
lst = list(seq)
# The `in` operator works on list, tuple and dict alike.
# r"hello\r\nworld"  -> raw string, printed as-is (backslashes not interpreted)
# u"hello"           -> unicode string
# Two-dimensional list via a nested comprehension
# (rows and cols are defined here so the snippet runs).
rows = range(3)
cols = range(4)
list_2d = [[col for col in cols] for row in rows]
print(list_2d)