分类算法
根据特征数据区分属于哪一类
比如根据微博上发表的文章,判断其属于哪一类:正向还是负向
类别:支持向量机
决策树(本章具体描述决策树)
**决策树:**将特征进行划分,进行一系列处理
例子:中午吃饭,先判断兜里的钱是否大于20;如果大于20,继续判断是否大于25:大于25就吃黄焖鸡+可乐,不大于25就只吃黄焖鸡。如果兜里的钱一开始就不大于20,就吃泡面
/**
 * Trains a decision-tree classifier that predicts whether a survey respondent
 * is happy, then reports the accuracy on a held-out split.
 */
object Happiness {

  /**
   * Program entry point.
   *
   * Reads `data/happiness_train_complete.csv`, turns eight selected columns into
   * a dense feature vector, binarises the `happiness` column into a 0/1 label,
   * trains a decision tree on 80% of the rows and evaluates it on the other 20%.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("happiness")
      .master("local")
      .getOrCreate()

    // Everything after session creation runs inside try/finally so the session
    // is released even when reading, parsing or training fails.
    try {
      spark.sparkContext.setLogLevel("ERROR") // reduce log noise

      // Columns kept for modelling:
      //   province: location        gender: sex          religion: religious belief
      //   edu: education level      floor_area: dwelling floor area
      //   health: health status     social_friend: how often the person socialises with friends
      //   income: last year's income
      //   happiness: the target column
      val data = spark.read
        .option("header", "true") // the first line holds column names, not data
        .csv("data/happiness_train_complete.csv")
        .select("province", "gender", "religion", "edu", "floor_area",
          "health", "social_friend", "income", "happiness")
        .rdd

      val labelPoint = data.map { line =>
        // Binary label: a happiness score above 3 counts as happy (1.0), otherwise 0.0.
        val happiness = if (line.getString(8).toDouble > 3) 1.0 else 0.0

        // social_friend may be missing; a null or empty value is replaced by "0.0".
        val socialFriend = Option(line.getString(6)).filter(_.nonEmpty).getOrElse("0.0")

        // The remaining columns are parsed as-is: a null or non-numeric value
        // in any of them makes the job fail.
        val feature = Array[Double](
          line.getString(0).toDouble,
          line.getString(1).toDouble,
          line.getString(2).toDouble,
          line.getString(3).toDouble,
          line.getString(4).toDouble,
          line.getString(5).toDouble,
          socialFriend.toDouble,
          line.getString(7).toDouble
        )
        LabeledPoint(happiness, Vectors.dense(feature))
      }

      // Split into training (80%) and test (20%) sets.
      val Array(tranData, testData) = labelPoint.randomSplit(Array(0.8, 0.2))

      val model = DecisionTree.trainClassifier(
        tranData,
        2,               // numClasses: the label is binary (0.0 / 1.0)
        Map[Int, Int](), // categoricalFeaturesInfo: empty, so every feature is treated as continuous
        "gini",          // impurity measure
        5,               // maxDepth
        32               // maxBins
      )

      // MulticlassMetrics expects (prediction, label) pairs, in that order.
      val predictionAndLabels = testData.map { point =>
        (model.predict(point.features), point.label)
      }

      // Show a small sample as (label, prediction), followed by the learned tree.
      predictionAndLabels.take(10).foreach { case (prediction, label) =>
        println((label, prediction))
      }
      println(model.toDebugString)

      val metrics = new MulticlassMetrics(predictionAndLabels)
      println(s"结果准确度:${metrics.accuracy}")
    } finally {
      spark.close() // release the session
    }
  }
}