package cn.itcast.tags.ml.classification
import org.apache.spark.ml.Pipeline
import org.apache.spark.ml.classification.RandomForestClassifier
import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator
import org.apache.spark.ml.feature.{StringIndexer, StringIndexerModel, VectorIndexer, VectorIndexerModel}
import org.apache.spark.ml.param.ParamMap
import org.apache.spark.ml.tuning.{CrossValidator, CrossValidatorModel, ParamGridBuilder}
import org.apache.spark.sql.{DataFrame, SparkSession}
object RfModel {
def main(args: Array[String]): Unit = {
val spark = SparkSession.builder()
.appName(this.getClass.getSimpleName.stripSuffix("$"))
.master("local[4]")
.getOrCreate()
import org.apache.spark.sql.functions._
import spark.implicits._
// 1. 加载数据
val dataframe: DataFrame = spark.read
.format("libsvm")
.load("datas/ship/total001.txt")
// 划分数据集:训练数据和测试数据
va
Spark随机森林算法交叉验证、管道模型(pipeline)、模型评估代码实例
最新推荐文章于 2024-04-29 23:48:30 发布