头歌 Spark的机器学习-MLlib

自信喵 QAQ

于 2024-05-05 00:40:27 发布

阅读量 2.7k

点赞数 5

文章标签： java

本文链接：https://blog.csdn.net/bzzb52/article/details/138454532

版权

第1关 MLlib介绍

package com.educoder.bigData.sparksql5;
import java.util.Arrays;
import java.util.List;
import org.apache.spark.ml.Pipeline;
import org.apache.spark.ml.PipelineModel;
import org.apache.spark.ml.PipelineStage;
import org.apache.spark.ml.classification.LogisticRegression;
import org.apache.spark.ml.feature.HashingTF;
import org.apache.spark.ml.feature.Tokenizer;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.Metadata;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;
public class Test1 {
   
public static void main(String[] args) {
   
SparkSession spark = SparkSession.builder().appName("test1").master("local").getOrCreate();
List<Row> trainingList = Arrays.asList(
RowFactory.create(1.0, "a b c d E spark"),
RowFactory.create(0.0, "b d"),
RowFactory.create(1.0, "hadoop Mapreduce"),
RowFactory.create(0.0, "f g h"));
List<Row> testList = Arrays.asList(
RowFactory.create(0.0, "spark I j k"),
RowFactory.create(0.0, "l M n"),
RowFactory.create(0.0, "f g"),
RowFactory