Level 1: MLlib Introduction
package com.educoder.bigData.sparksql5;
import java.util.Arrays;
import java.util.List;
import org.apache.spark.ml.Pipeline;
import org.apache.spark.ml.PipelineModel;
import org.apache.spark.ml.PipelineStage;
import org.apache.spark.ml.classification.LogisticRegression;
import org.apache.spark.ml.feature.HashingTF;
import org.apache.spark.ml.feature.Tokenizer;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.Metadata;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;
public class Test1 {

    public static void main(String[] args) {
        // Start a local SparkSession for this exercise.
        SparkSession spark = SparkSession.builder().appName("test1").master("local").getOrCreate();

        // Training documents as (label, text) rows; 1.0 marks the documents that mention Spark/Hadoop.
        List<Row> trainingList = Arrays.asList(
                RowFactory.create(1.0, "a b c d E spark"),
                RowFactory.create(0.0, "b d"),
                RowFactory.create(1.0, "hadoop Mapreduce"),
                RowFactory.create(0.0, "f g h"));
        // Test documents to score with the fitted pipeline (a fourth row is truncated in the
        // original listing and omitted here); the label values on the test rows are not used
        // when the model transforms the data.
        List<Row> testList = Arrays.asList(
                RowFactory.create(0.0, "spark I j k"),
                RowFactory.create(0.0, "l M n"),
                RowFactory.create(0.0, "f g"));
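
        // The original listing breaks off above. What follows is a minimal sketch of the
        // remaining steps, assuming the standard Tokenizer -> HashingTF -> LogisticRegression
        // flow suggested by the imports; the column names and the hyperparameter values
        // (numFeatures, maxIter, regParam) are illustrative assumptions, not the exercise's
        // reference solution.

        // Both DataFrames share a (label: double, text: string) schema.
        StructType schema = new StructType(new StructField[] {
                new StructField("label", DataTypes.DoubleType, false, Metadata.empty()),
                new StructField("text", DataTypes.StringType, false, Metadata.empty()) });
        Dataset<Row> training = spark.createDataFrame(trainingList, schema);
        Dataset<Row> test = spark.createDataFrame(testList, schema);

        // Stage 1: split each sentence into lowercase words.
        Tokenizer tokenizer = new Tokenizer().setInputCol("text").setOutputCol("words");
        // Stage 2: hash the words into a fixed-length term-frequency feature vector.
        HashingTF hashingTF = new HashingTF()
                .setNumFeatures(1000)
                .setInputCol(tokenizer.getOutputCol())
                .setOutputCol("features");
        // Stage 3: binary logistic regression on the hashed features.
        LogisticRegression lr = new LogisticRegression().setMaxIter(10).setRegParam(0.001);

        // Chain the three stages into one Pipeline and fit it on the training data.
        Pipeline pipeline = new Pipeline().setStages(new PipelineStage[] { tokenizer, hashingTF, lr });
        PipelineModel model = pipeline.fit(training);

        // Score the test documents with the fitted model and print the predictions.
        model.transform(test).show(false);

        spark.stop();
    }
}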