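// Pipeline: a "channel" that chains processing stages.
// Purpose: combines the other algorithms so that they are executed together.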
import org.ansj.recognition.impl.StopRecognition
import org.ansj.splitWord.analysis.ToAnalysis
import org.apache.spark.ml.Pipeline
import org.apache.spark.ml.classification.LogisticRegression
import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator
import org.apache.spark.ml.feature.{HashingTF, IDF}
import org.apache.spark.sql.SparkSession
import scala.util.matching.Regex
/**
 * Determines whether a review is positive or negative.
 */
object PipelineDemo {
def main(args: Array[String]): Unit = {
//读取数据
val spark = SparkSession.builder()
.appName("news")
.master("local")
.getOrCreate()
// The input files are plain .txt, so it is best to read them through spark.sparkContext
val sc = spark.sparkContext
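// Read the negative samples. NOTE(review): these are tagged with label 1, the same label given to the positive samples — presumably the negative label should differ (e.g. 0); confirm.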
val negData = sc.textFile("data\\news\\trainNeg.txt").map((_,1))
//读取积极
val posData = sc.textFile("data\\news\\trainPos.txt").map((_,1))
//将两个集合结合在一起
val newsData = negData