我使用 IntelliJ IDEA 编写 Scala 程序
首先配置build.sbt文件
name := "mytest"

version := "1.0"

// Spark 2.2.0 artifacts are built against Scala 2.11.8+
scalaVersion := "2.11.8"

// %% appends the Scala binary suffix (_2.11) automatically,
// so the dependency stays correct if scalaVersion changes.
libraryDependencies += "org.apache.spark" %% "spark-core" % "2.2.0"
然后编写主程序

/** Created by lxs on 17-7-21.
  *
  * Computes simple purchase statistics (total purchases, distinct users,
  * total revenue, most popular product) from a CSV of (user,product,price)
  * records using a local Spark context.
  */
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._

object sparkApp {
  def main(args: Array[String]): Unit = {
    // Local master with 2 worker threads; app name shown in the Spark UI.
    val sc = new SparkContext("local[2]", "First Spark App")
    try {
      // Parse each CSV line into a (user, product, price) triple.
      // NOTE(review): assumes every line has at least 3 comma-separated
      // fields — a short line would throw ArrayIndexOutOfBoundsException.
      val data = sc.textFile("/home/lxs/Documents/UserPurchaseHistory.csv")
        .map(line => line.split(","))
        .map(purchaseRecord => (purchaseRecord(0), purchaseRecord(1), purchaseRecord(2)))

      // Total number of purchase records.
      val numPurchases = data.count()

      // Number of distinct customers who bought anything.
      val uniqueUsers = data.map { case (user, product, price) => user }.distinct().count()

      // Sum of all prices = total revenue.
      val totalRevenue = data.map { case (user, product, price) => price.toDouble }.sum()

      // Count purchases per product, then sort descending by count on the
      // driver (the collected array is small: one entry per product).
      val productsByPopularity = data
        .map { case (user, product, price) => (product, 1) }
        .reduceByKey(_ + _)
        .collect()
        .sortBy(-_._2)
      // Assumes the input file is non-empty; empty input would fail here.
      val mostPopular = productsByPopularity(0)

      println("Total purchases:" + numPurchases)
      println("unique users:" + uniqueUsers)
      // Fixed output typo: "revene" -> "revenue".
      println("Total revenue:" + totalRevenue)
      println("Most popular product:%s with %d purchases".format(mostPopular._1, mostPopular._2))
    } finally {
      // Bug fix: the original never stopped the context, leaking the
      // Spark runtime's threads and resources.
      sc.stop()
    }
  }
}