Without further ado, here's the code.
Spark Core with MongoDB
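One prerequisite first: the MongoDB Spark connector has to be on the classpath. A sketch of the sbt dependencies (the version numbers are assumptions; match them to your Spark and Scala versions):

// build.sbt -- versions are assumptions, align them with your cluster
libraryDependencies ++= Seq(
  "org.apache.spark" %% "spark-core" % "2.4.0" % "provided",
  "org.mongodb.spark" %% "mongo-spark-connector" % "2.4.0"
)

With that in place, the job itself: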
package com.bw.day02

import com.mongodb.spark.MongoSpark
import com.mongodb.spark.rdd.MongoRDD
import org.apache.spark.{SparkConf, SparkContext}
import org.bson.Document

object MongoSparkRDD {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setMaster("local[*]").setAppName("MongoSparkRDD")
      .set("spark.mongodb.input.uri", "mongodb://he:123456@192.168.12.11:27017/bike.log")
      .set("spark.mongodb.output.uri", "mongodb://he:123456@192.168.12.11:27017/bike.result")
    // create the SparkContext
    val sc = new SparkContext(conf)
    // load the bike.log collection as an RDD of BSON Documents
    val mongoRDD: MongoRDD[Document] = MongoSpark.load(sc)
    // PV: total number of log documents
    val pv: Long = mongoRDD.count()
    // UV: number of distinct openid values
    val uv: Long = mongoRDD.map(doc => doc.getString("openid")).distinct().count()
    println("pv " + pv + " uv " + uv)
    sc.stop()
  }
}