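/*
 * Note: the MongoDB jars listed below must be placed in the `jars` directory
 * of the Spark installation:
 *   - mongo-spark-connector_2.11-2.4.1.jar  (the `2.11` in the name is the Scala version)
 *   - mysql-connector-java-8.0.21.jar
 *   - mongo-java-driver-3.10.2.jar
 */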
package com.xy.use
import java.net.URLEncoder
import org.apache.spark.sql.types.{
StringType, StructField, StructType}
import org.apache.spark.sql.{
SaveMode, SparkSession}
/**
 * Description: reads the MongoDB table LiveStreamAnchor with partitioned reads
 * and writes it to HDFS.
 */
object Tohdfs {
def main(args: Array[String]): Unit = {
var mongoUser:String = URLEncoder.encode("username","utf-8")
var mongoPwd: String = URLEncoder.encode("password","utf-8")
var ipport : String = "ip:3717"
var dataBase : String = "库.表"
//mongodb://testAdmin:123456@127.0.0.1:27017/admin
var mongoUrl = s"mongodb://${mongoUser}:${mongoPwd}@$ipport/$dataBase"
// ————————————————
// 版权声明:本文为CSDN博主「Alex_81D」的原创文章,遵循CC 4.0 BY-SA版权协议,转载请附上原文出处链接及本声明。
// 原文链接:https://blog.csdn.net/Alex_81D/article/details/108534641
val spark = SparkSession.builder()
// .master("local[*]")
.appName("MyApp")
.config("spark.mongodb.input.uri", mongoUrl)
.getOrCreate()
// 设置log级别
// spark.sparkContext.setLogLevel("WARN")
//
// val df = MongoSpark.load(spark)
// 设置log级别
spark.sparkContext.setLogLevel("WARN")
val schema = StructType(
List(
StructField("_id", StringType),
StructField("co1", StringType),
StructField("co1", StringType) ,
StructField("co1", StringType),
StructField("co1",StringType ) ,
StructField("co1", StringType),
StructField("co1", StringType) ,
StructField("co1", StringType) ,
StructField("co1", StringType),
StructField("co1", StringType)
)
)
// 通过schema约束,直接获取需要的字段
val df = spark.read.format("com.mongodb.spark.sql")
.option("spark.mongodb.input.partitioner", "DefaultMongoPartitioner")
.option(