final String mgohost = "127.0.0.1"
SparkSession spark = SparkSession.builder()
.appName("spot")
.config("spark.mongodb.output.uri", "mongodb://spark:spark@" + mgohost + ":27017/admin")
.config("spark.mongodb.output.database","demo")
.config("spark.mongodb.output.collection","test")
.getOrCreate()
- mongodb://用户名:密码@" + mgohost + ":27017/admin
- admin为mongodb系统database,通过系统库连接database权限认证通过
- demo指定其它database
- test指定collection
// Convert every line of the input file into a MongoDB Document and persist
// the resulting RDD; records that fail to convert are tagged and filtered out.
JavaSparkContext jc = new JavaSparkContext(spark.sparkContext());
try {
    JavaRDD<String> spotInfo = jc.textFile(input);
    JavaRDD<Document> javaRDD = spotInfo
            .map(new Function<String, Document>() {
                @Override
                public Document call(String line) {
                    try {
                        Document document = new Document();
                        document.put("data", line);
                        return document;
                    } catch (Exception e) {
                        // Tag the failed record instead of silently swallowing
                        // the exception; keep the message so the cause is not lost.
                        Document failed = new Document();
                        failed.put("error", String.valueOf(e.getMessage()));
                        return failed;
                    }
                }
            })
            // Drop the records tagged as failures above.
            .filter(new Function<Document, Boolean>() {
                @Override
                public Boolean call(Document doc) throws Exception {
                    return !doc.containsKey("error");
                }
            });
    MongoSpark.save(javaRDD);
} finally {
    // Always release the Spark context, even if the save throws.
    jc.stop();
}
- 遍历文件每一条数据返回Document保存至mongodb数据库