package org.spark.mongo;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.bson.BSONObject;
import com.mongodb.hadoop.MongoOutputFormat;
import scala.Tuple2;
/**
 * Reads documents from a MongoDB collection through the mongo-hadoop connector,
 * extracts the "id" and "zhuanye" (major) fields for logging, and writes the
 * original input documents to a second MongoDB collection.
 */
public class SparkMongotoMongo {

    /**
     * Entry point: configures Spark and the mongo-hadoop connector, logs the
     * extracted (id, zhuanye) pairs from the source collection, then copies the
     * raw input documents to the output collection.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        SparkConf sparkConf = new SparkConf().setAppName("mongotomongo");
        // Standalone master; two hosts are listed for master failover.
        sparkConf.setMaster("spark://dwrj5114:7077,dwrj5113:7077");
        JavaSparkContext sc = new JavaSparkContext(sparkConf);
        try {
            // Ship the application jar to the executors.
            sc.addJar("F:\\jars\\sparkmongo.jar");

            // Mongo connector config. URI format: host:port/database.collection
            Configuration config = new Configuration();
            config.set("mongo.input.uri", "mongodb://10.1.50.124:27017/ligf.student");
            config.set("mongo.output.uri", "mongodb://10.1.50.124:27017/ligf.test");

            // Input contains tuples of (ObjectId, BSONObject).
            JavaPairRDD<Object, BSONObject> mongoRDD = sc.newAPIHadoopRDD(config,
                    com.mongodb.hadoop.MongoInputFormat.class, Object.class, BSONObject.class);

            JavaRDD<String> words = mongoRDD.flatMap(
                    new FlatMapFunction<Tuple2<Object, BSONObject>, String>() {
                private static final long serialVersionUID = 7780225729554937350L;

                @Override
                public Iterable<String> call(Tuple2<Object, BSONObject> document) {
                    // BSONObject.get() returns null for absent fields; the
                    // previous code cast to String and called equals() on the
                    // result, which threw NullPointerException for documents
                    // missing "id" or "zhuanye" (and ClassCastException for
                    // non-String values). Normalize both cases to "".
                    Object rawId = document._2.get("id");
                    Object rawZhuanye = document._2.get("zhuanye");
                    String id = rawId == null ? "" : rawId.toString();
                    String zhuanye = rawZhuanye == null ? "" : rawZhuanye.toString();
                    if (id.isEmpty() && zhuanye.isEmpty()) {
                        // Skip documents that carry neither field.
                        return Collections.emptyList();
                    }
                    return Arrays.asList(id + "," + zhuanye);
                }
            });

            // 打印出每个人的信息 (print each person's information)
            System.out.println("打印出每个人的信息");
            List<String> outputs = words.collect();
            if (outputs.isEmpty()) {
                System.out.println("no matching documents found");
            }
            System.out.println("打印出每个人的信息2");
            for (String out : outputs) {
                System.out.println("output:" + out);
            }
            System.out.println("打印出每个人的信息3");

            // Save the ORIGINAL input RDD (not the transformed `words`) to the
            // output collection, i.e. a straight copy of ligf.student into
            // ligf.test. The path argument is ignored by MongoOutputFormat;
            // only the output format class and config matter.
            mongoRDD.saveAsNewAPIHadoopFile("file:///inputDir", Object.class,
                    Object.class, MongoOutputFormat.class, config);
        } finally {
            // Always release the SparkContext, even if a stage fails.
            sc.close();
        }
    }
}
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.bson.BSONObject;
import com.mongodb.hadoop.MongoOutputFormat;
import scala.Tuple2;
// NOTE(review): this class (together with the second run of import statements
// immediately above it) is a byte-for-byte duplicate of the SparkMongotoMongo
// class defined earlier in this file — almost certainly an accidental paste.
// A Java file cannot contain two top-level classes with the same name, so the
// file does not compile as-is. This duplicated copy should be DELETED; it is
// annotated here only so the duplication is visible in review.
public class SparkMongotoMongo {
// Entry point: reads from one MongoDB collection, logs (id, zhuanye) pairs,
// and copies the raw input documents to a second MongoDB collection.
public static void main(String args[]){
SparkConf sparkConf = new SparkConf().setAppName("mongotomongo");
// Standalone master; two hosts listed for master failover.
sparkConf.setMaster("spark://dwrj5114:7077,dwrj5113:7077");
// Spark context; the jar below is shipped to the executors.
JavaSparkContext sc = new JavaSparkContext(sparkConf);
sc.addJar("F:\\jars\\sparkmongo.jar");
// Mongo connector config.
// URI format: host:port/database.collection
Configuration config = new Configuration();
config.set("mongo.input.uri", "mongodb://10.1.50.124:27017/ligf.student");
config.set("mongo.output.uri", "mongodb://10.1.50.124:27017/ligf.test");
JavaPairRDD<Object, BSONObject> mongoRDD = sc.newAPIHadoopRDD(config, com.mongodb.hadoop.MongoInputFormat.class, Object.class, BSONObject.class);
// Input contains tuples of (ObjectId, BSONObject)
JavaRDD<String> words = mongoRDD.flatMap(new FlatMapFunction<Tuple2<Object, BSONObject>, String>() {
private static final long serialVersionUID = 7780225729554937350L;
@Override
public Iterable<String> call(Tuple2<Object, BSONObject> document) {
// NOTE(review): get() returns null for absent fields, so these casts plus
// the equals() calls below throw NullPointerException for documents
// missing "id" or "zhuanye" — confirm all documents carry both fields.
String id=(String)document._2.get("id");
String zhuanye=(String)document._2.get("zhuanye");
if(id.equals("")&&zhuanye.equals("")){
// Skip documents where both fields are empty.
return Collections.emptyList();
}else{
return Arrays.asList(total);
}
// if (o instanceof BSONObject) {
// BSONObject bso = (BSONObject) o;
// String street = (String)bso.get("street");
// String city = (String)bso.get("city");
// String state = (String)bso.get("state");
// String address = street+", "+city+", "+state;
// return Arrays.asList(address);
// } else {
// return Collections.emptyList();
// }
}
});
// Print each person's information.
System.out.println("打印出每个人的信息");
List<String> outputs = words.collect();
if(outputs.size()==0){
// Placeholder debug output for the empty-result case.
System.out.println("aaaaaaaaaaaaaaaaaaa");
}
System.out.println("打印出每个人的信息2");
for (String out : outputs) {
System.out.println("output:"+out);
}
System.out.println("打印出每个人的信息3");
// Saves the ORIGINAL input RDD (not `words`) — a straight collection copy.
// Only MongoOutputFormat and config are relevant; the path is ignored.
mongoRDD.saveAsNewAPIHadoopFile("file:///inputDir", Object.class, Object.class, MongoOutputFormat.class, config);
sc.close();
}
}