import java.io.Serializable;
import java.util.ArrayList;
import java.util.Iterator;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import
org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.Function;
import com.fasterxml.jackson.databind.ObjectMapper;
public class BasicLoadJson {
public static class Person implements Serializable{
public String name;
public Boolean lovesPandas;
}
public static class ParseJson implements FlatMapFunction,
Person>{
@Override
public Iterable call(Iterator lines) throws Exception {
// TODO Auto-generated method stub
ArrayList people = new ArrayList();
ObjectMapper mapper = new ObjectMapper();
while(lines.hasNext()){
String line = lines.next();
try{
people.add(mapper.readValue(line, Person.class));
}catch(Exception e){
e.printStackTrace();
}
}
return people;
}
}
public static class LikesPandas implements Function{
@Override
public Boolean call(Person person) throws Exception {
// TODO Auto-generated method stub
return person.lovesPandas;
}
}
public static class WriteJson implements FlatMapFunction,
String>{
@Override
public Iterable call(Iterator people) throws Exception {
// TODO Auto-generated method stub
ArrayList text = new ArrayList();
ObjectMapper mapper = new ObjectMapper();
while(people.hasNext()){
Person person = people.next();
text.add(mapper.writeValueAsString(person));
}
return text;
}
}
public static void main(String[] args) {
// TODO Auto-generated method stub
SparkConf conf = new
SparkConf().setMaster("local[3]").setAppName("spark test2");
JavaSparkContext context = new JavaSparkContext(conf);
JavaRDD input = context.textFile("D:\\test.json");
JavaRDD result = input.mapPartitions(new
ParseJson()).filter(new LikesPandas());
for(Person person: result.collect()){
System.out.println(person.name);
}
JavaRDD formatted = result.mapPartitions(new
WriteJson());
formatted.saveAsTextFile("D:\\test_filter");
}
}