下面是使用 Spark 的 newAPIHadoopRDD 配合 elasticsearch-hadoop 的 EsInputFormat 读取 Elasticsearch 数据的完整示例代码:
/**
 * Demo: read documents from Elasticsearch with Spark's {@code newAPIHadoopRDD}
 * (via elasticsearch-hadoop's {@code EsInputFormat}) and log the "extension"
 * field of every document matching a query_string query on {@code postUrn}.
 */
public class MySparkReadEs implements Serializable {
    private static final Log LOG = LogFactory.getLog(MySparkReadEs.class);

    // transient: Spark/Hadoop handles are not serializable and must never be
    // shipped to executors along with this (Serializable) driver-side object.
    private transient JavaSparkContext javaSparkContext = null;
    private transient SparkConf sparkConf = null;
    private transient Configuration esConf = null;

    // ES resource as "index/type" and comma-separated node list.
    private String esSource = "post/post";
    private String esNodes = "192.168.1.235,192.168.1.236";

    private MySparkReadEs() {
    }

    private MySparkReadEs(String esSource, String esNodes) {
        this.esSource = esSource;
        this.esNodes = esNodes;
    }

    /** Initialize a local (2-core) Spark context. */
    private void initSparkContext() {
        this.sparkConf = new SparkConf().setMaster("local[2]").setAppName("my spark rdd");
        this.javaSparkContext = new JavaSparkContext(this.sparkConf);
    }

    /**
     * Stop the Spark context and release its resources.
     * Fix: the original code never stopped the context it created.
     */
    private void stopSparkContext() {
        if (this.javaSparkContext != null) {
            this.javaSparkContext.stop();
            this.javaSparkContext = null;
        }
    }

    /**
     * Build the Hadoop {@link Configuration} used to query Elasticsearch.
     *
     * @param urn the post URN to filter on; becomes part of "es.query"
     */
    private void initEsConfig(String urn) {
        // NOTE(review): seeding from HBaseConfiguration.create() pulls HBase
        // settings into an ES-only job — presumably copied from another job;
        // confirm whether a plain `new Configuration()` would suffice.
        // (Removed the redundant cast: HBaseConfiguration extends Configuration.)
        this.esConf = new Configuration(HBaseConfiguration.create());
        // Index/type to read from.
        this.esConf.set("es.resource", this.esSource);
        // ES cluster nodes.
        this.esConf.set("es.nodes", this.esNodes);
        // Query DSL restricting which documents are read.
        this.esConf.set("es.query", buildQueryESCondition(urn));
    }

    /**
     * Build the ES query JSON: a query_string query on the postUrn field.
     * Lucene query syntax is supported inside the string, so further
     * fields/clauses can be appended to it.
     *
     * @param urn the post URN to match
     * @return the query serialized as a JSON string
     */
    private String buildQueryESCondition(String urn) {
        String queryString = "postUrn:(" + urn + ")";
        // Shape: {"query":{"query_string":{"query":"postUrn:(<urn>)"}}}
        Map<String, ?> conditionMap = ImmutableMap.of(
                "query", ImmutableMap.of(
                        "query_string", ImmutableMap.of("query", queryString)));
        String json = JSON.toJSONString(conditionMap);
        LOG.info("SPARK 查询 ES 的条件为:" + json);
        return json;
    }

    /**
     * Run the job: read the matching ES documents as (NullWritable, MapWritable)
     * pairs, extract each document's "extension" field, collect to the driver,
     * and log every value.
     */
    private void startJob() {
        List<String> extensionCollect = this.javaSparkContext
                .newAPIHadoopRDD(this.esConf, EsInputFormat.class, NullWritable.class, MapWritable.class)
                .map(new Function<Tuple2<NullWritable, MapWritable>, String>() {
                    @Override
                    public String call(Tuple2<NullWritable, MapWritable> v1) throws Exception {
                        MapWritable mapWritable = v1._2();
                        // Convert Writable values into plain Java objects.
                        Map<String, Object> doc = (Map<String, Object>) WritableUtils.fromWritable(mapWritable);
                        return (String) doc.get("extension");
                    }
                })
                .collect();
        for (String extension : extensionCollect) {
            LOG.info("Extension : " + extension);
        }
    }

    /** Entry point: wire up Spark + ES config, run the job, then stop Spark. */
    public static void main(String[] args) {
        String esNodes = "192.168.1.235,192.168.1.236";
        String esSource = "post/post";
        String userUrn = "3136949-bf30c1da6f8b47d3d93a5c4f75194447";
        MySparkReadEs mySparkRdd = new MySparkReadEs(esSource, esNodes);
        mySparkRdd.initSparkContext();
        try {
            mySparkRdd.initEsConfig(userUrn);
            mySparkRdd.startJob();
        } finally {
            // Fix: always release the SparkContext, even if the job fails.
            mySparkRdd.stopSparkContext();
        }
    }
}