PySpark connecting to ES
from pyspark import SparkContext

class EsSparkClient:
    def __init__(self, sc, host, port, username=None, password=None):
        self.sc = sc  # an active SparkContext; newAPIHadoopRDD is a method on it
        self.host = host
        self.port = port
        self.username = username
        self.password = password

    def pyspark_read(self, index, query=None):
        # Read an ES index into an RDD through the ES-Hadoop input format
        es_read_conf = {
            "es.nodes": self.host,
            "es.port": str(self.port),  # ES-Hadoop expects string config values
            "es.resource": index,
        }
        if self.username is not None:
            es_read_conf["es.net.http.auth.user"] = self.username
            es_read_conf["es.net.http.auth.pass"] = self.password
        if query is not None:
            es_read_conf["es.query"] = query
        es_rdd = self.sc.newAPIHadoopRDD(
            inputFormatClass="org.elasticsearch.hadoop.mr.EsInputFormat",
            keyClass="org.apache.hadoop.io.NullWritable",
            valueClass="org.elasticsearch.hadoop.mr.LinkedMapWritable",
            conf=es_read_conf)
        return es_rdd
    def pyspark_write(self, rdd, index, mapping_id=None):
        # Save the RDD to ES; each element must be a (key, document-dict) pair
        es_write_conf = {
            "es.nodes": self.host,
            "es.port": str(self.port),
            "es.resource": index,
        }
        if self.username is not None:
            es_write_conf["es.net.http.auth.user"] = self.username
            es_write_conf["es.net.http.auth.pass"] = self.password
        if mapping_id is not None:
            es_write_conf["es.mapping.id"] = mapping_id  # document field used as _id
        rdd.saveAsNewAPIHadoopFile(
            path='-',  # ignored by EsOutputFormat; '-' is the usual placeholder
            outputFormatClass="org.elasticsearch.hadoop.mr.EsOutputFormat",
            keyClass="org.apache.hadoop.io.NullWritable",
            valueClass="org.elasticsearch.hadoop.mr.LinkedMapWritable",
            conf=es_write_conf)
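
A minimal usage sketch for both methods. The host, port, and index name (logs/doc) are hypothetical values for illustration only, and the ES-Hadoop connector jar must be on the classpath:

from pyspark import SparkConf, SparkContext

# Submit with the connector on the classpath, e.g.:
#   spark-submit --jars elasticsearch-hadoop-<version>.jar this_script.py
conf = SparkConf().setAppName("es-demo").setMaster("local[*]")
sc = SparkContext(conf=conf)
client = EsSparkClient(sc, "localhost", 9200)

# Read with an optional query (ES query DSL passed as a JSON string)
rdd = client.pyspark_read("logs/doc", query='{"query": {"match_all": {}}}')
print(rdd.take(1))

# Write: the connector consumes (key, dict) pairs; the keys are ignored here
docs = sc.parallelize([("1", {"msg": "hello"}), ("2", {"msg": "world"})])
client.pyspark_write(docs, "logs/doc")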
Python connecting to ES
from elasticsearch import Elasticsearch

class EsClient:
    def __init__(self, host, port, username=None, password=None):
        self.host = host
        self.port = port
        self.username = username
        self.password = password

    def connect(self):
        # Build and return an Elasticsearch client (elasticsearch-py 7.x style)
        es_client = Elasticsearch(
            [{"host": self.host, "port": self.port}],
            http_auth=(self.username, self.password))
        return es_client
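
A quick usage sketch, assuming elasticsearch-py 7.x; the connection values and the index name (logs) are hypothetical, for illustration only:

# Hypothetical connection values for illustration only
es = EsClient("localhost", 9200, username="elastic", password="changeme").connect()
print(es.info())  # cluster metadata; confirms the connection works
result = es.search(index="logs", body={"query": {"match_all": {}}})
print(result["hits"]["hits"])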