1. 使用 pip install sshtunnel 命令安装 sshtunnel 库
2. 话不多说,上代码
from sqlalchemy import create_engine
from elasticsearch import Elasticsearch
from pandasticsearch import DataFrame
from sshtunnel import SSHTunnelForwarder
import pymysql
import pandas as pd
class MyObject:
def __init__(self):
self.server = SSHTunnelForwarder(
ssh_address_or_host=('xxx.xxx.xxx.xxx', 22), # 指定ssh登录的跳转机的ip+port
ssh_username='username', # 跳转机的用户
ssh_password='password', # 跳转机的密码
# remote_bind_address=('AAA.AAA.AAA.AAA', 3306), # 单 -远程服务器地址+端口
remote_bind_addresses=[('AAA.AAA.AAA.AAA', 3306), ('BBB.BBB.BBB.BBB', 9200)], # 多 -远程服务器地址+端口
)
self.server.start()
self.engine = create_engine('mysql+pymysql://username:password@127.0.0.1:%s/dbname' % self.server.local_bind_ports[0])
# 使用elasticsearch
self.es = Elasticsearch(
['127.0.0.1:80'], # 连接集群,以列表的形式存放各节点的IP地址
# sniff_on_start=True, # 连接前测试
sniff_on_connection_fail=True, # 节点无响应时刷新节点
sniff_timeout=100, # 设置超时时间
http_auth=('es', 'password'), # ES用户名密码
port=str(self.server.local_bind_ports[1]) # 端口号
)
self.conn = pymysql.connect(host="127.0.0.1",
user="username",
password="password",
db="dbname",
port=self.server.local_bind_ports[0],
charset='utf8',
)
self.cursor = self.conn.cursor()
# 使用pandasticsearh
def getDataFromES(self, index_name):
dataFrameFromES = DataFrame.from_es(url='http://127.0.0.1:%s' % self.server.local_bind_ports[1],
doc_type=index_name,
compat=5,
username="es",
password="password")
return dataFrameFromES.limit(20000).to_pandas()
if __name__ == '__main__':
instance = MyObject()
for item in instance.es.cat.indices():
print(item['index'])
instance.getDataFromES("index_name")
# 使用完将服务关闭
instance.server.close()
3. 使用 pandasticsearh 操作 es 时,用户名/密码在 python3 环境会报错,需要修改源代码
找到 python 安装目录 /site-packages/pandasticsearch/client.py
将 %s" % base64creds 替换为自己用在线工具 base64.encode(username:password)后的字符串