es和oracle,Oracle和Elasticsearch数据同步

最新推荐文章于 2023-10-25 09:00:00 发布

蓝大仙人

最新推荐文章于 2023-10-25 09:00:00 发布

阅读量1.7k

点赞数

文章标签： es和oracle

本文档展示了如何使用Python连接Oracle数据库和Elasticsearch，实现数据同步。通过创建ES映射、数据库连接、数据读取和写入ES，实现多个表的数据迁移，如T_SOCIAL、T_HOTSEARCH等，同时记录和更新已同步的最大ID。

摘要由CSDN通过智能技术生成

# -*- coding: utf-8 -*-

"""

作者：陈龙

日期：2016-7-22

功能：oracle数据库到ES的数据同步

"""

import os

import sys

import datetime, time

# import fcntl

import threading

import pyes # 引入pyes模块，ES接口

import cx_Oracle # 引入cx_Oracle模块，Oracle接口

# Set the NLS_LANG environment variable to Simplified Chinese with the UTF8
# character set (Chinese text encoding).
# NOTE(review): NLS_LANG is presumably read by the Oracle client behind cx_Oracle - confirm.
os.environ['NLS_LANG'] = 'SIMPLIFIED CHINESE_CHINA.UTF8'

# Python 2 only: reload sys before calling sys.setdefaultencoding below.
# NOTE(review): the reload is presumably needed because site.py removes
# setdefaultencoding at interpreter start-up - confirm.
reload(sys)

# Make utf-8 the process-wide default encoding.
sys.setdefaultencoding('utf-8')

# 创建ES连接并返回连接对象

def connect_ES(addr):
    """Create a pyes connection and publish it as the module-level ``conn``.

    Args:
        addr: Elasticsearch address handed straight to ``pyes.ES``,
            e.g. '127.0.0.1:9200'.

    Returns:
        The object returned by ``pyes.ES(addr)`` on success, or ``None`` when
        constructing it raised. On failure the global ``conn`` is left
        untouched (unset, or still holding an earlier connection).
    """
    global conn

    try:
        # Keep the try body minimal: only the call that can actually fail.
        conn = pyes.ES(addr)
    except Exception as exc:
        # Still best-effort (no re-raise), but:
        #  * ``Exception`` instead of a bare ``except`` lets KeyboardInterrupt
        #    and SystemExit propagate;
        #  * the cause is reported instead of being silently discarded.
        print('ES连接错误: %s' % exc)
        return None

    print('ES连接成功')
    return conn

# 创建ES映射mapping 注意各个字段的类型

def create_ESmapping():
    """Build the five Elasticsearch field-mapping dicts as module globals.

    Takes no arguments and returns nothing. After the call these globals
    exist (each maps a field name to an ``{'index': ..., 'type': ...}`` dict):

    * ``spiderInfo_mapping``    - everything except the "涉我" (about-us) content
    * ``involveVideo_mapping``  - about-us audio/video content
    * ``involveCeefax_mapping`` - about-us image/text news content
    * ``keyWord_mapping``
    * ``sensitiveWord_mapping``
    """
    global spiderInfo_mapping, involveVideo_mapping, involveCeefax_mapping
    global keyWord_mapping, sensitiveWord_mapping

    def exact(es_type='string'):
        # Fresh descriptor for a field declared with 'index': 'not_analyzed'.
        return {'index': 'not_analyzed', 'type': es_type}

    def analysed_text():
        # Fresh descriptor for a string field declared with 'index': 'analyzed'.
        return {'index': 'analyzed', 'type': 'string'}

    # Mapping for everything except the about-us content.
    spiderInfo_mapping = {
        'tableName': exact(),
        'tableId': exact('integer'),
        'title': analysed_text(),
        'author': exact(),
        'content': analysed_text(),
        'publishTime': exact(),
        'browseNum': exact('integer'),
        'commentNum': exact('integer'),
        'dataType': exact('integer'),
    }

    # Mapping for about-us audio/video content.
    involveVideo_mapping = {
        'tableName': exact(),
        'tableId': exact('integer'),
        'title': analysed_text(),
        'author': exact(),
        'summary': analysed_text(),
        'publishTime': exact(),
        'url': exact(),
        'imgUrl': exact(),
        'ranking': exact('integer'),
        'playNum': exact('integer'),
        'dataType': exact('integer'),
    }

    # Mapping for about-us image/text news content.
    involveCeefax_mapping = {
        'tableName': exact(),
        'tableId': exact('integer'),
        'title': analysed_text(),
        'author': exact(),
        'content': analysed_text(),
        'publishTime': exact(),
        'keyWords': exact(),
        'popularity': exact('integer'),
        'url': exact(),
        'dataType': exact('integer'),
    }

    keyWord_mapping = {
        'id': exact('integer'),
        'keywords': exact(),
    }

    sensitiveWord_mapping = {
        'id': exact('integer'),
        'sensitiveType': exact(),
        'sensitiveTopic': exact(),
        'sensitiveWords': exact(),
    }

# 创建ES相关索引和索引下的type

def create_ESindex(ES_index, index_type1,index_type2,index_type3,index_type4,index_type5):

if conn.indices.exists_index(ES_index):

pass

else:

conn.indices.create_index(ES_index) # 如果所有Str不存在，则创建Str索引

create_ESmapping()

conn.indices.put_mapping(index_type1, {'properties': spiderInfo_mapping},[ES_index]) # 在索引pom下创建spiderInfo的_type "spiderInfo"

conn.indices.put_mapping(index_type2, {'properties': involveVideo_mapping},[ES_index]) # 在索引pom下创建involveVideo的_type "involveVideo"

conn.indices.put_mapping(index_type3, {'properties': involveCeefax_mapping},[ES_index]) # 在索引pom下创建involveCeefax的_type "involveCeefax"

conn.indices.put_mapping(index_type4, {'properties': keyWord_map

最低0.47元/天解锁文章

蓝大仙人

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
复制链接

分享到 QQ

分享到新浪微博

扫一扫