基础环境搭建参考windows环境 logstash-output-mongodb实现Mysql到Mongodb数据同步
一、确认版本信息:
logstash-7.9.0,logstash-output-mongodb 3.1.6,mongo-2.10.0(2.13.0mongo连接异常,其他版本未测试)
二、修改lib\logstash\outputs\mongodb.rb文件
# encoding: utf-8
require "logstash/outputs/base"
require "logstash/namespace"
require "mongo"
require_relative "bson/big_decimal"
require_relative "bson/logstash_timestamp"
# This output writes events to MongoDB.
class LogStash::Outputs::Mongodb < LogStash::Outputs::Base
config_name "mongodb"
# A MongoDB URI to connect to.
# See http://docs.mongodb.org/manual/reference/connection-string/.
config :uri, :validate => :string, :required => true
# The database to use.
config :database, :validate => :string, :required => true
# The collection to use. This value can use `%{foo}` values to dynamically
# select a collection based on data in the event.
config :collection, :validate => :string, :required => true
# If true, store the @timestamp field in MongoDB as an ISODate type instead
# of an ISO8601 string. For more information about this, see
# http://www.mongodb.org/display/DOCS/Dates.
config :isodate, :validate => :boolean, :default => false
# The number of seconds to wait after failure before retrying.
config :retry_delay, :validate => :number, :default => 3, :required => false
# If true, an "_id" field will be added to the document before insertion.
# The "_id" field will use the timestamp of the event and overwrite an existing
# "_id" field in the event.
config :generateId, :validate => :boolean, :default => false
# Bulk insert flag, set to true to allow bulk insertion, else it will insert events one by one.
config :bulk, :validate => :boolean, :default => false
# Bulk interval, Used to insert events periodically if the "bulk" flag is activated.
config :bulk_interval, :validate => :number, :default => 2
# Bulk events number, if the number of events to insert into a collection raise that limit, it will be bulk inserted
# whatever the bulk interval value (mongodb hard limit is 1000).
config :bulk_size, :validate => :number, :default => 900, :maximum => 999, :min => 2
# Upsert documents flag, set to true to use replace_one instead of insert_one. This setting is ignored when bulk
# insert is used
config :upsert, :validate => :boolean, :default => false
# Mutex used to synchronize access to 'documents'
@@mutex = Mutex.new
def register
if @bulk_size > 1000
raise LogStash::ConfigurationError, "Bulk size must be lower than '1000', currently '#{@bulk_size}'"
end
Mongo::Logger.logger = @logger
conn = Mongo::Client.new(@uri)
@db = conn.use(@database)
@closed = Concurrent::AtomicBoolean.new(false)
@documents = {}
@bulk_thread = Thread.new(@bulk_interval) do |bulk_interval|
while @closed.false? do
sleep(bulk_interval)
@@mutex.synchronize do
@documents.each do |collection, values|
if values.length > 0
@db[collection].insert_many(values)
@documents.delete(collection)
end
end
end
end
end
end
def receive(event)
begin
# Our timestamp object now has a to_bson method, using it here
# {}.merge(other) so we don't taint the event hash innards
document = {}.merge(event.to_hash)
if !@isodate
timestamp = event.timestamp
if timestamp
# not using timestamp.to_bson
document["@timestamp"] = timestamp.to_json
else
@logger.warn("Cannot set MongoDB document `@timestamp` field because it does not exist in the event", :event => event)
end
end
if @generateId
document["_id"] = BSON::ObjectId.new
end
if @bulk
collection = event.sprintf(@collection)
@@mutex.synchronize do
if(!@documents[collection])
@documents[collection] = []
end
@documents[collection].push(document)
if(@documents[collection].length >= @bulk_size)
@db[collection].insert_many(@documents[collection])
@documents.delete(collection)
end
end
else
update_result = @db[event.sprintf(@collection)].replace_one({'_id': document['_id']},document,{:upsert => true})
#@logger.warn(update_result, :event => event)
end
rescue => e
if e.message =~ /^E11000/
# On a duplicate key error, skip the insert.
# We could check if the duplicate key err is the _id key
# and generate a new primary key.
# If the duplicate key error is on another field, we have no way
# to fix the issue.
@logger.warn("Skipping insert because of a duplicate key error", :event => event, :exception => e)
else
@logger.warn("Failed to send event to MongoDB, retrying in #{@retry_delay.to_s} seconds", :event => event, :exception => e)
sleep(@retry_delay)
retry
end
end
end
def close
@closed.make_true
@bulk_thread.wakeup
@bulk_thread.join
end
end
测试通过,可以根据主键_id进行修改和新增了,但是脚本比较简陋,详细还请参考GitBub
三、动态修改mongodb集合名称
#输出mongodb的配置
output {
stdout { codec => json }
mongodb {
codec => json
#对应mongodb的输出集合,对应的sql语句中含channelcode字段即可
collection => "tgmatch%{channelcode}"
#对应mongodb的输出数据库名称
database => "resource"
uri => "mongodb://hotelaccount:88691111@192.168.2.61/resource"
}
}
select
*,id _id
from hotel_data.tmc_hotel_channel_hotel_list
where channelCode='homeinns' and updatetime > :sql_last_value
输入结果:
id channelCode cityid hotelTgId qdHotelId qdHotelName qdHotelStatus qdCooperationMode qdContract qdContractBeginTime qdContractEndTime qdCreateDateTime companyId updatetime createtime isdelete issaling
88e20c43-d071-11e9-88fe-fa163e26f6c6 homeinns 1 11 JG0001 北京建国饭店 1 1 2019-09-06 14:44:33 2019-09-06 14:44:33 0 88e20c43-d071-11e9-88fe-fa163e26f6c6
83bfdc4f-d05a-11e9-88fe-fa163e26f6c6 homeinns 1 36 J10010 如家精选酒店-北京广渠门内地铁站鸿润店 1 1 2019-09-06 11:59:46 2019-09-06 11:59:46 0 83bfdc4f-d05a-11e9-88fe-fa163e26f6c6
4d0a987c-d087-11e9-88fe-fa163e26f6c6 homeinns 1 84 010088 如家酒店-北京朝阳北路传媒大学褡裢坡地铁站店(内宾) 1 0 2019-09-06 17:20:21 2019-09-06 17:20:21 0 4d0a987c-d087-11e9-88fe-fa163e26f6c6
查看同步结果: