#!/usr/bin/env python
# -*- coding:utf-8 -*-
import sys
import pymongo
# Global runtime options; defaults and command-line values are stored here
# by the __main__ block ('count', 'skip', 'src', 'dest', 'logPath', ...).
config = {}
# Log file that receives mismatch reports; opened by the __main__ block
# from config['logPath'] and written to by check().
errLogFile = None
def tips():
    """Print a three-line usage banner (border, example command, border) to stdout.

    The example shows the three mandatory options understood by the
    ``__main__`` block: ``--src``, ``--dest`` and ``--logPath``.
    """
    border = '|------------------------------------------------------------------------------------------------------------------------------------------------------------------------|'
    # The example previously read "./mongodbCompare.py.py ... /opt/xx)":
    # a doubled extension and an unbalanced parenthesis.
    usage = '| 启动demo ./mongodbCompare.py --src=localhost:27017/db? --dest=localhost:27018/db? --logPath=/opt/xx |'
    print(border)
    print(usage)
    print(border)
class MongoCluster:
    """Holder for one MongoDB connection string and its pymongo client.

    ``conn`` stays ``None`` until :meth:`connect` is called; callers reach
    the database through ``conn`` (e.g. ``cluster.conn.get_database()``).
    """

    # pymongo client; None until connect() has been called
    conn = None
    # connection string handed to pymongo.MongoClient
    url = ""

    def __init__(self, url):
        """Remember ``url``; no client is created until connect()."""
        self.url = url
        # Set per instance so state never leaks through the class attribute.
        self.conn = None

    def connect(self):
        """Create a ``pymongo.MongoClient`` for ``self.url`` and store it in ``self.conn``."""
        self.conn = pymongo.MongoClient(self.url)

    def close(self):
        """Close the client if one was created.

        Calling this before connect() is a no-op; previously it raised
        AttributeError because ``conn`` was still ``None``.
        """
        if self.conn is not None:
            self.conn.close()
"""
对比校验mongo信息
"""
def check(src, dest):
srcDatabase = src.conn.get_database()
destDatabase = dest.conn.get_database()
srcCollectionNames = srcDatabase.list_collection_names()
destCollectionNames = destDatabase.list_collection_names()
srcColls = [coll for coll in srcCollectionNames if coll not in config['skip'].split(",")]
dstColls = [coll for coll in destCollectionNames if coll not in config['skip'].split(",")]
print("src Collection", srcColls)
print("dst Collection", dstColls)
# src 存在的必须在 dest
differenceCollectionNames = [item for item in srcColls if not item in dstColls]
if len(differenceCollectionNames) > 0:
errLogFile.writelines("src 与 dst Collection 差集:" + "".join(differenceCollectionNames) + "\n")
for coll in srcColls:
if coll in differenceCollectionNames:
print("src 与 dst Collection 差集:%s 跳过" % (coll))
continue
srcCollection = srcDatabase[coll]
destCollection = destDatabase[coll]
srcCollectionCount = srcCollection.count()
destCollectionCount = destCollection.count()
if srcCollectionCount != destCollectionCount:
errLogFile.writelines(
"Collection: " + coll + " 条数不同,src count:" + str(srcCollectionCount) + "条 dest count:" + str(
destCollectionCount) + "\n")
continue
src_index_length = len(srcCollection.index_information())
dest_index_length = len(destCollection.index_information())
if src_index_length != dest_index_length:
errLogFile.writelines("Collection: " + coll + " 索引信息不同")
continue
if data_comparison(srcCollection, destCollection):
print("比对 Collection : %s 通过 " % (coll))
else:
errLogFile.writelines("Collection: " + coll + " 数据抽样校验不通过")
continue
return True
"""
随机抽样对比数据
"""
def data_comparison(srcColl, dstColl):
count = config['count']
if count > srcColl.count():
count = srcColl.count()
if count == 0:
return True
rec_count = count
batch = 16
show_progress = (batch * 64)
total = 0
while count > 0:
# sample a bounch of docs
docs = srcColl.aggregate([{"$sample": {"size": batch}}])
while docs.alive:
doc = docs.next()
migrated = dstColl.find_one(doc["_id"])
# both origin and migrated bson is Map . so use ==
if doc != migrated:
print("DIFF => src_record[%s], dst_record[%s]" % (doc, migrated))
return False
total += batch
count -= batch
if total % show_progress == 0:
print(" ... process %d docs, %.2f %% !" % (total, total * 100.0 / rec_count))
return True
if __name__ == "__main__":
    # Script entry point: parse --key=value options into `config`, open the
    # mismatch log, connect to both clusters, run check() and exit with
    # 0 (SUCCESS), -1 (FAIL) or 1 (bad arguments / connection set-up error).
    tips()
    config['count'] = 100
    # Collections excluded from the comparison (comma-separated).
    config['skip'] = 'system.profile,system.js'
    for arg in sys.argv[1:]:
        key, sep, value = arg[2:].partition("=")
        # Reject anything that is not "--key=value"; such arguments used to
        # crash with IndexError or create a garbled key.
        if not arg.startswith("--") or not sep or not key:
            print('参数格式错误, 应为 --key=value:', arg)
            sys.exit(1)
        if key == 'count':
            try:
                value = int(value)
            except ValueError:
                print('参数格式错误, --count 必须为整数:', arg)
                sys.exit(1)
        config[key] = value
    print('启动参数:', config)

    # Fail with a clear message instead of a KeyError traceback.
    missing = [key for key in ('src', 'dest', 'logPath') if key not in config]
    if missing:
        print('缺少必要参数:', ", ".join("--" + key for key in missing))
        sys.exit(1)

    # `with` closes the log and `finally` closes the clients on every exit
    # path; the old close calls sat after exit() and never ran.
    with open(config['logPath'], "a") as errLogFile:
        src = dest = None
        try:
            try:
                src, dest = MongoCluster(config['src']), MongoCluster(config['dest'])
                src.connect()
                dest.connect()
            except Exception as e:
                print('mongo对比程序出错:', e)
                # A set-up failure must not look like success (was exit()).
                sys.exit(1)
            if check(src, dest):
                print('SUCCESS')
                sys.exit(0)
            print('FAIL')
            sys.exit(-1)
        finally:
            for cluster in (src, dest):
                # Skip clusters that never got a client.
                if cluster is not None and cluster.conn is not None:
                    cluster.close()