# coding=gbk
import logging
import os
from datetime import datetime
from logging import handlers
from gridfs import GridFS
from pymongo import MongoClient
# syn_file_path = input("Please enter the location of the pdf file to sync, example('C:/Users/admin/test-pdf'): ")
# syn_mongo_url = input("Please enter Mongo url to sync, example('192.168.0.234:27017'): ")
# syn_mongo_username = input("Please enter Mongo username to sync, example('admin'): ")
# syn_mongo_password = input("Please enter Mongo password to sync, example('123456'): ")
# syn_mongo_DB = input("Please enter the DB name to synchronize to Mongo, example('FMSQ'): ")
# syn_mongo_COLLECTION = input("Please enter the DOCUMENT name to synchronize to Mongo, example('patents'): ")
# Per-run settings. Leave a value empty ('') to use the built-in default that
# is applied just below.
syn_file_path = ''
syn_mongo_DB = ''
syn_mongo_COLLECTION = ''
syn_mongo_url = ''
syn_mongo_username = ''
syn_mongo_password = ''

# Fall back to the defaults for anything left blank. An empty string is falsy,
# so `value or default` selects the default exactly when the value is ''.
syn_file_path = syn_file_path or r'F:\专利索引相关文档\2022_04_08_测试同步数据\摘要附图外观小图各5条专利\WG'
syn_mongo_DB = syn_mongo_DB or 'document-wg'
syn_mongo_COLLECTION = syn_mongo_COLLECTION or 'fs'
syn_mongo_url = syn_mongo_url or '221.194.47.208:26006'
# The username and password deliberately have no default: they stay '' unless
# set above, and the client is then created without credentials.
# Directory holding the log files for this database/collection pair.
log_file = f'./log/syn_pic_to_mongo/{syn_mongo_DB}-{syn_mongo_COLLECTION}'
check_file = os.path.exists(log_file)
if check_file:
    print("log file is exit, begin syn mongo to es============================>")
else:
    # exist_ok=True: do not fail if the directory is created by something else
    # between the existence check above and this call.
    os.makedirs(log_file, exist_ok=True)
class Logger(object):
    """Configure a named ``logging`` logger that writes to the console and to a
    time-rotated log file.

    The configured logger is exposed as the ``logger`` attribute.
    """

    # Short level names accepted by ``__init__`` mapped to ``logging`` levels.
    level_relations = {
        'debug': logging.DEBUG,
        'info': logging.INFO,
        'warning': logging.WARNING,
        'error': logging.ERROR,
        'crit': logging.CRITICAL
    }

    def __init__(self, filename, level='info', when='D', backCount=3,
                 fmt='%(asctime)s - %(pathname)s[line:%(lineno)d] - %(levelname)s: %(message)s'):
        """Create (or reuse) the logger named after ``filename``.

        Args:
            filename: Path of the log file; also used as the logger's name.
            level: A key of ``level_relations``. An unknown key resolves to
                ``None``, which ``setLevel`` rejects.
            when: Rotation interval unit, passed to ``TimedRotatingFileHandler``.
            backCount: Passed to ``TimedRotatingFileHandler`` as ``backupCount``.
            fmt: Format string applied to both handlers.
        """
        self.logger = logging.getLogger(filename)
        format_str = logging.Formatter(fmt)
        self.logger.setLevel(self.level_relations.get(level))

        # logging.getLogger returns the same object for the same name, so a
        # second Logger(filename) would otherwise attach a second pair of
        # handlers and every message would be written twice.
        if not self.logger.handlers:
            sh = logging.StreamHandler()
            sh.setFormatter(format_str)
            th = handlers.TimedRotatingFileHandler(
                filename=filename, when=when, backupCount=backCount, encoding='utf-8')
            th.setFormatter(format_str)
            self.logger.addHandler(sh)
            self.logger.addHandler(th)
# One logger for everything (debug and up) and one for errors only; both live
# in the log directory prepared above.
log_all_path = f'{log_file}/syn_pdf_to_mongo_all.log'
log_error_path = f'{log_file}/syn_pdf_to_mongo_error.log'
log = Logger(log_all_path, level='debug')
logError = Logger(log_error_path, level='error')

# Credentials are passed only when both a username and a password are set;
# otherwise the client is created from the URL alone.
client = (
    MongoClient(syn_mongo_url, username=syn_mongo_username, password=syn_mongo_password)
    if syn_mongo_username != '' and syn_mongo_password != ''
    else MongoClient(syn_mongo_url)
)

# GridFS bucket in the target database, using the configured collection prefix.
db = client[syn_mongo_DB]
fs = GridFS(db, collection=syn_mongo_COLLECTION)
# Number of regular files handled by upload(); module-level so it accumulates
# across calls.
count = 0


def upload():
    """Upload every regular file in ``syn_file_path`` to GridFS.

    A file whose base name is already stored in the bucket is skipped. Each new
    file is stored under its base name with two extra fields: ``file_path``
    (the full source path) and ``upload_time`` (local time of the upload).
    Progress is written to the ``log`` logger and the module-level ``count``
    is incremented once per regular file seen.

    Exceptions from listing the directory, reading a file or talking to
    MongoDB are not caught here and propagate to the caller.
    """
    global count

    log.logger.info(f"syn_file_path: {syn_file_path}")
    log.logger.info(f"syn_mongo_url: {syn_mongo_url}")
    log.logger.info(f"syn_mono_username: {syn_mongo_username}")
    # Never write the real password to the log; only show whether one is set.
    masked_password = '***' if syn_mongo_password else ''
    log.logger.info(f"syn_mono_password: {masked_password}")
    log.logger.info(f"syn_mongo_DB: {syn_mongo_DB}")
    log.logger.info(f"syn_mongo_COLLECTION: {syn_mongo_COLLECTION}")

    for file_name in os.listdir(syn_file_path):
        file_path = os.path.join(syn_file_path, file_name)
        if not os.path.isfile(file_path):
            # Sub-directories and other non-regular entries cannot be opened
            # for reading; skip them instead of aborting the whole run.
            log.logger.info(f"{file_path} is not a regular file, skipped ============================>")
            continue

        count += 1
        # Files are stored under their base name (see put() below), so the
        # duplicate check must query the same value. Querying the full path
        # never matches and re-uploads every file on every run.
        if fs.exists({"filename": file_name}):
            log.logger.info(f"{file_path} file_name exists {count} ============================>")
            continue

        # The with-block guarantees the handle is closed; GridFS reads the
        # file object itself, so the content is not loaded into memory first.
        with open(file_path, 'rb') as source:
            fs.put(
                source,
                filename=file_name,
                # Fixed field name: the path itself used as a key gives a
                # different, dot-containing field name for every file.
                file_path=file_path,
                upload_time=datetime.now(),
            )
        log.logger.info(f"{file_name} file_name write success {count} ============================>")

    log.logger.info("All file write success down")
# Run the upload only when this file is executed as a script, not on import.
if __name__ == '__main__':
    upload()
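# NOTE: the lines below are residue from the web page this script was copied
# from (dates and view counts); they are not part of the program.
# 08-20
# 4万+
# 08-14
# 1万+
# 04-06
# 919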