pymongo使用经验

1、MongoClient 不要在 for 循环里反复创建,应放到循环外作为全局变量复用,速度可提升约 5 倍;
2、出现 pymongo.errors.CursorNotFound: Cursor not found,可能是创建的数据库连接过多导致的;
3、no_cursor_timeout=True 参数对单次查询影响不大。

#!/usr/bin/env python
# -*-  coding:utf-8  -*-
# @Date:2021/9/7
# @Time:13:46
import time
from pymongo import MongoClient
from database.simplelogger import SimpleLogger
from readwrite.writefile import FileWriter
# Module-level logger obtained from the project's SimpleLogger under the
# name "msgdeallogger".
logger = SimpleLogger.getLogger(u"msgdeallogger")
"""1000 records consuming 43.94992995262146 s"""

# Shared writer used by main() to produce the Excel output.
FW = FileWriter()
# Do not create MongoClient inside a for loop; keeping a single client as a
# module-level global was observed to be about 5x faster.
# NOTE(review): the URI below is a placeholder (user/password/ip/port) and
# must be replaced with real connection details before running.
db_client = MongoClient('mongodb://user:password@ip:port/')


def main(database, collection):
    """Export every non-deleted company record to ``company333.xlsx``.

    Reads all documents whose ``is_deleted`` field is not ``True`` from
    ``database.collection``, resolves each company's name and deletion flag
    via ``find_company_name`` and its type labels via ``transfer_key``, then
    writes one row per record to sheet ``Sheet1``.

    :param database: name of the MongoDB database to read from
    :param collection: name of the collection inside ``database``
    :return: None. Nothing is written when ``check_mongodb`` returns a
        falsy value.
    :raises KeyError: if a document lacks ``company_id`` or ``company_types``
    """
    # NOTE(review): check_mongodb is not defined in the visible part of this
    # file; presumably it verifies the database/collection are reachable.
    if not check_mongodb(db_client, database, collection):
        return

    source = db_client[database][collection]

    # A cursor opened with no_cursor_timeout=True is never reaped by the
    # server on inactivity, so it must be closed explicitly even if fetching fails.
    cursor = source.find({"is_deleted": {"$ne": True}}, no_cursor_timeout=True)
    try:
        records = list(cursor)
    finally:
        cursor.close()

    rows = []
    started = time.time()
    for index, record in enumerate(records):
        # Report once how long the first 1000 records took to process.
        if index == 1000:
            print('1000 records consuming {} s'.format(time.time() - started))
        cid = record['company_id']
        company_type_list = record['company_types']
        company_name, is_deleted = find_company_name(cid)
        company_type = transfer_key(company_type_list)
        rows.append([cid, company_name, is_deleted, company_type])

    columns = ["cid", "company_name", "is_deleted", "company_type"]
    FW.excel_write_clean(excel_path='company333.xlsx', sheet_name='Sheet1', insert_list=rows, columns=columns)



def find_company_name(cid):
    """Look up a company's name and deletion flag by its ``cid``.

    Queries collection ``yyy`` of database ``xxx`` for the first document
    whose ``cid`` field equals ``cid``. ``find_one()`` is used because only
    the first match is needed; it returns a single document or ``None``,
    whereas ``find()`` returns a cursor.

    :param cid: company id to search for
    :return: ``(company_name, is_deleted)``. When no document matches, or
        the matched document lacks ``name`` or ``is_deleted``, the
        placeholder ``('查无此公司', True)`` is returned. Returns ``None``
        when ``check_mongodb`` reports a falsy value, so callers that unpack
        the result will fail in that case.
    """
    database = 'xxx'
    collection = 'yyy'
    not_found = ('查无此公司', True)

    # NOTE(review): check_mongodb is not defined in the visible part of this
    # file; presumably it verifies the database/collection are reachable.
    if not check_mongodb(db_client, database, collection):
        return None

    document = db_client[database][collection].find_one({"cid": cid})
    if document is None:
        return not_found

    # Only a missing field is an expected failure here; anything else
    # (e.g. KeyboardInterrupt) must propagate instead of being swallowed.
    try:
        return document['name'], document['is_deleted']
    except KeyError:
        return not_found


def transfer_key(company_type_list):
    """Translate company type codes into a comma-separated label string.

    Each code is converted with ``str()`` and looked up in the code-to-label
    table. Codes without an entry are rendered as ``"不存在标签<code>"``
    so that unknown values stay visible in the output.

    :param company_type_list: iterable of type codes (any ``str()``-able values)
    :return: the labels joined with ``','``; an empty string for empty input
    """
    type_dict = {u"80": u"金融"}
    labels = []
    for type_code in company_type_list:
        key = str(type_code)
        # dict.get replaces the former bare ``except`` used as control flow.
        labels.append(type_dict.get(key, "不存在标签" + key))
    return ','.join(labels)



# Script entry point: run the export against database 'database' and
# collection 'company_xxx' only when executed directly, not when imported.
if __name__ == '__main__':
    main(database='database', collection='company_xxx')

https://blog.csdn.net/qq_42470170/article/details/121017679

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值