# csv导mongo: import CSV files into MongoDB

import logging
import os
import time

import pandas as pd

from ..mongo_ import MongoL

logger = logging.getLogger(__name__)


class CsvMongo(object):
    """Import ``url``/``username`` rows from CSV files into a MongoDB collection.

    Each recognised CSV file is tied to a numeric id (see ``_SOURCE_IDS``)
    that is stored as both ``app_id`` and ``module_id`` on every document
    written for that file.
    """

    # CSV base name -> id used for both ``app_id`` and ``module_id``.
    _SOURCE_IDS = {
        "dgtle.csv": 4,
        "oschina.csv": 18,
        "feng.csv": 17,
        "oppo.csv": 8,
        "idol001.csv": 19,
        "gelonghui.csv": 20,
    }

    def __init__(self):
        """Create an empty file list and obtain the Mongo client/collection."""
        self.file_list = []
        # NOTE(review): MongoL is a project helper; mongo_col() is assumed to
        # return a (client, collection) pair -- confirm against its definition.
        mongo = MongoL()
        self.mongo_cli, self.collection = mongo.mongo_col()

    def csv_file(self, dir_path):
        """Collect the paths of all ``.csv`` files below *dir_path*.

        The directory tree is walked recursively. Matches are appended to
        ``self.file_list`` (results accumulate across calls) as paths joined
        with the directory they were found in, so they can be opened
        regardless of the current working directory.

        :param dir_path: root directory to search.
        :return: ``self.file_list`` (the same list object, not a copy).
        """
        for root, _dirs, names in os.walk(dir_path):
            for name in names:
                # Match on the extension only; a substring test would also
                # pick up names such as "data.csv.bak".
                if name.endswith(".csv"):
                    self.file_list.append(os.path.join(root, name))

        return self.file_list

    def write_db(self, file, app_id, module_id):
        """Upsert every row of *file* into the collection.

        The CSV must contain ``url`` and ``username`` columns. One document
        is upserted per row, keyed on ``(app_id, module_id, result)`` where
        ``result`` is the row's username.

        :param file: path of the CSV file to read.
        :param app_id: value stored in (and matched on) ``app_id``.
        :param module_id: value stored in (and matched on) ``module_id``.
        """
        # Read in chunks so large files are never fully loaded into memory.
        chunks = pd.read_csv(file, chunksize=10000, usecols=['url', 'username'])
        for chunk in chunks:
            # Select columns by label: ``usecols`` does not control column
            # order, so positional access could swap url and username.
            for url, username in zip(chunk['url'], chunk['username']):
                data = {"app_id": app_id, "module_id": module_id, "result": username,
                        "result_url": url, "app_type_key": "community", "origin": {},
                        "ct": int(time.time())}
                # The filter must use this file's ids; a fixed id would make
                # rows from other sources overwrite a different source's docs.
                res = self.collection.update_one(
                    {"app_id": app_id, "module_id": module_id, "result": username},
                    {"$set": data},
                    upsert=True)
                logger.info("updata_count:%s,instering:%s", res.matched_count, data)

    # def find_

    def csv_mongo(self, files):
        """Import every recognised CSV in *files*.

        A file is recognised by its base name (see ``_SOURCE_IDS``), so both
        bare file names and full paths are accepted. Unrecognised files are
        skipped silently.

        :param files: iterable of CSV file names or paths.
        """
        for file in files:
            source_id = self._SOURCE_IDS.get(os.path.basename(file))
            if source_id is None:
                continue
            self.write_db(file=file, app_id=source_id, module_id=source_id)


if __name__ == '__main__':
    # Import every CSV that lives next to this script.
    # Make the path absolute first: when ``__file__`` is a bare relative
    # name, ``os.path.dirname(__file__)`` is '' and ``os.walk('')`` yields
    # nothing, so no files would be found.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    cm = CsvMongo()
    files = cm.csv_file(script_dir)
    cm.csv_mongo(files)
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值