MySQL + SQLAlchemy + MongoDB example usage

from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, String, UniqueConstraint, Index, BigInteger, Date, Text, not_
from sqlalchemy.orm import sessionmaker
from sqlalchemy import create_engine
import mongo_connect
from mongo_connect import MongoDao  # url/db/coll_name are referenced via mongo_connect.* to avoid clashing with the MySQL config below
import time

# from utils.dataetl_config import notice_host, notice_port, notice_user, notice_pwd, notice_db

host="192.168.41.128"
port=3306
user="root"
passwd="mysql"
db="collect_resultdb"

engine = create_engine("mysql+pymysql://%s:%s@%s:%s/%s?charset=utf8&autocommit=true" % (user,passwd,host,port,db),
                       max_overflow=20,pool_size=10)

# from wechat.models import SystemConst, ResearchReport
Session = sessionmaker(bind=engine)
session = Session()

Base = declarative_base()

class Notice(Base):
    """
    This table stores research-report content collected from the WeChat official accounts of major domestic brokerages.
    """
    __tablename__ = 'collect_gtja'
    info_id = Column(String(255), primary_key=True)
    datetime = Column(String(64))
    title = Column(String(255))
    file_name = Column(String(255))
    URL = Column(String(255))
    source = Column(String(255))

    content = Column(Text)
    file_path = Column(Text)
    file_attach = Column(Text)
    summary = Column(Text)

    stock_name = Column(String(255))
    stock_code = Column(String(255))
    news_type = Column(String(255))
    info_type = Column(String(255))
    author = Column(String(255))

    image_attach = Column(Text)
    status = Column(String(10))

    file_id = Column(String(64))

    __table_args__ = (
        UniqueConstraint('info_id'),
        Index('ix_info_id', 'info_id'),
    )

    def query_by_update_time(self,offset,now,status="1"):
        session = Session()
        # dt_s = datetime.now().date()
        reports = session.query(Notice).filter(
            # Notice.datetime.between(offset,now),
            Notice.status == status
        )
        return session,reports

    def create(self):
        session = Session()
        session.add(self)
        session.commit()
        session.close()

    def query_by_id(self,id):
        session = Session()
        reports = session.query(Notice).filter(
            # Notice.datetime.between(offset,now),
            Notice.info_id == id
        )
        return session,reports

    def query_by_status(self):
        session = Session()
        # dt_s = datetime.now().date()
        reports = session.query(Notice).filter(
            # Notice.datetime.between(offset,now),
            Notice.status == "1"
        )
        return session,reports
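
A minimal usage sketch for the model above, assuming the MySQL instance is reachable; the create_all call and all field values are placeholders added here for illustration:

# Create collect_gtja from the model if it does not exist yet (no-op otherwise).
Base.metadata.create_all(engine)

# Insert one row through the create() helper, then read it back by primary key.
demo = Notice(info_id="demo-001", title="demo title", URL="http://example.com/demo",
              status="1", datetime="2019-01-18 15:56:00")
demo.create()

se, rows = Notice().query_by_id("demo-001")
print(rows.first().title)
se.close()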

if __name__ == '__main__':

    notice = Notice()
    mongodao = MongoDao(mongo_connect.url, mongo_connect.db, mongo_connect.coll_name)
    se,results = notice.query_by_status()
    # print(results,"$"*20)
    # print(type(results))
    # print(results.values())
    # print(results.all())
    # for l in results.all():
    # # l = results.first()
    #     print(l.author)
    for result in results.all():
        spider_data = result
        data = {}
        data["update_time"] = time.time()
        data["title"] = spider_data.title
        data["content"] = ""
        data["html"] = ""

        data["url"] = spider_data.URL

        data["source_id"] = ""
        data["summary"] = ""

        data["content_time"] = spider_data.datetime

        # data["info_type"] = InfoType.notice.value

        data["source"] = spider_data.source
        data["source_channel"] = ""

        # data["channel"] = ChannelType.spider.value
        data["stock_name"] = spider_data.stock_name
        data["stock_code"] = spider_data.stock_code

        # data["file_type"] = DataType.pdf.value
        # data["keywords"] = spider_data["summary"]
        data["author"] = spider_data.author
        data["create_time"] = ""

        print(data)
        # logger.info(data["title"])

        mongodao.search_upsert_one(filter={"url": data["url"]},
                              data=data,
                              ids={"update_time": 0,
                                   "D_id": 0})

Insert data

# This snippet comes from a separate spider module; Base and ENGINE2 are assumed to be
# its own declarative base and engine, created the same way as above.
import datetime

from sqlalchemy import Column, DateTime, Integer, String


class KsRoomInfo(Base):
    __tablename__ = 'kuaishou_room_info'
    sys_id = Column(Integer, primary_key=True, autoincrement=True)
    roomId = Column(String(64))  # game ID
    cityName = Column(String(64))
    nickName = Column(String(64))
    desc = Column(String(128))  # match ID
    fans = Column(String(64))
    follow = Column(String(64))
    video_count = Column(String(64))
    game_caty = Column(String(64))
    user_caty = Column(String(64))
    sys_time = Column(DateTime, default=datetime.datetime.now)
    sys_timeindex = Column(Integer)



def saveData(datas):
    session = sessionmaker(bind=ENGINE2)()
    session.add(datas)
    session.commit()
    session.close()


# data, data2 and room are presumably the parsed Kuaishou API responses gathered earlier in the spider (not shown here).
ks = KsRoomInfo(
    # roomId=data["data"]["sensitiveUserInfo"]["kwaiId"],
    roomId=room["args1"],
    cityName=data["data"]["sensitiveUserInfo"]["cityName"],
    nickName=data2["nickName"] if data2["nickName"] else room["nickName"],
    fans=data["data"]["sensitiveUserInfo"]["counts"]["fan"],
    follow=data["data"]["sensitiveUserInfo"]["counts"]["follow"],
    video_count=data["data"]["sensitiveUserInfo"]["counts"]["photo"],
    desc=data2["desc"] if data2["desc"] else room["nickName"],
    sys_time=sys_time,
    sys_timeindex=sys_timeindex,
    game_caty=room["game_caty"],
    user_caty=room["user_caty"],
)

saveData(ks)

mongo_connect.py

# -*- coding: utf-8 -*-
# time: 19-1-18 3:56 PM
# Author: zhuhao
import json
import os
from io import BytesIO
import sys

import bson
from pymongo import MongoClient, ReturnDocument


# from utils.constants import DataStatus
url = "localhost"
db = "collet"
coll_name = "collect_notice"

class MongoDao(object):
    def __init__(self, url, db, coll_name):
        self.collection = self._get_coll(url=url, db=db, coll_name=coll_name)

    def _get_coll(self, url, db, coll_name):
        db = self._get_db(url, db)
        return db[coll_name]

    def _get_connection(self, url):
        return MongoClient(url)

    def _get_db(self, url, db):
        client = self._get_connection(url)
        return client[db]

    def search_one(self, filter={}):
        return self.collection.find_one(filter, no_cursor_timeout=True)

    def search(self, filter={}):
        return self.collection.find(filter, no_cursor_timeout=True)

    def insert_one(self,data={}):
        self.collection.insert_one(document=data)

    def update_one(self, filter={}, data={}, upsert=False):
        """
        upsert = update + insert: insert the document when the filter matches nothing
        :param filter:
        :param data:
        :param upsert:
        :return:
        """
        self.collection.update_one(filter=filter, update={"$set": data}, upsert=upsert)

    def search_upsert_one(self, filter={}, data={}, ids={}):
        """
        :param filter:
        :param data:
        :param ids: {"key1": 1, "key2": 0} -- 1 merges the new value with the stored one,
                    0 keeps the stored value
        :return:
        """
        if filter:
            record = self.collection.find_one(filter=filter)
        else:
            record = None
        if record:
            for id_key, v in ids.items():
                if id_key in record:
                    if v == 1 and id_key in data and isinstance(data[id_key], type(record[id_key])):
                        data[id_key] += record[id_key]
                    else:
                        data[id_key] = record[id_key]
            self.collection.update_one(filter=filter, update={"$set": data})
        else:
            # insert
            self.collection.insert_one(document=data)
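
A short sketch of how search_upsert_one treats the ids map: a value of 1 merges the new field with the stored one (here an accumulating counter), while 0 keeps whatever is already stored. It reuses the module-level url/db/coll_name, assumes a reachable MongoDB, and the field names are hypothetical.

dao = MongoDao(url, db, coll_name)

# First call (on an empty collection): nothing matches, so the document is inserted as-is.
dao.search_upsert_one(filter={"url": "http://example.com/a"},
                      data={"url": "http://example.com/a", "view_count": 3, "D_id": "doc-1"})

# Second call: view_count is merged (3 + 2 -> 5), D_id keeps the stored "doc-1".
dao.search_upsert_one(filter={"url": "http://example.com/a"},
                      data={"url": "http://example.com/a", "view_count": 2, "D_id": "ignored"},
                      ids={"view_count": 1, "D_id": 0})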






def insertFile(file_path):
    """
    insert a file smaller than 16MB (the BSON document limit) into mongodb
    :param file_path:
    :return:
    """
    try:
        # conn = MongoClient('10.101.12.23', 27017)
        conn = MongoClient('101.132.167.173', 8003)
        db = conn.mydb
        coll = db.test_set
        with open(file_path, 'rb') as myimage:
            content = BytesIO(myimage.read())
            coll.insert_one(dict(
                content=bson.binary.Binary(content.getvalue()),
                filename=file_path.split(".")[0],
                file_format=file_path.split(".")[1]
            ))
    except Exception as e:
        print(e)
        sys.exit(1)


def file_read_binary(plugin_name, file_path=None):
    """
    read a file stored by insertFile back from mongodb (binary content)
    and write it under file_path
    :return:
    """
    try:
        conn = MongoClient('101.132.167.173', 8003)
        db = conn.mydb
        files = db.test_set
        res = files.find_one({"filename": plugin_name})
        print(res)
        out_path = os.path.join(file_path, plugin_name) + "." + res["file_format"]
        with open(out_path, "wb") as fout:
            fout.write(res['content'])
    except Exception as e:
        print(e)
        sys.exit(1)
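
Round-trip usage of the two helpers above, assuming the hard-coded MongoDB instance is reachable and a local demo.pdf exists (both are assumptions for illustration):

insertFile("demo.pdf")                      # stored with filename "demo" and file_format "pdf"
file_read_binary("demo", file_path="/tmp")  # written back as /tmp/demo.pdf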


def _get_connection(url):
    return MongoClient(url)


def _get_db(url, db):
    client = _get_connection(url)
    return client[db]


def get_coll(url, db, coll_name):
    db = _get_db(url, db)
    return db[coll_name]


def search(url, db, coll_name, filter={}):
    collection = get_coll(url=url, db=db, coll_name=coll_name)
    return collection.find(filter, no_cursor_timeout=True)


def search_one(url, db, coll_name, filter={}, collection=None):
    if collection is None:
        collection = get_coll(url=url, db=db, coll_name=coll_name)
    return collection.find_one(filter, no_cursor_timeout=True)


def fetch_one(url, db, coll_name, filter={}):
    collection = get_coll(url=url, db=db, coll_name=coll_name)
    # return collection.find_one(filter)
    return collection.find_one_and_update(filter,
                                          {"$set": {"status": "1"}},
                                          return_document=ReturnDocument.AFTER)
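
fetch_one doubles as a simple work-queue claim: find_one_and_update atomically stamps the matched document with status "1" and returns the updated version, so two workers cannot grab the same record. A hypothetical polling loop (connection string and filter are assumptions):

while True:
    doc = fetch_one(url="mongodb://localhost:27017/", db="collet", coll_name="collect_notice",
                    filter={"status": {"$exists": False}})
    if doc is None:
        break  # nothing left unclaimed
    print(doc["_id"])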


def get_collection_names(url):
    """
    map every database on the mongodb instance to its collection names
    :param url:
    :return:
    """
    client = _get_connection(url)
    d = dict((db, client[db].list_collection_names())
             for db in client.list_database_names())
    return d


def update_one(url, db, coll_name, filter={}, data={}, upsert=False, collection=None):
    if collection is None:
        collection = get_coll(url=url, db=db, coll_name=coll_name)
    collection.update_one(filter=filter, update={"$set": data}, upsert=upsert)
    # db.stu.update_one({"age": 18}, {"$set": {"age": 100}})


def insert_one(url, db, coll_name, data={}):
    collection = get_coll(url=url, db=db, coll_name=coll_name)
    collection.insert_one(document=data)


def insert_or_update_one(url, db, coll_name, filter={}, data={}, id_key=None):
    collection = get_coll(url=url, db=db, coll_name=coll_name)
    if filter:
        record = collection.find_one(filter=filter)
    else:
        record = None
    if record:
        # update: keep the stored update_time and id field
        if "update_time" in record:
            data["update_time"] = record["update_time"]
        if id_key is not None:
            data[id_key] = record[id_key]
        collection.update_one(filter=filter, update={"$set": data})
    else:
        # insert
        collection.insert_one(document=data)


def save_or_update_one(url, db, coll_name, filter={}, data={}):
    collection = get_coll(url=url, db=db, coll_name=coll_name)
    record = collection.find_one(filter=filter)
    if record:
        # update: keep the stored D_id
        data["D_id"] = record["D_id"]
        collection.update_one(filter=filter, update={"$set": data})
    else:
        # insert
        collection.insert_one(document=data)
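
A hypothetical pair of calls showing that save_or_update_one preserves the stored D_id across updates (connection string and field values are assumptions):

mongo_url = "mongodb://localhost:27017/"

# First call inserts the document, so D_id is stored as "doc-1".
save_or_update_one(mongo_url, "collet", "collect_notice",
                   filter={"url": "http://example.com/a"},
                   data={"url": "http://example.com/a", "D_id": "doc-1", "title": "first title"})

# Second call matches the stored document: D_id stays "doc-1", only title is refreshed.
save_or_update_one(mongo_url, "collet", "collect_notice",
                   filter={"url": "http://example.com/a"},
                   data={"url": "http://example.com/a", "D_id": "new-id", "title": "second title"})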


if __name__ == "__main__":
    # print (search(url = 'mongodb://101.132.167.173:8013/',db="mydb",coll_name="test_clean")[0]["result"])
    print(get_collection_names(url='mongodb://101.132.167.173:8014/')["pyspider_resultdb1"])
    # result = fetch_one(url='mongodb://101.132.167.173:8014/',
    #              db="mydb",
    #              coll_name="test_clean",
    #              filter={'status': {'$exists': 1}})
    # print(result)
    # if result:
    #     print("00")
    # else:
    #     print("11")

 
