sqlalchemy
1、与数据库的表关联
1-1、自动生成models - sqlacodegen
这个工具可以把数据库的表转成sqlalchemy用的class。
注意:表必须有主键(primary key),否则 sqlacodegen 生成的是 Table 对象而不是 class。
pip install sqlacodegen
sqlacodegen --tables 表名1,表名2 mysql+pymysql://user:pwd@host:port/数据库名?charset=utf8 > tes_model.py(生成后的文件名)
models/enter.py
①sqlacodegen --tables comp_apt mysql+pymysql://user:pwd@host:port/数据库名?charset=utf8 > models/enter.py
②sqlacodegen --outfile=models.py mysql+pymysql://root:password@127.0.0.1:3306/test --tables teacher,student
1-2、直接使用 autoload = True 加载
在构建 model 的时候,使用 autoload = True,sqlalchemy 依据数据库表的字段结构,自动加载 model 的 Column。使用这种方法时,在构建 model 之前,Base 类要与 engine 进行绑定。
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.sql.schema import Table
# Engine pointing at the local SQLite file used by the reflection example.
engine = create_engine("sqlite:///testdb.db")
# Declarative base class; its MetaData collects the tables of every model.
Base = declarative_base()
metadata = Base.metadata
# Bind the metadata to the engine before any reflected model is declared.
metadata.bind = engine
class Employee(Base):
    """Model whose columns are reflected from the existing ``employees`` table."""

    # ``autoload_with`` names the engine used for reflection explicitly; it
    # replaces ``autoload=True`` on bound metadata, which SQLAlchemy 1.4
    # deprecates and 2.0 removes.
    __table__ = Table("employees", metadata, autoload_with=engine)
class ZjyCompApt(Base):
    """Declarative model for the ``comp_apt`` table."""

    # Declarative mapping looks for ``__tablename__``; a plain ``table_name``
    # attribute is ignored and the class cannot be mapped.
    __tablename__ = 'comp_apt'

    id = Column(Integer, primary_key=True, autoincrement=True)
    comp_id = Column(String(64))
    rank_num = Column(Integer)
    apt_name = Column(String(32))
    cert_no = Column(String(32))
    issue_date = Column(String(64))
    valid_bdate = Column(String(32))
    valid_edate = Column(String(32))
    issue_unit = Column(String(32))

    # Hand-written initializer (not generated) to make inserting new rows
    # convenient.
    def __init__(self, comp_id, rank_num, apt_name, cert_no, issue_date, valid_bdate, valid_edate, issue_unit=None):
        """Populate every non-key column; ``issue_unit`` is optional."""
        self.comp_id = comp_id
        self.rank_num = rank_num
        self.apt_name = apt_name
        self.cert_no = cert_no
        self.issue_date = issue_date
        self.valid_bdate = valid_bdate
        self.valid_edate = valid_edate
        self.issue_unit = issue_unit
2、创建
import datetime
from sqlalchemy import create_engine, Column, Integer, String, DateTime, Text, inspect, Boolean
from sqlalchemy.dialects.mysql import LONGTEXT
from sqlalchemy.orm import declarative_base, sessionmaker
import os

# The connection URI is taken from the DB_URI environment variable so that
# real credentials never live in the source file. The fallback is only a
# local placeholder; replace user/pwd/host through the environment.
DB_URI = os.environ.get('DB_URI', 'mysql+pymysql://user:pwd@127.0.0.1:3306/resource?charset=utf8')
engine_mysql = create_engine(DB_URI)
# Session factory bound to the MySQL engine.
Session = sessionmaker(bind=engine_mysql)
Base = declarative_base()
# Inspector for looking at the schema of the connected database.
insp = inspect(engine_mysql)
def time_shift(time_str: str):
    """Normalize a ``YYYY-MM-DD`` date string to ``YYYY-MM-DD HH:MM:SS``.

    :param time_str: date text in ``%Y-%m-%d`` format.
    :return: the same date formatted as ``%Y-%m-%d %H:%M:%S`` (time part
        ``00:00:00``), or ``None`` when the input cannot be parsed.
    """
    try:
        parsed = datetime.datetime.strptime(time_str, "%Y-%m-%d")
    except (TypeError, ValueError):
        # ValueError: text in another format; TypeError: non-string input
        # such as None. Either way there is nothing usable to return.
        return None
    return parsed.strftime("%Y-%m-%d %H:%M:%S")
class ResourceBaseTable(Base):
    """Abstract base model with the columns shared by the announcement tables.

    ``__abstract__ = True`` marks this class as a base class only, so no
    table is created for it; concrete subclasses supply ``__tablename__``.
    """

    __abstract__ = True

    id = Column(Integer, primary_key=True, comment="主键id", autoincrement=True)
    creat_time = Column(DateTime, comment="采集时间", default=datetime.datetime.now, index=True)
    ann_tittle = Column(String(1000), comment="公告标题", )
    ann_url = Column(String(768), comment="公告官方url", unique=True)
    html_content = Column(LONGTEXT, comment="公告富文本")
    area_name = Column(String(64), comment="地区名称")
    source_plat = Column(String(200), comment="来源平台")
    industry = Column(String(100), comment="行业", )
    is_clean = Column(Boolean, comment="是否已清洗", default=False, )

    @classmethod
    def exist_url(cls, ann_url, ):
        """Tell whether a row with the given announcement URL already exists.

        :param ann_url: value compared against the ``ann_url`` column.
        :return: scalar result of the EXISTS query.
        """
        with Session() as session:
            query = session.query(cls).filter_by(ann_url=ann_url, )
            return session.query(query.exists()).scalar()

    @classmethod
    def save_data(cls, item):
        """Insert one row built from ``item`` and commit it.

        Empty values (empty strings, empty containers, ``None``) are stored
        as NULL. Numeric and boolean values are kept even when falsy, so
        ``0`` and ``False`` are not silently turned into NULL. The caller's
        dict is left unmodified.

        :param item: mapping of column name to value.
        """
        cleaned = {
            key: None if (not value and not isinstance(value, (bool, int, float))) else value
            for key, value in item.items()
        }
        with Session() as session:
            session.add(cls(**cleaned))
            session.commit()
class ProjectBuildFileClarifyTable(ResourceBaseTable):
    """Construction projects: file clarification announcements."""

    __tablename__ = "project_build_file_clarify"

    data_type = Column(String(32), comment='数据类型 1-答疑 2资审')
    project_type = Column(Integer, comment="项目类型 1房屋及市政 6政府采购 ", default=1)
    delivery_time = Column(String(50), comment="提交时间", )
    question_content = Column(Text, comment="答疑内容", )

    @classmethod
    def save_data(cls, item):
        """Normalize ``delivery_time`` (when present) and store the row.

        The normalized value is written back into ``item`` before the
        parent ``save_data`` is called.
        """
        raw_time = item.get("delivery_time")
        if raw_time:
            normalized = time_shift(raw_time)
            item['delivery_time'] = normalized
        super().save_data(item)
class ProjectBuildBidOpeningRecordTable(ResourceBaseTable):
    """Construction projects: bid-opening records."""

    __tablename__ = "project_build_bid_opening_record"

    label_name = Column(String(300), comment="标段名称", )
    ann_release_date = Column(String(50), comment="公告发布日期", )
    project_type = Column(Integer, comment="项目类型 1房屋及市政 6政府采购 ", default=1)
    margin_amount = Column(String(1000), comment="保证金金额")

    @classmethod
    def save_data(cls, item):
        """Store one company row per bidder, then store the record itself.

        ``item["enter_name"]`` may be a list of company names or a single
        name. Each name is saved through ``CompanyTable.save_data`` together
        with the announcement fields, and ``item["enter_name"]`` is then
        replaced by the comma-joined names before the record is saved.

        NOTE(review): no ``enter_name`` column is declared in the visible
        part of this model - confirm it exists, otherwise building the row
        from ``item`` will reject that key.
        """
        enter_name = item.get("enter_name")
        if enter_name:
            # A bare string is one company; iterating it would yield characters.
            names = [enter_name] if isinstance(enter_name, str) else list(enter_name)
            for name in names:
                CompanyTable.save_data({
                    "enter_name": name,
                    "ann_tittle": item.get("ann_tittle"),
                    "ann_url": item.get("ann_url"),
                    "area_name": item.get("area_name"),
                    "source_plat": item.get("source_plat"),
                    "industry": item.get("industry"),
                })
            # Join the whole list, not the last loop value.
            item['enter_name'] = ",".join(names)
        super().save_data(item)
使用 sqlalchemy 创建/删除表
# 只要在创表的那里加
class ResourceBaseTable(Base, ):
__tablename__ = "XXXXX"
...............
if __name__ == '__main__':
    # Create the tables registered on Base.metadata.
    Base.metadata.create_all(bind=engine_mysql)
    # To drop the tables instead, run:
    # Base.metadata.drop_all(engine_mysql)
filter用法
from operator import and_, or_
from random import randint
from tkinter.messagebox import RETRY
from turtle import title
from uuid import uuid4
from sqlalchemy import Column, Integer, String, Float, Text, and_, or_
from util_db import Base, Session
# 创建数据表类型
class Article(Base):
    """Sample model mapped to the ``t_article`` table for the filter demos."""

    __tablename__ = 't_article'

    id = Column(Integer, autoincrement=True, primary_key=True)
    # NULL is not allowed for title and price.
    titile = Column(String(50), nullable=False)
    price = Column(Float, nullable=False)
    content = Column(Text)

    def __repr__(self):
        """Return a readable summary instead of the default object address."""
        template = "<Article(title:{}) price:{} content:{}>"
        return template.format(self.titile, self.price, self.content)
# 创建数据内容
def create_data():
    """Insert ten sample articles and commit them in one transaction.

    Even loop indexes get a lowercase ``title<N>`` and random text content;
    odd indexes get an uppercase ``TITLE<N>`` and no content.
    """
    with Session() as ses:
        for i in range(10):
            if i % 2 == 0:
                # The Text column expects a string; whether a raw UUID
                # object can be bound depends on the database driver.
                art = Article(titile=f'title{i+1}', price=randint(1, 100), content=str(uuid4()))
            else:
                art = Article(titile=f'TITLE{i+1}', price=randint(1, 100))
            ses.add(art)
        ses.commit()
# 测试filter过滤数据:
# ==
def query_data():
    """Demo of ``==``: print the first article whose id equals 1."""
    with Session() as db:
        by_id = db.query(Article).filter(Article.id == 1)
        article = by_id.first()
        print(article)
# !=
def query_data_not_equal():
    """Demo of ``!=``: print every article whose id is not 1."""
    with Session() as db:
        others = db.query(Article).filter(Article.id != 1)
        articles = others.all()
        for article in articles:
            print(article)
# like
def query_data_like():
    """Demo of ``like``: print articles whose title matches ``title%``."""
    with Session() as db:
        pattern = Article.titile.like("title%")
        articles = db.query(Article).filter(pattern).all()
        for article in articles:
            print(article)
# in_
# 注意;in的写法是in_
def query_data_in():
    """Demo of ``in_``: print articles whose title is one of three values."""
    with Session() as db:
        wanted = ['title1', 'title3', 'title5']
        criterion = Article.titile.in_(wanted)
        articles = db.query(Article).filter(criterion).all()
        for article in articles:
            print(article)
# not in_
# Note: NOT IN is written by negating in_ with the ~ operator.
def query_data_not_in():
    """Demo of NOT IN: print articles whose title is none of three values.

    Has its own name so it no longer replaces ``query_data_in``.
    """
    with Session() as ses:
        rs = ses.query(Article).filter(~Article.titile.in_(['title1', 'title3', 'title5'])).all()
        for i in rs:
            print(i)
# None
def query_data_null():
    """Demo of IS NULL: print articles that have no content."""
    with Session() as db:
        no_content = Article.content.is_(None)
        articles = db.query(Article).filter(no_content).all()
        for article in articles:
            print(article)
# not None
def query_data_not_null():
    """Demo of IS NOT NULL: print articles that have content.

    Has its own name so it no longer replaces ``query_data_null``.
    """
    with Session() as ses:
        # is_not(None) renders IS NOT NULL without comparing to None with !=.
        rs = ses.query(Article).filter(Article.content.is_not(None)).all()
        for i in rs:
            print(i)
# and
# Two working spellings are shown below, plus one pitfall to avoid.
def query_data_and():
    """Demo of AND: print articles not titled ``title4`` with price above 8.

    Has its own name so it no longer replaces ``query_data_null``.
    """
    with Session() as ses:
        # Pitfall: Python's ``and`` keyword is evaluated by Python itself and
        # is not rendered as SQL AND, so this does NOT express the condition:
        # rs = ses.query(Article).filter(Article.titile!='title4' and Article.price >8 ).all()
        # Option 1: several criteria passed to filter() are joined with AND.
        # rs = ses.query(Article).filter(Article.titile!='title4' , Article.price >8 ).all()
        # Option 2 (recommended): and_
        rs = ses.query(Article).filter(and_(Article.titile != 'title4', Article.price > 8)).all()
        for r in rs:
            print(r)
# or_
def query_data_or():
    """Demo of ``or_``: print articles not titled ``title4`` or priced above 8."""
    with Session() as db:
        either = or_(Article.titile != 'title4', Article.price > 8)
        articles = db.query(Article).filter(either).all()
        for article in articles:
            print(article)
if "__main__" == __name__:
    # Demo entry point: uncomment the call you want to run.
    # Base.metadata.create_all()
    # create_data()
    # query_data()
    # query_data_not_equal()
    # query_data_like()
    # query_data_in()
    # query_data_null()
    query_data_or()