SQLALchemy双向关系(二)

在深入SQLAlchemy的世界中,我们经常会遇到需要将复杂的JSON数据结构转化为关系型数据库中的实体关系。本文将带您了解如何使用SQLAlchemy处理单层和多层JSON数据,并将它们有效地写入MySQL数据库。通过两个具体的示例,我们将展示如何构建模型、定义关系以及编写相应的数据插入代码。

通过比较两种数据结构的代码实现,我们将了解它们在数据复杂度、代码复杂度、冗余度、添加顺序以及额外数据结构方面的差异。这不仅有助于我们理解SQLAlchemy的灵活性,也为我们在实际开发中选择合适的数据结构提供了指导。

现在,让我们开始这段旅程,通过代码示例深入了解SQLAlchemy双向关系的实现,以及如何高效地将JSON数据写入关系型数据库。

ER图

请添加图片描述

单层数据

from sqlalchemy import create_engine, Column, Integer, String, ForeignKey, Text
from sqlalchemy.orm import relationship, sessionmaker, declarative_base

Base = declarative_base()


class Author(Base):
    __tablename__ = 'authors'
    id = Column(Integer, primary_key=True)
    name = Column(String(255), nullable=False)

    books = relationship("Book", back_populates="author")
    reviews = relationship("Review", back_populates="author")


class Book(Base):
    __tablename__ = 'books'
    id = Column(Integer, primary_key=True)
    title = Column(String(255), nullable=False)
    author_id = Column(Integer, ForeignKey('authors.id'))
    publisher_id = Column(Integer, ForeignKey('publishers.id'))

    author = relationship("Author", back_populates="books")
    publisher = relationship("Publisher", back_populates="books")
    reviews = relationship("Review", back_populates="book")


class Publisher(Base):
    __tablename__ = 'publishers'
    id = Column(Integer, primary_key=True)
    name = Column(String(255), nullable=False)

    books = relationship("Book", back_populates="publisher")


class Review(Base):
    __tablename__ = 'reviews'
    id = Column(Integer, primary_key=True)
    book_id = Column(Integer, ForeignKey('books.id'))
    author_id = Column(Integer, ForeignKey('authors.id'))
    review_text = Column(Text)

    book = relationship("Book", back_populates="reviews")
    author = relationship("Author", back_populates="reviews")


engine = create_engine('mysql+pymysql://root:123456@localhost/test', echo=True)
Base.metadata.drop_all(engine)
Base.metadata.create_all(engine)

json_data = [
    {
        "name": "J.K. Rowling",
        "book": "Harry Potter and the Philosopher's Stone",
        "publisher": "Bloomsbury",
        "review_text": "An amazing book!"
    },
    {
        "name": "J.K. Rowling",
        "book": "Harry Potter and the Chamber of Secrets",
        "publisher": "Bloomsbury",
        "review_text": "A thrilling sequel!"
    },
    {
        "name": "George Orwell",
        "book": "1984",
        "publisher": "Secker and Warburg",
        "review_text": "A profound and chilling view of totalitarianism."
    },
    {
        "name": "George Orwell",
        "book": "Animal Farm",
        "publisher": "Secker and Warburg",
        "review_text": ""
    }
]

Session = sessionmaker(bind=engine)
with Session() as session:
    # 存储已经创建的作者和出版社,以避免重复创建
    authors = {}
    publishers = {}

    for item in json_data:
        author_name = item["name"]
        if author_name not in authors:
            author = Author(name=author_name)
            session.add(author)
            authors[author_name] = author

        publisher_name = item["publisher"]
        if publisher_name not in publishers:
            publisher = Publisher(name=publisher_name)
            session.add(publisher)
            publishers[publisher_name] = publisher

        book = Book(title=item["book"], author=authors[author_name], publisher=publishers[publisher_name])
        session.add(book)

        if item["review_text"]:
            review = Review(review_text=item["review_text"], book=book, author=authors[author_name])
            session.add(review)

    session.commit()

多层数据

from sqlalchemy import create_engine, Column, Integer, String, ForeignKey, Text
from sqlalchemy.orm import relationship, sessionmaker, declarative_base

Base = declarative_base()


class Author(Base):
    __tablename__ = 'authors'
    id = Column(Integer, primary_key=True)
    name = Column(String(255), nullable=False)

    books = relationship("Book", back_populates="author")
    reviews = relationship("Review", back_populates="author")


class Book(Base):
    __tablename__ = 'books'
    id = Column(Integer, primary_key=True)
    title = Column(String(255), nullable=False)
    author_id = Column(Integer, ForeignKey('authors.id'))
    publisher_id = Column(Integer, ForeignKey('publishers.id'))

    author = relationship("Author", back_populates="books")
    publisher = relationship("Publisher", back_populates="books")
    reviews = relationship("Review", back_populates="book")


class Publisher(Base):
    __tablename__ = 'publishers'
    id = Column(Integer, primary_key=True)
    name = Column(String(255), nullable=False)

    books = relationship("Book", back_populates="publisher")


class Review(Base):
    __tablename__ = 'reviews'
    id = Column(Integer, primary_key=True)
    book_id = Column(Integer, ForeignKey('books.id'))
    author_id = Column(Integer, ForeignKey('authors.id'))
    review_text = Column(Text)

    book = relationship("Book", back_populates="reviews")
    author = relationship("Author", back_populates="reviews")


engine = create_engine('mysql+pymysql://root:123456@localhost/test', echo=True)
Base.metadata.drop_all(engine)
Base.metadata.create_all(engine)

json_data = [
    {
        "name": "J.K. Rowling",
        "books": [
            {
                "title": "Harry Potter and the Sorcerer's Stone",
                "publisher": "Bloomsbury",
                "reviews": [
                    {
                        "review_text": "An amazing book!"
                    }
                ]
            },
            {
                "title": "Harry Potter and the Chamber of Secrets",
                "publisher": "Bloomsbury",
                "reviews": [
                    {
                        "review_text": "A thrilling sequel!"
                    }
                ]
            }
        ]
    },
    {
        "name": "George Orwell",
        "books": [
            {
                "title": "1984",
                "publisher": "Secker and Warburg",
                "reviews": [
                    {
                        "review_text": "A profound and chilling view of totalitarianism."
                    }
                ]
            },
            {
                "title": "Animal Farm",
                "publisher": "Secker and Warburg",
                "reviews": [
                    {
                        "review_text": "A powerful critique of political systems."
                    }
                ]
            }
        ]
    }
]

Session = sessionmaker(bind=engine)
with Session() as session:
    # 存储已经创建出版社,以避免重复创建
    publishers = {}

    for author_data in json_data:
        author = Author(name=author_data["name"])
        session.add(author)

        for book_data in author_data["books"]:
            publisher_name = book_data["publisher"]
            if publisher_name not in publishers:
                publisher = Publisher(name=publisher_name)
                session.add(publisher)
                publishers[publisher_name] = publisher

            book = Book(title=book_data["title"], author=author, publisher=publishers[publisher_name])
            session.add(book)

            for review_data in book_data["reviews"]:
                review = Review(review_text=review_data["review_text"], book=book, author=author)
                session.add(review)

    session.commit()

总结

单层数据结构多层数据结构
数据复杂度简单较复杂
代码复杂度简单较复杂
数据冗余度
添加顺序线性顺序,先一后多按照数据嵌套的顺序
额外数据结构需要的更多的额外数据结构通过嵌套循环减少了额外的数据结构
数据其他组织方式已展平到一层最简形式可以有其他组织形式,比如外层是publisher

最后,这篇文章简化了数据插入,如果是多个人同时进行多条数据的插入,那么可能导致重复插入,而如果增加数据库唯一索引,还是会导致整体事务失败,于是可以从以下两点进行改进:

  1. 插入之前先查询数据库,如果表中数据量较少,甚至可以查询全部进行缓存
  2. 采用嵌套事务,就算某一条数据插入失败,其他数据还是可以继续插入
  3. 可以尝试flush+bulk_insert_mappings的形式,进行批量插入,提高性能
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值