分析
to_sql 最终调用到 SQLAlchemyEngine 的 insert_records
class SQLAlchemyEngine(BaseEngine):
def __init__(self) -> None:
import_optional_dependency(
"sqlalchemy", extra="sqlalchemy is required for SQL support."
)
def insert_records(
self,
table: SQLTable,
con,
frame,
name,
index: bool | str | list[str] | None = True,
schema=None,
chunksize=None,
method=None,
**engine_kwargs,
) -> int | None:
from sqlalchemy import exc
try:
return table.insert(chunksize=chunksize, method=method)
except exc.StatementError as err:
# GH34431
# https://stackoverflow.com/a/67358288/6067848
msg = r"""(\(1054, "Unknown column 'inf(e0)?' in 'field list'"\))(?#
)|inf can not be used with MySQL"""
err_text = str(err.orig)
if re.search(msg, err_text):
raise ValueError("inf cannot be used with MySQL") from err
raise err
然会调用 SQLTable 的insert函数
def insert(
self, chunksize: int | None = None, method: str | None = None
) -> int | None:
# set insert method
if method is None:
exec_insert = self._execute_insert
elif method == "multi":
exec_insert = self._execute_insert_multi
elif callable(method):
exec_insert = partial(method, self) <----做了个包装。把SQLTable 自己传进去
else:
raise ValueError(f"Invalid parameter `method`: {method}")
keys, data_list = self.insert_data()
nrows = len(self.frame)
if nrows == 0:
return 0
if chunksize is None:
chunksize = nrows
elif chunksize == 0:
raise ValueError("chunksize argument should be non-zero")
chunks = (nrows // chunksize) + 1 <----计算每次写入量
total_inserted = None
with self.pd_sql.run_transaction() as conn:
for i in range(chunks):
start_i = i * chunksize
end_i = min((i + 1) * chunksize, nrows)
if start_i >= end_i:
break
chunk_iter = zip(*(arr[start_i:end_i] for arr in data_list))
num_inserted = exec_insert(conn, keys, chunk_iter)<- 后面三个参数
# GH 46891
if is_integer(num_inserted):
if total_inserted is None:
total_inserted = num_inserted
else:
total_inserted += num_inserted
return total_inserted
例子
from sqlalchemy import create_engine,exc
from ....jiarenmen.jiarenmen import settings
import traceback
all_stock_info_df=get_all_stock_info()
try:
sqlite3_path=str(settings.DATABASES["default"]['NAME'])
engine=create_engine('sqlite:///'+sqlite3_path, echo=False)
from sqlalchemy.dialects.sqlite import insert
# import pandas.io.sql.SQLTable
def insert_or_update(table, conn, keys, data_iter):
# """
# Execute SQL statement inserting data
# Parameters
# ----------
# table : pandas.io.sql.SQLTable
# conn : sqlalchemy.engine.Engine or sqlalchemy.engine.Connection
# keys : list of str
# Column names
# data_iter : Iterable that iterates the values to be inserted
# """
# 方法一: 用padnas 的方法插入,写起来简单。效率太低了
# 且pandas 官方写的方法太少了。只能做更新和不更新的操作
#这一步本质其实也是下面
# inserted_number=None
# try:
# # 返回成功插入的行数量
# inserted_number =table._execute_insert(conn,keys,data_iter)
# print(inserted_number)
# except Exception as e:
# print("inserted fail")
# finally:
# return inserted_number
# 方法二:直接使用sqlalchemy 来完成最自由的操作
#这一步基本什么情况下都要写,作用是把所有列形成的zip对象,拆分成一行数据和一个列名组成的字典
data = [dict(zip(keys, row)) for row in data_iter]
#注意这个table.table 里面2个table是不一样的
#第一个table 是SQLTable 对象 第二个table 是TableClause 对象。后者是拿来变成sql语句的。
print(data)
innserted_number=0
for dict_ in data:
stmt = insert(table.table).values(data).on_conflict_do_update(index_elements=["code"],set_=dict_)
result = conn.execute(stmt)
innserted_number+=result.rowcount
print(f"inserted number {innserted_number}")
return innserted_number
all_stock_info_df.to_sql(name="richmen_stockinfo",con=engine,chunksize=1,index=False,if_exists='append',method=insert_or_update)
except exc.IntegrityError as e:
# 这个就是有大问题了。比如update不了
traceback.print_exc()