pandas.df生成sql语句字符串
pandas有一个
to_sql
函数,但它只在数据库连接上工作,不能生成字符串。
Pandas.to_sql用法
DataFrame.to_sql(name, con, schema=None,
if_exists='fail', index=True, index_label=None,
chunksize=None, dtype=None, method=None)
df生成SQL字符串
而在实际场景中,不一定能拿到数据库连接,因此目的是将df生成 insert 语句,然后执行sql。
CREATE字符串
import pandas as pd
import numpy as np
# 生成一个df
dates = pd.date_range('20210913', periods=6)
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list('ABCD'))
print(df)
# 定义df输入源,以及目标表名
SOURCE = df
TARGET = 'table_name'
# 定义生成CREATE语句方法
def SQL_CREATE_STATEMENT_FROM_DATAFRAME(SOURCE, TARGET):
# SQL_CREATE_STATEMENT_FROM_DATAFRAME(SOURCE, TARGET)
# SOURCE: source dataframe
# TARGET: target table to be created in database
import pandas as pd
sql_text = pd.io.sql.get_schema(SOURCE.reset_index(), TARGET)
return sql_text
print(SQL_CREATE_STATEMENT_FROM_DATAFRAME(df, TARGET))
CREATE TABLE "table_name" (
"index" TIMESTAMP,
"A" REAL,
"B" REAL,
"C" REAL,
"D" REAL
)
INSERT字符串
import pandas as pd
import numpy as np
# 生成一个df
dates = pd.date_range('20210913', periods=6)
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list('ABCD'))
print(df)
# 定义df输入源,以及目标表名
SOURCE = df
TARGET = 'table_name'
# 定义生成INSERT语句方法
def SQL_INSERT_STATEMENT_FROM_DATAFRAME(SOURCE, TARGET):
sql_texts = []
for index, row in SOURCE.iterrows():
sql_texts.append(
'INSERT INTO ' + TARGET + ' (' + str(', '.join(SOURCE.columns)) + ') VALUES ' + str(tuple(row.values)))
return sql_texts
print(SQL_INSERT_STATEMENT_FROM_DATAFRAME(df, TARGET))
# 调用sql批量插入数据
# 调用sql字符串批量插入数据
for sql in SQL_CREATE_STATEMENT_FROM_DATAFRAME(df, TARGET):
execute_sql(sql)
['INSERT INTO table_name (A, B, C, D) VALUES (0.1370013090570642, 0.20086002674975936, -0.5266648633200024, -1.0010158457036757)', 'INSERT INTO table_name (A, B, C, D) VALUES (0.9029015275848712, 0.22669041907219445, -0.3828406165823879, -1.1016961725141352)', 'INSERT INTO table_name (A, B, C, D) VALUES (-0.5179178459300068, -0.996284919523673, 0.17503508916400018, 0.6372599752376573)', 'INSERT INTO table_name (A, B, C, D) VALUES (0.39624143317532806, 0.7930236855228965, -0.4033833794961002, -0.9606071107803832)', 'INSERT INTO table_name (A, B, C, D) VALUES (0.5669996514434156, -0.16218439998616851, -1.5355082126138853, 0.7262047833003215)', 'INSERT INTO table_name (A, B, C, D) VALUES (0.05199999801669854, -0.24383492679485919, -2.8410819565423084, 1.0067962689756524)']