保存数据
1.先判断文件是否存在
2.1存在则先读再拼接,再存储
2.2不存在直接读取
csv
pd.read_csv('.csv')
data.to_csv(index=False)
json
import json
def save_to_json(json_data_list):
if os.path.exists("selinum_data_test.json"): # 保存文件
with open("selinum_data_test.json", "r", encoding="utf-8") as f_read:
old_json_data_list = json.load(f_read)
total_json_data_list = old_json_data_list + json_data_list
with open("selinum_data_test.json", "w", encoding="utf-8") as f_write:
json.dump(total_json_data_list, f_write, indent=2)
else:
with open("selinum_data_test.json", "w", encoding="utf-8") as f_write:
json.dump(json_data_list, f_write, indent=2)
保存到数据库
pymysql连接
PyMySQL 是在 Python3.x 版本中用于连接 MySQL 服务器的一个库,可以使用pymysql来MySQL数据库的连接,并实现数据库的各种操作
df是dataframe类型数据
import pymysql
config = {
'host': '192.168.***.***',
'user': '**',
'pwd': '***',
'port': 3306,
'db': '**',
'charset': 'utf8'
}
conn = pymysql.connect(host=config["host"], user=config["user"], password=config["pwd"], database=config["db"])
# 保存到数据库 转换为元组
insert_data = []
for i in tqdm(range(df.shape[0])):
insert_data.append(tuple(np.array(df.iloc[i, :]).tolist()))
try:
conn.ping(reconnect=True)
cursor = conn.cursor()
sql_insert = """
insert into table_name(id ) VALUES (%s)
"""
# 读取
# cur.execute(sql)
# rows = cur.fetchall()
for i in range(len(insert_data) // 1000 + 1):
cursor.executemany(sql_insert, insert_data[1000 * i:1000 * (
i + 1)]) # 这里需要进行批量数据插入,每次插1000条!
conn.commit()
except Exception as e:
print(e)
finally:
cursor.close()
conn.close()
sqlalchemy库
参考:SQLAlchemy 1.4 Documentation
连接trino https://pypi.org/project/sqlalchemy-trino/
Engine 是访问数据库的入口,Engine引用Connection Pool和 Dialect实现了对数据库的访问, Dialect指定了具体的数据库类型 MYSQL, SQLSERVER等
只有当调用connect(),execute()函数的时候,才会创建数据库的连接
from sqlalchemy.engine import Engine, Connection
from sqlalchemy.engine import create_engine
engine = create_engine('postgresql://scott:tiger@localhost:5432/mydatabase')# 'mysql+pymysql://root:123456@localhost:3306/dev_shopping?charset=utf8'
#iceberg示例 trino_engine = create_engine('trino://root@192.168.1.1:8881/iceberg/dwt')
# 读取
data= pd.read_sql("***)", connection)
# 写入
data.to_sql(name='tb_name', con=trino_engine , index=False, if_exists='append') #', schema="dwt", method="multi"
with engine.connect() as con:
con.execute('DROP TABLE if exists tb_name')