一、用csv快速导入sqlite,省去手工建表的烦。
import pymysql
import pandas as pd
import numpy as np
from sqlalchemy import create_engine
# import pymysql #pymysql包不导入也没事,只要环境中下载就行,下面的引擎连接就会成功
#导包
import sqlalchemy
#连接数据库引擎
engine = create_engine('sqlite:///SLS.db')
#读取表格数据
sale = pd.read_csv('xng.csv')
sale.head()
#将表格数据存入数据库
sale.to_sql('sale', engine)
#(‘表名’,引擎)
data = pd.read_sql_query('select * from sale', engine)
#传递数据库命令语句
#数据类型
type(data)
print(pd.read_sql('select * from sale', engine))
#pd.read_sql('sql语句',引擎)
#pd.read_sql_query('select * from sale',engine)
二、学会将dataframe导入与导出sqlite.
但是要解决重复值导入的问题。因为我是用剪贴板的dataframe所以记得先复制表再执行。
# 显示等待导入
import time
import os
# 数据入理
import pandas as pd
from sqlalchemy import create_engine
from selenium.webdriver.common.by import By
def main():
# 实例化类
amingTB = AmingTbaoBao(Flag=1, cate=50012082, day=30)
# 调用类方法
amingTB.run()
class AmingTbaoBao(object):
def __init__(self, Flag, cate, day):
# 初始化转置的pd
self.df = None
# 超链接构造参数
self.Flag = Flag
self.cate = cate
self.day = day
# 绑定浏览器
# 正式开始遍历竞品进行取值。
def parse_Aming(self):
# time.sleep(3)
# 将数据写入df表格中,df做临时存放.顺便进行行列转置,用 T
# df = pd.read_clipboard(sep='t', skiprows=[0, 1]).T
self.df = pd.read_clipboard(sep='t', skiprows=[0, 1])
# # 因为剪贴板的不稳定,所以用下面的来实现获取表格
# self.df = pd.read_html(url)
# 格式化打印一下df的内容到终端
self.to_cmd(self.df)
# # 整理df为目标格式
# self.change_df(df)
# 将dataframe导入到sqlite保存
self.pandas_to_sqlite()
# 从阿明导出数据到excel
# self.Aming_to_excel()
# 一定要加上,防止死循环
def to_cmd(self, df):
print('*'*50, end='n')
print(df)
print('*'*50, end='n')
return
def pandas_to_sqlite(self):
#连接数据库引擎
# 当replace时,会删原表,包括所有设置清除。一般只有重设表格式时用一下。
# 一般常用appdend。因为这是在后面追加数据,但是要注意设置唯一性索引。
conn = create_engine('sqlite:///Uringo.db', encoding='utf-8')
pd.io.sql.to_sql(self.df, name='UringJinPing', con=conn,
if_exists='append', index=False)
conn.dispose()
print("成功导入sqlite")
# 从sqlite里取出几条数看看是否插入成功
def sqlite_to_panada(self):
conn = create_engine('sqlite:///Uringo.db', encoding='utf-8')
data = pd.read_sql_query('select * from UringJinPing', conn)
conn.dispose()
print(data.tail(5))
print("成功从sqlite输出")
def run(self):
self.parse_Aming()
self.sqlite_to_panada()
if __name__ == '__main__':
main()
三、学会用sql语句清除数据库里的重复行。比如没有设置唯一键时,怎么用多列值组合。
我找了好久好久才找到的。菜鸡就是辛苦。什么都靠度娘。
delete from UringJinPing where rowid not in
(
select min(rowid)
from UringJinPing
group by "商品id","日期"
)
代码收集二:
import pandas as pd
import sqlalchemy as sqla
import pymysql
import time
def pandas_import():
db=sqla.create_engine('mysql+pymysql://test:test123@127.0.0.1/db_test?charset=utf8')
df = pd.read_excel('./test.xlsx', header=[0]) # 第一行是表头
df['create_time'] = time.time() # 可忽略此行
pd.io.sql.to_sql(name='t_test_file_import', frame=df, con=db, index=False, if_exists='append') # 追加写入
if __name__ == '__main__':
curr_time_start = time.time()
print('开始时间为%s' % (time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())))
pandas_import()
curr_time_end = time.time()
print('结束时间为%s' % (time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())))
print('总计耗时为%s' % (curr_time_end - curr_time_start))
代码收集三:sqlalchemy查询反回结果与定时的用法
import schedule
import codecs
import csv
import time
from sqlalchemy import create_engine
def get_conn():
engine = create_engine("mysql+pymysql://root:password@localhost:3306/test?charset=utf8mb4")
conn = engine.connect()
return conn
def query():
sql = "select * from user limit 10"
conn = get_conn()
return conn.execute(sql)
def read_mysql_to_csv(filename):
with codecs.open(filename=filename, mode='w') as f:
write = csv.writer(f, dialect='excel')
results = query()
for result in results:
write.writerow(result)
schedule.every().day.at("17:49").do(read_mysql_to_csv, "test")
while True:
schedule.run_pending()
time.sleep(10)