读写excel数据到数据库
读数据时我学习到了两个库openpyxl和pandas
首先,openpyxl
、、、
from openpyxl import load_workbook
def main():
data_from_excel = get_data_from_excel()
def get_data_from_excel():
f = '../static_datas/hospital/科室名称.xlsx'
wb = load_workbook(f)
sheet = wb[wb.sheetnames[0]]
data = set()
for idx, one_column_data in enumerate(sheet.iter_rows()):
if idx == 0:
continue
data.add(tuple([v.value for v in one_column_data]))
return data
if __name__ == '__main__':
main()
、、、
再接着就是pandas
、、、
from hssz_pgsql import ShuzhiDatabase
import pandas as pd
def main():
db = ShuzhiDatabase('数据库名称', '数据库密码')
# 数据库连接和sql语句的提交都已经写成函数封装起来了,这里直接调用函数
# 获取数据库中已经有的数据
existed_data = get_data_from_sql(db)
# 获取文件中的数据
data_from_excel = get_data_from_excel()
# 添加到数据表中
load_to_database(db, data_from_excel, existed_data)
def get_data_from_excel():
f = '../static_datas/emotion/工作簿19.xlsx'
df = pd.read_excel(f)
data = []
for index, row in df.iterrows():
data.append([v for v in row])
return data
def load_to_database(shuzhi_database, data_from_excel, existed_data):
new_data =[]
for excel_data in data_from_excel:
if excel_data in existed_data:
continue
new_data.append(excel_data)
sql = """insert into emotion (emotion, em_score, em_type, em_label, w_time, is_valid)
values (%s, %s, %s, %s, now(), 1);"""
shuzhi_database.execute_many(sql, new_data)
def get_data_from_sql(shuzhi_database):
sql = """select * from emotion;"""
raw_data = shuzhi_database.get_data_from_pgsql(sql)
data_dict = []
for d in raw_data:
data_dict.append(d[1:5])
return data_dict
if __name__ == '__main__':
main()
、、、
api编写就没什么说的了
爬虫也只是用了xpath语法
Ctrl + Shift + I 检查元素
Ctrl + f 搜索
/子集
//跳级
@属性访问
常用函数 contains()、text()
暂时就这么多