通过scrapy 我的数据库爬到了2w多的数据,想着尝试从mysql中导出到excel
于是尝试了 xlwt包+pymysql 在3.6的环境下导出,不曾想到,表中字段content内容过长,导致xlwt包没办法实现,通过搜索找到了openpyxl包,可以解决这个问题
但是一般字段长度的表还是能够用xlwt导出的
先给出xlwt的代码
xlwt+pymsql
import pymysql
import xlwt
import datetime
def get_excel(MYSQL_HOST, MYSQL_DANAME, MYSQL_USER, MYSQL_PASSWD, MYSQL_TABLE):
connect = pymysql.connect(
host=MYSQL_HOST,
port=3306,
db=MYSQL_DANAME,
user=MYSQL_USER,
passwd=MYSQL_PASSWD,
charset='utf8',
use_unicode=True
)
cursor = connect.cursor()
sql = 'select * from %s' % (MYSQL_TABLE)
cursor.execute(sql)
connect.commit()
results = cursor.fetchall()
workbook = xlwt.Workbook()
sheet = workbook.add_sheet('table_test', cell_overwrite_ok=True)
fields = cursor.description
for field in range(0, len(fields)):
sheet.write(0, field, fields[field][0])
row = 1
col = 0
for row in range(1, len(results) + 1):
for col in range(0, len(fields)):
try:
sheet.write(row, col, u'%s' % results[row - 1][col])
except:
print('an error')
workbook.save(r'./{0}{1}.xlsx'.format(MYSQL_DANAME, MYSQL_TABLE))
if __name__ == '__main__':
starttime = datetime.datetime.now()
MYSQL_HOST = 'localhost' # 数据库地址
MYSQL_DANAME = 'human' # 数据库名称
MYSQL_USER = 'root' # 登陆用户
MYSQL_PASSWD = 'root1234' # 登陆密码
MYSQL_TABLE = 'imageattchment' # 要导出表的名称
get_excel(MYSQL_HOST, MYSQL_DANAME, MYSQL_USER, MYSQL_PASSWD, MYSQL_TABLE)
endtime = datetime.datetime.now()
timespend = endtime - starttime
print('花费了%s' % timespend)
openpyxl+pymysql##
import pymysql
from openpyxl import Workbook
from openpyxl.writer.excel import ExcelWriter
from openpyxl.cell.cell import get_column_letter
import datetime
def get_excel(MYSQL_HOST, MYSQL_DANAME, MYSQL_USER, MYSQL_PASSWD, MYSQL_TABLE,
# fields,#需要中文 就取消注释
):
connect = pymysql.connect(
host=MYSQL_HOST,
port=3306,
db=MYSQL_DANAME,
user=MYSQL_USER,
passwd=MYSQL_PASSWD,
charset='utf8',
use_unicode=True
)
cursor = connect.cursor()
sql = 'select * from %s' % (MYSQL_TABLE)
cursor.execute(sql)
connect.commit()
results = cursor.fetchall()
fields_complex = cursor.description # 字段名
fields = []#要用到中文字段名 就需要注释
for i in range(len(fields_complex)):
fields.append(fields_complex[i][0])
wb = Workbook()
ws = wb.worksheets[0]
ws.title = MYSQL_TABLE
data = []
data.append(fields)
for i in results:
data.append(i)
i = 1
for line in data:
for col in range(1, len(line) + 1):
ColNum = get_column_letter(col)
ws.cell('%s%s' % (ColNum, i)).value = line[col - 1]
i += 1
wb.save(r'./{0}{1}.xlsx'.format(MYSQL_DANAME, MYSQL_TABLE))
if __name__ == '__main__':
starttime = datetime.datetime.now()
MYSQL_HOST = 'localhost' # 数据库地址
MYSQL_DANAME = 'human' # 数据库名称
MYSQL_USER = 'root' # 登陆用户
MYSQL_PASSWD = 'root1234' # 登陆密码
MYSQL_TABLE = 'imageattchment' # 要导出表的名称
fields = [
'id', '标题', '时间', '文章地址', '文章内容', '文章分类', '文章来源', '图片id', '缩略图id', '图片对应的uuid',
] # 设置中文标题
get_excel(MYSQL_HOST,
MYSQL_DANAME,
MYSQL_USER,
MYSQL_PASSWD,
MYSQL_TABLE,
# fields,#设置中文 就取消注释
)
endtime = datetime.datetime.now()
timespend = endtime - starttime
print('花费了%s' % timespend)
顺便比较了下俩个包的导出速度
xlwt的速度
openpyxl速度
而且还比较了 俩个导出excel的占用大小
有兴趣的小伙伴可以自己研究下