Python 效率革命~Python Excel~openpyxl。
文章目录
入门。
pip install openpyxl
geek@ubuntu:~/geek$ python3
Python 3.8.2 (default, Apr 27 2020, 15:53:34)
[GCC 9.3.0] on linux
Type "help", "copyright", "credits" or "license" for more information.
>>> import openpyxl
>>> wb = openpyxl.Workbook()
>>> ws = wb.active
>>> ws.title
'Sheet'
>>> ws['a1'] = 520
>>> ws.append([1, 2, 3])
>>> import datetime
>>> ws['a3'] = datetime.datetime.now()
>>> wb.save('demo.xlsx')
将豆瓣 top 250 电影保存到 Excel。
爬虫部分请参阅:《Python 效率革命~爬虫。》
# -*- coding: utf-8 -*-
# import imp
import sys
import bs4
import openpyxl as openpyxl
import requests
def open_url(url):
# 使用代理。
# proxies = {"http": "127.0.0.1:1080", "https": "127.0.0.1:1080"}
headers = {'user-agent': "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:76.0) Gecko/20100101 Firefox/76.0"}
# res = requests.get(url, headers=headers, proxies=proxies)
res = requests.get(url, headers=headers)
return res
def find_movies(res):
soup = bs4.BeautifulSoup(res.text, 'html.parser')
# 电影名。
movies = []
targets = soup.find_all('div', class_='hd')
for each in targets:
movies.append(each.a.span.text)
# 评分。
ranks = []
targets = soup.find_all('span', class_='rating_num')
for each in targets:
# ranks.append('评分:%s\t' % each.text)
ranks.append(each.text)
# 资料。
messages = []
targets = soup.find_all('div', class_='bd')
for each in targets:
try:
messages.append(each.p.text.split('\n')[1].strip() + each.p.text.split('\n')[2].strip())
except:
continue
result = []
length = len(movies)
for i in range(length):
# result.append(movies[i] + '\t' + ranks[i] + messages[i] + '\n')
result.append([movies[i], ranks[i], messages[i]])
return result
# 找出一共有多少页。
def find_depth(res):
soup = bs4.BeautifulSoup(res.text, 'html.parser')
depth = soup.find('span', class_='next').previous_sibling.previous_sibling.text
return int(depth)
def save_to_excel(result):
wb = openpyxl.Workbook()
ws = wb.active
ws['A1'] = '电影名称'
ws['B1'] = '评分'
ws['C1'] = '资料'
for each in result:
ws.append(each)
wb.save('./豆瓣 TOP250 电影.xlsx')
def main():
host = 'https://movie.douban.com/top250'
res = open_url(host)
depth = find_depth(res)
result = []
for i in range(depth):
url = host + '/?start=' + str(25 * i)
res = open_url(url)
result.extend(find_movies(res))
save_to_excel(result)
# with open('./豆瓣_TOP250_电影.txt', 'w') as f:
# for each in result:
# f.write(each)
if __name__ == '__main__':
print(sys.getdefaultencoding())
# imp.reload(sys)
# sys.setdefaultencoding('utf-8')
main()
打开 Excel 文件。
>>> import openpyxl
>>> workbook = openpyxl.load_workbook(r'/home/geek/PycharmProjects/geek_excel/豆瓣 TOP250 电影.xlsx')
>>> workbook.get_sheet_names()
<stdin>:1: DeprecationWarning: Call to deprecated function get_sheet_names (Use wb.sheetnames).
['Sheet']
>>> workbook.sheetnames
['Sheet']
>>>
获取工作表。
>>> wb.get_sheet_names()
<stdin>:1: DeprecationWarning: Call to deprecated function get_sheet_names (Use wb.sheetnames).
['Sheet']
>>> wb.sheetnames
['Sheet']
>>> ws = wb.get_sheet_by_name('Sheet')
<stdin>:1: DeprecationWarning: Call to deprecated function get_sheet_by_name (Use wb[sheetname]).
>>>
创建和删除工作表。
>>> nws = wb.create_sheet(index = 0, title = 'Geek')
>>> wb.get_sheet_names()
['Geek', 'Sheet']
>>> wb.sheetnames
['Geek', 'Sheet']
>>>
>>> nws = wb.create_sheet(index = 0, title = 'Geek')
>>> wb.get_sheet_names()
['Geek', 'Sheet']
>>> wb.sheetnames
['Geek', 'Sheet']
>>>
```python
>>> wb.remove_sheet(wb.get_sheet_by_name('Geek'))
<stdin>:1: DeprecationWarning: Call to deprecated function remove_sheet (Use wb.remove(worksheet) or del wb[sheetname]).
>>>
定位单元格。
>>> c = ws['A2']
>>> c.row
2
>>> c.column
1
>>> c.coordinate
'A2'
>>> ws['A2'].value
'肖申克的救赎'
>>> c.value
'肖申克的救赎'
>>> d = c.offset(2, 0)
>>> d.value
'阿甘正传'
>>>
‘AAA’ 是多少。
26 进制。
>>> openpyxl.cell.cell.get_column_letter(496)
'SB'
>>>
访问多个单元格。
>>> for each_movie in ws['A2':'B10']:
... for each_cell in each_movie:
... print(each_cell.value, end=' ')
... print('\n')
...
肖申克的救赎 9.7
霸王别姬 9.6
阿甘正传 9.5
这个杀手不太冷 9.4
美丽人生 9.5
泰坦尼克号 9.4
千与千寻 9.4
辛德勒的名单 9.5
盗梦空间 9.3
>>>
>>> for each_row in ws.rows:
... print(each_row[0].value)
...
>>> for each_row in ws.iter_rows(min_row=2, min_col=1, max_row=4, max_col=2):
... print(each_row[0].value)
...
肖申克的救赎
霸王别姬
阿甘正传
拷贝工作表。
>>> new = wb.copy_worksheet(ws)
>>> type(new)
<class 'openpyxl.worksheet.worksheet.Worksheet'>
>>> wb.save(r'/home/geek/PycharmProjects/geek_excel/豆瓣 TOP250 电影.xlsx')
个性化工作表标签栏。
>>> import openpyxl
>>> wb = openpyxl.Workbook()
>>> ws1 = wb.create_sheet(title = 'Geek')
>>> ws2 = wb.create_sheet(title = '李')
>>> ws3 = wb.create_sheet(title = '水能载舟亦可赛艇, ')
>>> ws4 = wb.create_sheet(title = '哈哈')
>>> ws1.sheet_properties.tabColor = 'FF0000'
>>> ws2.sheet_properties.tabColor = '00FF00'
>>> ws3.sheet_properties.tabColor = '0000FF'
>>> ws4.sheet_properties.tabColor = '8B008B'
>>> wb.save(r'./demo.xlsx')
调整行高和列宽。
>>> import openpyxl
>>> wb = openpyxl.Workbook()
>>> ws1 = wb.create_sheet(title = 'Geek')
>>> ws2 = wb.create_sheet(title = '李')
>>> ws3 = wb.create_sheet(title = '水能载舟亦可赛艇, ')
>>> ws4 = wb.create_sheet(title = '哈哈')
>>> ws1.sheet_properties.tabColor = 'FF0000'
>>> ws2.sheet_properties.tabColor = '00FF00'
>>> ws3.sheet_properties.tabColor = '0000FF'
>>> ws4.sheet_properties.tabColor = '8B008B'
>>> wb.save(r'./demo.xlsx')
>>> ws2.row_dimensions[2].height = 100
>>> ws2.column_dimensions['C'].width = 100
>>> wb.save(r'./demo.xlsx')
合并和拆分单元格。
>>> ws1.merge_cells('A1:C3')
>>> ws1['A1'] = 'I love Geek.'
>>> wb.save(r'./demo.xlsx')
>>> ws1.unmerge_cells('A1:C3')
冻结窗口。
>>> import openpyxl
>>> wb = openpyxl.load_workbook(r'/home/geek/demo.xlsx')
>>> ws = wb.active
>>> ws.freeze_panes = 'B8'
>>> wb.save(r'/home/geek/demo.xlsx')
解冻。
>>> ws.freeze_panes = 'A1'
>>> ws.freeze_panes = None
设置单元格字体。
>>> from openpyxl import Workbook
>>> from openpyxl.styles import Font
>>> wb = Workbook()
>>> ws = wb.active
>>> b2 = ws['B2']
>>> b2.value = 'Geek'
>>> bold_red_font = Font(bold=True, color='FF0000')
>>> b2.font = bold_red_font
>>>
>>> b3 = ws['B3']
>>> b3.value = 'Geek'
>>> italic_strike_blue_16font = Font(size=16, italic=True, strike=True, color='0000FF')
>>> b3.font = italic_strike_blue_16font
>>> wb.save(r'/home/geek/demo01.xlsx')
>>>
填充单元格。
>>> from openpyxl.styles import PatternFill
>>> b2.fill = yellow_fill
>>> wb.save('/home/geek/geek/py/demo01.xlsx')
>>>
>>> from openpyxl.styles import GradientFill
>>> red2green_fill = GradientFill(fill_type='linear', stop=('FF0000', '00FF00'))Traceback (most recent call last):
File "<stdin>", line 1, in <module>
TypeError: __init__() got an unexpected keyword argument 'fill_type'
>>> red2green_fill = GradientFill(type='linear', stop=('FF0000', '00FF00'))
>>> wb.save('/home/geek/geek/py/demo01.xlsx')
设置边框。
>>> from openpyxl.styles import Border, Side
>>> thin_side = Side(border_style='thin', color='000000')
>>> double_side = Side(border_style='double', color='FF0000')
>>> b2.border = Border(diagonal=thin_side, diagonalUp=True, diagonalDown=True)
>>> b3.border = Border(left=double_side, top=double_side, right=double_side, bottom=double_side)
>>> wb.save('/home/geek/geek/py/demo01.xlsx')
文本对齐。
>>> from openpyxl.styles import Alignment
>>> ws.merge_cells('A1:C2')
>>> ws['A1'] = 'I love U'
>>> center_alignment = Alignment(horizontal='center', vertical='center')
>>> ws['A1'].alignment = center_alignment
>>> wb.save('/home/geek/geek/py/demo01.xlsx')
>>>
命名样式。
>>> from openpyxl.styles import NamedStyle
>>> highlight = NamedStyle(name='highlight')
>>> highlight.font = Font(bold=True, size=20)
>>> highlight.alignment = Alignment(horizontal='center', vertical='center')
>>> wb.add_named_style(highlight)
>>> ws['A1'].value = 'LOVE'
>>> ws['A1'].style = highlight
>>> wb.save('/home/geek/geek/py/demo01.xlsx')
>>>
数字格式。
>>> import openpyxl
>>> wb = openpyxl.Workbook()
>>> ws = wb.active
>>> ws.append(['文本', '数字'])
>>> ws['A2'] = '520'
>>> ws['B2'] = 520
>>> wb.save('/home/geek/geek/py/test.xlsx')
数字格式。
import datetime
import openpyxl
wb = openpyxl.Workbook()
ws = wb.active
ws['A1'] = 88.8
ws['A1'].number_format = '#,###.00鱼币'
ws['A2'] = datetime.datetime.today()
ws['A2'].number_format = 'yyyy-mm-dd'
wb.save('test2.xls')
- 正值;负值;零值;文本
import openpyxl
wb = openpyxl.Workbook()
ws = wb.active
# 正值;负值
ws['A1'].number_format = '[RED]+#,###.00;[GREEN]-#,###.00'
ws['A1'] = 99
# 正值;负值
ws['A2'].number_format = '[RED]+#,###.00;[GREEN]-#,###.00'
ws['A2'] = -99
# 正值;负值;零值;文本
ws['A3'].number_format = '[RED];[GREEN];[BLUE];[YELLOW]'
ws['A3'] = 0
# 正值;负值;零值;文本
ws['A4'].number_format = '[RED];[GREEN];[BLUE];[YELLOW]'
ws['A4'] = 'Geek'
wb.save(r'number.xlsx')
import openpyxl
wb = openpyxl.Workbook()
ws = wb.active
# 正值;负值
ws['A1'].number_format = '[RED]+#,###.00;[GREEN]-#,###.00'
ws['A1'] = 99
# 正值;负值
ws['A2'].number_format = '[RED]+#,###.00;[GREEN]-#,###.00'
ws['A2'] = -99
# 正值;负值;零值;文本
ws['A3'].number_format = '[RED];[GREEN];[BLUE];[YELLOW]'
ws['A3'] = 0
# 正值;负值;零值;文本
ws['A4'].number_format = '[RED];[GREEN];[BLUE];[YELLOW]'
ws['A4'] = 'Geek'
ws['A5'].number_format = '[=1]男;[=0]女'
ws['A5'] = 0
ws['A6'].number_format = '[=1]男;[=0]女'
ws['A6'] = 1
ws['A7'].number_format = '[=1]男;[=0]女'
ws['A7'] = 2
wb.save(r'number.xlsx')
import openpyxl
wb = openpyxl.Workbook()
ws = wb.active
# 正值;负值
ws['A1'].number_format = '[RED]+#,###.00;[GREEN]-#,###.00'
ws['A1'] = 99
# 正值;负值
ws['A2'].number_format = '[RED]+#,###.00;[GREEN]-#,###.00'
ws['A2'] = -99
# 正值;负值;零值;文本
ws['A3'].number_format = '[RED];[GREEN];[BLUE];[YELLOW]'
ws['A3'] = 0
# 正值;负值;零值;文本
ws['A4'].number_format = '[RED];[GREEN];[BLUE];[YELLOW]'
ws['A4'] = 'Geek'
ws['A5'].number_format = '[=1]男;[=0]女'
ws['A5'] = 0
ws['A6'].number_format = '[=1]男;[=0]女'
ws['A6'] = 1
ws['A7'].number_format = '[=1]男;[=0]女'
ws['A7'] = 2
ws['A8'].number_format = '[<60][RED]不及格;[>=60][GREEN]及格'
ws['A8'] = 58
ws['A9'].number_format = '[<60][RED]不及格;[>=60][GREEN]及格'
ws['A9'] = 68
wb.save(r'number.xlsx')
函数公式。
geek@ubuntu:~$ python3
Python 3.8.2 (default, Apr 27 2020, 15:53:34)
[GCC 9.3.0] on linux
Type "help", "copyright", "credits" or "license" for more information.
>>> from openpyxl.utils import FORMULAE
>>> 'SUM' in FORMULAE
True
>>> 'SAM' in FORMULAE
False