# 注意:使用docx之前需要先安装第三方库:python-docx
from docx import Document
# Import length units: Inches - inches, Cm - centimetres, Pt - points (not pixels)
from docx.shared import Inches, Cm, Pt
# Script 1: build a new Word document containing headings, paragraphs,
# pictures and a table, then save it to files/demo1.docx.

# 1. Create an empty document.
doc = Document()

# 2. Add content.
# 1) Document.add_heading(text='', level=1) appends a heading of the given
#    level and returns the heading object.
#    text  - heading text
#    level - heading level, from 0 (smallest) to 9 (largest)
h1 = doc.add_heading('1. 添加标题', level=0)
doc.add_heading('我是标题1', level=0)
doc.add_heading('我是标题2', level=1)
doc.add_heading('我是标题3', level=2)
print(h1)
doc.add_page_break()

# 2) Add paragraph text: Document.add_paragraph(text='').
doc.add_heading('2. 添加段落文字', level=0)
p1 = doc.add_paragraph('概括了核心内容。')
p2 = doc.add_paragraph('扫描更多新闻')

# 3) Insert a paragraph: Paragraph.insert_paragraph_before(text='') inserts a
#    new paragraph in front of the given one and returns the new paragraph.
p3 = p2.insert_paragraph_before('H E L L O W O R L D !')

# 4) Insert a page break; content added afterwards starts on a new page.
doc.add_page_break()

# 5) Insert pictures; the second one is scaled to a width of 5 cm.
doc.add_picture('files/b.png')
doc.add_picture('files/new3.png', width=Cm(5))

# 6) Start a new section. With python-docx's default start type the section
#    begins on a new page, so the visible effect resembles a page break, but
#    a section break is a different kind of element.
doc.add_section()
doc.add_heading('新一页!')

# 7) Add a table.
# a. Create a table object with 3 rows and 4 columns.
table = doc.add_table(3, 4)

# b. Get a cell: Table.cell(row_index, column_index).
cell1 = table.cell(0, 0)
cell1.text = '姓名'
cell2 = table.cell(0, 1)
cell2.text = '性别'

# c. Access rows through Table.rows, then cells through Row.cells.
row_2 = table.rows[1]
cell3 = row_2.cells[0]
cell3.text = '小明'

# d. Append a row and a 3 cm wide column.
table.add_row()
table.add_column(Cm(3))

# 3. Save the document.
doc.save('files/demo1.docx')
from docx import Document
# 导入数值单位
from docx.shared import Cm, Pt, RGBColor
# 导入对齐方式对应的值
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
from docx.oxml.ns import qn
# Script 2: demonstrate paragraph styles, bold/italic runs, pictures inside
# table cells, alignment and font settings, then save to files/demo2.docx.
doc = Document()

# 1. Paragraph style.
# Styles are looked up by their name as shown in Word, e.g. 'List Bullet'
# (bulleted item) or 'Normal' (the default). Passing the style id
# ('ListBullet') still resolves but python-docx reports it as deprecated.
p1 = doc.add_paragraph('我是段落1', style='List Bullet')

# 2. Italic and bold.
# Text passed to add_paragraph() cannot be styled separately; formatting is
# applied per run, so each differently styled fragment is its own run.
p2 = doc.add_paragraph('我是不能单独设置样式的部分,')
run1 = p2.add_run('我是加粗的部分,')
run1.bold = True
run2 = p2.add_run('我是倾斜的部分,')
run2.italic = True
run3 = p2.add_run('我是又加粗又倾斜的部分')
run3.bold = True
run3.italic = True

# A whole paragraph in bold: start from an empty paragraph and add one run.
p3 = doc.add_paragraph()
run = p3.add_run('我整段都加粗:超链接的路径方式概括了核心内容。')
run.bold = True

# 3. Show a picture inside a table cell (pictures are added to a run).
table = doc.add_table(2, 3)
cell = table.cell(0, 0)
p = cell.add_paragraph()
run = p.add_run('文字\n')
run.add_picture('files/b.png')

# 4. Alignment.
# WD_PARAGRAPH_ALIGNMENT controls horizontal alignment only: LEFT, RIGHT,
# CENTER and JUSTIFY. JUSTIFY_MED / JUSTIFY_HI / JUSTIFY_LOW are justified
# variants with medium / high / low character compression; they are not
# vertical alignments.
h1 = doc.add_heading('我是标题', level=0)
h1.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER

# 5. Align content inside a table cell (alignment is set on the paragraph).
table2 = doc.add_table(2, 3)
cell1 = table2.cell(0, 1)
p = cell1.add_paragraph('姓名')
p.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER

# 6. Fonts.
# Font properties are set on run objects.
h1 = doc.add_heading(level=1)
run = h1.add_run('标题显示内容')
# Font size.
run.font.size = Pt(50)
# Font colour.
run.font.color.rgb = RGBColor(255, 0, 0)
# Font name.
run.font.name = '宋体'
# Also set the East Asian font attribute on the run's rFonts element so the
# typeface is applied to CJK text as well.
# NOTE(review): presumably font.name alone does not set w:eastAsia - confirm.
run._element.rPr.rFonts.set(qn('w:eastAsia'), '宋体')

doc.save('files/demo2.docx')
from docx import Document
# Script 3: open an existing Word file, print its paragraphs and tables,
# change one table cell and save the file back in place.

# 1. Open the Word file to create a document object.
doc = Document('files/2.docx')

# 2. Read paragraph content.
# 1) Document.paragraphs holds all paragraphs of the document.
all_p = doc.paragraphs
# 2) Paragraph.text is the text content of a paragraph.
for p in all_p:
    print(f'===:{p.text}')

# 3. Read table content.
# 1) Document.tables holds all tables of the document.
all_table = doc.tables
print(all_table)
# 2) Pick the last table and the cell at row index 1, column index 2.
table = all_table[-1]
cell = table.cell(1, 2)
# cell.text reads the cell content (1393.37 in the author's sample file);
# assigning to it replaces the content.
cell.text = '100'

# Saving to the same path overwrites the file that was opened above.
doc.save('files/2.docx')
# Homework: analyse the job postings in files/lagou.csv
import csv
from collections import Counter
def count_by_column(rows, column):
    """Count how many rows carry each distinct value of *column*.

    Args:
        rows: Iterable of mappings, e.g. the rows of a ``csv.DictReader``.
        column: Key whose values are tallied.

    Returns:
        A ``Counter`` mapping each value to its number of occurrences, with
        keys in first-seen order.

    Raises:
        KeyError: If a row has no *column* key.
    """
    return Counter(row[column] for row in rows)


def report_job_demand(csv_path='files/lagou.csv'):
    """Print the five busiest cities and the education-requirement tally.

    Args:
        csv_path: CSV file with at least ``city`` and ``education`` columns.
    """
    # Read the file once and close it deterministically; both statistics are
    # computed from the same in-memory rows. The platform default encoding
    # is used when opening the file.
    with open(csv_path, newline='') as csv_file:
        rows = list(csv.DictReader(csv_file))

    city_counts = count_by_column(rows, 'city')
    education_counts = count_by_column(rows, 'education')

    print('数据分析岗位需求最多的前五名城市:', city_counts.most_common(5))
    # Printed as a plain dict so the output is a simple {value: count} map.
    print('数据分析岗位学历要求:', dict(education_counts))


if __name__ == '__main__':
    report_job_demand()
import csv
from re import *
def salary_midpoint(salary):
    """Return the midpoint of a salary string, or None if it has no digits.

    The first two integers found are taken as the lower and upper bound
    (assumes values such as ``'10k-20k'`` - confirm against the data). A
    string with a single number yields that number instead of half of it.

    Args:
        salary: Raw salary text from the CSV.

    Returns:
        The midpoint as a float, or ``None`` when no number is present.
    """
    bounds = [float(value) for value in findall(r'\d+', salary)[:2]]
    if not bounds:
        return None
    return sum(bounds) / len(bounds)


def average_salary_by_city(rows):
    """Compute the mean salary midpoint for every city in one pass.

    Args:
        rows: Iterable of mappings with ``city`` and ``salary`` keys.

    Returns:
        A dict mapping city to average midpoint, in first-seen city order.
        Rows without a numeric salary are skipped; a city with no numeric
        salary at all is left out.
    """
    midpoints_by_city = {}
    for row in rows:
        # Register the city on first sight so output order follows the file.
        midpoints = midpoints_by_city.setdefault(row['city'], [])
        midpoint = salary_midpoint(row['salary'])
        if midpoint is not None:
            midpoints.append(midpoint)
    return {
        city: sum(midpoints) / len(midpoints)
        for city, midpoints in midpoints_by_city.items()
        if midpoints
    }


def report_average_salary(csv_path='files/lagou.csv'):
    """Print the average salary (in k) per city found in *csv_path*.

    Args:
        csv_path: CSV file with at least ``city`` and ``salary`` columns.
    """
    # One read of the file replaces the original re-scan per city, and the
    # with-block closes the handle.
    with open(csv_path, newline='') as csv_file:
        averages = average_salary_by_city(csv.DictReader(csv_file))

    for city, average in averages.items():
        print('%s数据分析平均薪资:' % city, f'{average:.1f}k')


if __name__ == '__main__':
    report_average_salary()