目录
1、包安装
pip install python-docx
注意:如果用 pip install docx 安装,之后运行都会报错,应安装 python-docx。
2、例程
指路官网:
https://python-docx.readthedocs.io/en/latest/index.html
官网给出的例程,注释掉加入图片之后可以正常运行。
from docx import Document
from docx.shared import Inches
# Build a small demo document (headings, formatted runs, lists, an optional
# picture, a table and a page break) and save it as demo.docx.
document = Document()

document.add_heading('Document Title', 0)

# Each run carries its own character formatting inside the same paragraph.
p = document.add_paragraph('A plain paragraph having some ')
p.add_run('bold').bold = True
p.add_run(' and some ')
p.add_run('italic.').italic = True

document.add_heading('Heading, level 1', level=1)
document.add_paragraph('Intense quote', style='Intense Quote')

document.add_paragraph(
    'first item in unordered list', style='List Bullet'
)
document.add_paragraph(
    'first item in ordered list', style='List Number'
)

# The picture is optional: when monty-truth.png is not in the working
# directory, skip it instead of aborting the whole script.
try:
    document.add_picture('monty-truth.png', width=Inches(1.25))
except FileNotFoundError:
    print('monty-truth.png not found; skipping the picture')

# (quantity, id, description) rows for the table below.
records = (
    (3, '101', 'Spam'),
    (7, '422', 'Eggs'),
    (4, '631', 'Spam, spam, eggs, and spam'),
)

# Start with a single header row, then append one row per record.
table = document.add_table(rows=1, cols=3)
hdr_cells = table.rows[0].cells
hdr_cells[0].text = 'Qty'
hdr_cells[1].text = 'Id'
hdr_cells[2].text = 'Desc'
for qty, item_id, desc in records:  # item_id: avoid shadowing builtin id()
    row_cells = table.add_row().cells
    row_cells[0].text = str(qty)  # cell text must be a string
    row_cells[1].text = item_id
    row_cells[2].text = desc

document.add_page_break()
document.save('demo.docx')
3、程序
功能:将 BIOE 标注格式的 txt 文件按标签转换为 Word 文档,不同标签的文本显示为不同的颜色。
from random import randint
from docx import Document
from docx.shared import RGBColor
from docx.oxml.ns import qn
import re
# Path of the tagged input text file; the main section opens it as UTF-8.
readfile_path = 'test.txt'
def tagColor(tag_str):
    """Return the six-digit hex color string for a tag.

    The tag is matched by substring; the first marker found (in the order
    listed below) decides the color. Tags containing none of the markers
    (for example the "O" tag) get black.
    """
    # Order matters: earlier markers take priority over later ones.
    marker_colors = (
        ('BX', '7030A0'),  # manifestation
        ('BJ', 'FFC000'),  # pathogenesis
        ('ZM', '00B0F0'),  # syndrome
        ('BZ', 'FFFF00'),  # disease-syndrome
        ('BM', '00B050'),  # alias
    )
    for marker, hex_code in marker_colors:
        if marker in tag_str:
            return hex_code
    return '000000'  # (O) and anything unrecognised
def read_tagged_tokens(path):
    """Read a tagged text file into (text, tag) pairs.

    Every non-blank line is split on single spaces; the first field is the
    text and the second field is its tag. A blank line becomes a newline
    text paired with a single-space tag, so the caller can emit it with the
    default color. A line that has no second field gets an empty tag instead
    of aborting the read.

    Args:
        path: Path of the UTF-8 encoded input file.

    Returns:
        A list of (text, tag) tuples in file order.
    """
    pairs = []
    with open(path, 'r', encoding='utf-8') as f:
        # Iterating the file stops cleanly at EOF; no exception is needed
        # to leave the loop.
        for line in f:
            if line == '\n':
                pairs.append(('\n', ' '))
                continue
            fields = line.rstrip('\n').split(' ')
            tag = fields[1] if len(fields) > 1 else ''
            pairs.append((fields[0], tag))
    return pairs


if __name__ == "__main__":
    doc = Document()
    # Use the same font for the Normal style; the eastAsia attribute is set
    # explicitly in addition to font.name.
    doc.styles['Normal'].font.name = u'宋体'
    doc.styles['Normal']._element.rPr.rFonts.set(qn('w:eastAsia'), u'宋体')

    # All text goes into one paragraph; each token is its own run so it can
    # carry its own color.
    p = doc.add_paragraph()
    for text, tag in read_tagged_tokens(readfile_path):
        run = p.add_run(text)
        run.font.color.rgb = RGBColor.from_string(tagColor(tag))

    doc.save('final.docx')
4、函数
(1)paragraph.runs(属性,不需要加括号调用)
Paragraph objects
Sequence of Run
instances corresponding to the <w:r> elements in this paragraph.
原始文本格式如图所示:
分为两段,代码中p[0]即指第一段。
from docx import Document
# Print the text and font color of every run in the first paragraph of 1.docx.
document = Document('1.docx')
p = document.paragraphs
for n in p[0].runs:
    print('=====')
    rgb = n.font.color.rgb  # may be None, as the sample output below shows
    text = n.text
    print('text:' + text)
    # Label ends with a colon so the output matches the documented sample.
    print('rgb:' + str(rgb))
'''
=====
text: 工作中会
rgb:None
=====
text: 遇到需要读
rgb:FF0000
=====
text: 取一个有几百页的word文档并从中整理出一些信息的需求,比如产品的API文档一般是word格式的。
rgb:None
'''