#########################docx文件############################
'''
.docx文件有很多结构,有3种不同的类型来表示
在最高一层,Document对象表示整个文档
Document对象包含一个Paragraph对象的列表,表示文档中的段落,以回车键为准
每个Paragraph对象包含一个Run对象的列表
'''
#########################读取Word文档########################
import docx
# Open the sample document and walk its structure interactively:
# a Document holds Paragraph objects, and each Paragraph holds Run objects.
# The bare expressions below only display a value in an interactive session.
doc = docx.Document(r'C:\Users\shenlu\Desktop\demo.docx')
len(doc.paragraphs)
doc.paragraphs[0].text
second_para = doc.paragraphs[1]
second_para.text
len(second_para.runs)
# Text of runs 0..3 of the second paragraph; indexing up to 3 requires that
# paragraph to contain at least four runs.
for run_index in range(4):
    second_para.runs[run_index].text
########################从.docx文件中取得完整的文本########################
import docx
def getText(filename, indent='', separator='\n'):
    """Return the full text of a .docx file as a single string.

    Args:
        filename: Path of the .docx file; passed straight to docx.Document.
        indent: String prepended to the text of every paragraph. Defaults to
            the empty string (no indentation).
        separator: String placed between consecutive paragraphs. Defaults to
            a single newline; pass two newlines to leave a blank line
            between paragraphs.

    Returns:
        The text of every paragraph, in document order, joined by separator.
    """
    doc = docx.Document(filename)
    return separator.join(indent + para.text for para in doc.paragraphs)
########################从.docx文件中取得完整的文本########################
import readDocx
# Extract the text of demo.docx through the readDocx helper module, then show it.
demoText = readDocx.getText('demo.docx')
print(demoText)
########################设置Paragraph和Run对象的样式########################
'''
对于Word文档,有3种类型的样式:
段落样式可以应用于Paragraph对象,字符样式可以应用于Run对象
链接的样式可以应用于这两种对象
默认Word样式的字符串如下(注意:较新版本的python-docx按样式的显示名称查找样式,名称中带空格,例如'Heading 1'、'No Spacing'):
'Normal' 'BodyText' 'BodyText2' 'BodyText3' 'Caption' 'Heading1' 'Heading2' 'Heading3' 'Heading4'
'Heading5' 'Heading6' 'Heading7' 'Heading8' 'Heading9' 'IntenseQuote' 'List' 'List2' 'List3'
'ListBullet' 'ListBullet2' 'ListBullet3' 'ListContinue' 'ListContinue2' 'ListContinue3' 'ListNumber' 'ListNumber2' 'ListNumber3'
'ListParagraph' 'MacroText' 'NoSpacing' 'Quote' 'Subtitle' 'TOCHeading' 'Title'
'''
'''
Run对象的text属性
属性 描述
bold 文本以粗体出现
italic 文本以斜体出现
underline 文本带下划线
strike 文本带删除线
double_strike 文本带双删除线
all_caps 文本以大写字母出现
small_caps 文本以大写字母出现,原小写字母的字号小两个点
shadow 文本带阴影
outline 文本以轮廓线出现,而不是实心
rtl 文本从右至左书写
imprint 文本以刻入页面的方式出现
emboss 文本以凸出页面的方式出现
'''
########################################################################
import docx
# Load the sample document and look at the first paragraph and its style.
# The bare expressions below only display a value in an interactive session.
doc = docx.Document(r'C:\Users\shenlu\Desktop\demo.docx')
doc.paragraphs[0].text
doc.paragraphs[0].style
# Restyle the first paragraph as a level-1 heading.
doc.paragraphs[0].style = 'Heading 1'
# Inspect the second paragraph. In the recorded interactive session its style
# was displayed as: _ParagraphStyle('No Spacing') id: 124515664
doc.paragraphs[1].style
doc.paragraphs[1].text
second_runs = doc.paragraphs[1].runs
(second_runs[0].text, second_runs[1].text, second_runs[2].text, second_runs[3].text)
# Underline the second and fourth runs of that paragraph.
second_runs[1].underline = True
second_runs[3].underline = True
# Save once, after all changes have been made. NOTE: this overwrites the
# input file in place.
doc.save(r'C:\Users\shenlu\Desktop\demo.docx')
#################################写入Word文档################################
import docx
# Start a new, empty document.
doc = docx.Document()
# One heading per level 0 through 4, titled 'Header 0' .. 'Header 4'.
for level in range(5):
    doc.add_heading('Header %d' % level, level)
doc.add_paragraph('Hello world!')
# Insert a picture with an explicit width (1 inch) and height (4 cm).
picture_path = r'C:\Users\shenlu\Desktop\DSCN0859.jpg'
doc.add_picture(
    picture_path,
    width=docx.shared.Inches(1),
    height=docx.shared.Cm(4),
)
paraObj1 = doc.add_paragraph('This is a second paragraph.')
paraObj2 = doc.add_paragraph('This is a yet another paragraph.')
# Append more text to the earlier paragraph by adding a run to it.
paraObj1.add_run('This text is being added to the second paragraph.')
doc.save(r'C:\Users\shenlu\Desktop\helloworld.docx')
#################################添加换行符和换页符################################
import docx
from docx.enum.text import WD_BREAK
# Build a two-page document by ending the first paragraph with a page break.
doc = docx.Document()
first_para = doc.add_paragraph('This is on the first page!')
# Calling add_break() with no argument would insert a line break instead;
# WD_BREAK.PAGE requests a page break.
first_para.runs[0].add_break(WD_BREAK.PAGE)
doc.add_paragraph('This is on the second page!')
doc.save(r'C:\Users\shenlu\Desktop\twoPage.docx')
#################################定制邀请函################################
import docx,os
# Generate one invitation document per guest listed in guests.txt
# (one guest name per line). Each invitation is saved as '<guest name>.docx'.
#
# The file is opened in text mode so every line is a str that can be stripped
# and concatenated on both Python 2 and Python 3; `with` closes it even if
# reading fails.
# NOTE(review): the file is decoded with the platform default encoding -
# confirm that matches how guests.txt was saved.
with open(r'C:\Users\shenlu\Desktop\guests.txt') as txtcontent:
    lines = txtcontent.readlines()

for line in lines:
    # Drop the line ending (whether '\r\n' or '\n') and surrounding blanks.
    line = line.strip()
    if not line:
        # A blank line would otherwise produce an invitation named '.docx'.
        continue
    print(line)
    doc = docx.Document()
    doc.add_paragraph('It would be a pleasure to have the company of')
    doc.add_paragraph(line)
    doc.add_paragraph('at 11010 memory lane on the evening of')
    doc.add_paragraph('April 1st')
    doc.add_paragraph('at 7 o\'clock')
    doc.save(os.path.join(r'C:\Users\shenlu\Desktop', line + '.docx'))
#############################################################################
# Reference: http://nostarch.com/automatestuff/