用ptyhon和vba清除Word的标题样式保留字体格式,生成标题目录。

不同格式的word文档合并为一个文件。

问题:

文件汇编,需要将70多个文件汇编成一个到一个文件里。最终汇编的方式是用word--插入--对象--文件中的文字。

但是由于原始文件有的设置了自动编号,合并文档后,编号导致混乱。有的没有设置标题样式,提取不出来目录。

最终汇总的时候,需要清除自动编号的域,已经生成的编号转换成文字,统一设置标题格式。

同一个段里,字体还不一样,需要清除Word的标题样式保留字体格式。

解决方式:

开始想用python+docx库解决。但是docx库在原始文档没有style的情况下,无论如何添加不进去。

if match(run.text,"第*章*"): #匹配模式为问号,及匹配一个任意字符
   document.add_heading("",level=1)
   paragraph.style = document.styles['heading 1']

这样的总是出错,因为heading不存在,查了官方文档,说word默认有很多样式,但是你不起用的话,文档里是没有这个样式的。docx文档里也没说怎么解决。

"no style with name 'Heading 1'"

换解决思路:

用 win32com 操作

里面用到一句

ActiveDocument.ConvertNumbersToText

在VBA里运行没有问题,但是win32调用也出错。

查找过程中查到一个用WIN32直接运行宏命令的方式,算球,全部代码写在vba里,python循环目录和重命名就好。这样的好处是,python只负责循环和重命名,有需求直接修改vba.bas就好了,宏兼容性最高。

于是代码如下:

import os
import glob

from win32com.client import constants

from win32com.client import Dispatch #需要安装的是pypiwin32模块

App=Dispatch('Word.Application')
App.Visible=0

def addmaco(file):
   
    App.DisplayAlerts = 0
    doc = App.Documents.Open(file)
    #导入宏代码

    #通过文档实例,获取VBProject的组件,其中VBComponents中的参数至关重要,因为ThisDocument表示该文档,也就是说所有这篇生成文档的操作在该组件中都可以捕获#到,那么就可以在里面创建Document_Open函数来监控文档被打开

    docCode = doc.VBProject.VBComponents("ThisDocument").CodeModule
    macro = ""

    #vba.bas为宏文件,需要导入到ThisDocument,ThisDocument即对应word下,按Alt+F11,调出vba窗口,该文档下的Microsoft Word对象下的ThisDocument。
    string=open("vba.bas", encoding='utf-8')
    macro=string.read()
    docCode.AddFromString(macro)
    #doc.Save()   #添加宏这里不能保存,因为word无法保存为不含宏的文件.但是添加宏以后就可以运行了。python里就是用到这个功能
    
def DelMacro(file): #添加宏
    App.DisplayAlerts = 0
    doc = App.Documents.Open(file) 
    doc.Save() 



def get_file_path(root_path,file_list,dir_list):
    #获取该目录下所有的文件名称和目录名称
    dir_or_files = os.listdir(root_path)
    for dir_file in dir_or_files:
        #获取目录或者文件的路径
        dir_file_path = os.path.join(root_path,dir_file)
        #判断该路径为文件还是路径
        if os.path.isdir(dir_file_path):
            dir_list.append(dir_file_path)
            #递归获取所有文件和目录的路径
            get_file_path(dir_file_path,file_list,dir_list)
        else:
            file_list.append(dir_file_path)


root_path = r"D:\企业制度"

file_list = []
dir_list = []
path='d:/'
get_file_path(root_path,file_list,dir_list)



for dirlst in dir_list:
    path=dirlst
    print(path)
    preName="" #二级目录
    dir=(dirlst.split("\\"))                            
    if (len(dir))>2:
        preName=(dir[2])

    old_names = os.listdir(path)  #取路径下的文件名,生成列表


    for old_name in old_names:      #遍历列表下的文件名
        if os.path.isfile(path + "\\" +old_name): #防止文件夹加上.pdf后缀,先判断是否是文件,必须带路径
            if old_name.endswith('.doc') or old_name.endswith('.docx'):
                
                file=path + "\\" +old_name
                print("打开:"+file)

                doc = App.Documents.Open(file)
                addmaco(file)
                App.Application.Run("numTotext")
           
                filename=App.ActiveDocument.Sentences(1).Text.replace('\r','') #删除字符串里的回车符
                if filename=="": #如果第一行为空,取第二行
                    filename=App.ActiveDocument.Sentences(2).Text.replace('\r','')
                    filename=filename.replace('\t','')


                filename=filename.replace('\t','')
                #filenamePara=str(App.ActiveDocument.Paragraphs(1)).replace('\r','')
                #filenamePara=filenamePara.replace('\t','')
                #print(filenamePara)
                print(filename)       
                newpath="d:\\企业制度统一格式\\"+preName
                Newfile=newpath+"\\"+preName+"_"+str(filename)
                print(Newfile)
                doc.SaveAs(Newfile,16) #将所有doc,docx都统一保存为docx,后面带16

                doc.Close()
App.Quit() 

vba.bas文件如下

Sub numTotext()
    '数字转文本 
    '仅list是 ActiveDocument.range.listformat.ConvertNumbersToText
    ActiveDocument.ConvertNumbersToText '自动编号的数字会变成文本,但是域还在
    '删除第一行'单位名称'

    Selection.HomeKey Unit:=wdStory
    Selection.Find.ClearFormatting
    Selection.Find.Replacement.ClearFormatting
    With Selection.Find
        .Text = "单位名称^p" '包括换行
        .Replacement.Text = ""
        .Forward = True
        .Wrap = wdFindAsk
        .Format = False
        .MatchCase = False
        .MatchWholeWord = False
        .MatchByte = True
        .MatchWildcards = False
        .MatchSoundsLike = False
        .MatchAllWordForms = False
        .Execute Replace:=wdReplaceOne
    End With
    
    Selection.HomeKey Unit:=wdStory '回到首行

    Dim searchStr
    searchStr = Chr(13)
    If (InStr(ActiveDocument.Paragraphs(1).Range.Text, searchStr) = 1) Then     '如果第一行是回车,删除
        Selection.Range.Delete
    End If



    '去除底纹
    Selection.WholeStory 
    Options.DefaultHighlightColorIndex = wdNoHighlight
    Selection.Range.HighlightColorIndex = wdNoHighlight
'Sub 清除Word的标题样式保留字体格式()
     Dim para As Paragraph
     Dim fnt As Font
     Dim pfmt As Variant
     Dim lineup1 As Variant
     Dim lineup2 As Variant
     Dim linedo1 As Variant
     Dim linedo2 As Variant
     Dim fntName As Variant
     Dim fntsize As Variant
     Dim shSj As Variant
     Dim lineSpace As Variant
     Dim lnSpcrule As Variant
     Dim alnMent As Variant
     
    With ActiveDocument.Styles("正文").Font
        .NameFarEast = "仿宋_GB2312"
        .NameAscii = "Calibri"
        .NameOther = "Calibri"
        .Name = "Calibri"
        .Size = 16
    End With

     
    '无标记
    With ActiveWindow.View.RevisionsFilter
        .Markup = wdRevisionsMarkupNone
        .View = wdRevisionsViewFinal
    End With

    '接受修订
    ActiveDocument.AcceptAllRevisions
    ActiveDocument.TrackRevisions = False
     
     
     For Each para In ActiveDocument.Paragraphs
         With para
             'If .Style <> ActiveDocument.Styles("正文") 不同的文章的正文格式也不一样,所以无论什么格式通通成正文
             Set fnt = .Range.Font '转换样式之前,先记下来字体
             Set pfmt = .Style.ParagraphFormat
             fntName = .Range.Font.Name
             'MsgBox (.Range.Text)
             fntsize = .Range.Font.Size                                                    
             
         
             alnMent = .Range.ParagraphFormat.Alignment
             shSj = .Range.ParagraphFormat.FirstLineIndent
             'lineup1 = .Style.ParagraphFormat.LineUnitBefore
             'lineup2 = .Style.ParagraphFormat.SpaceBefore
             'linedo1 = .Style.ParagraphFormat.LineUnitAfter
             'linedo2 = .Style.ParagraphFormat.SpaceAfter
             
             'fnt = .Range.Font
             'pfmt = .Range.ParagraphFormat.LineSpacing
             lineup1 = .Range.ParagraphFormat.LineUnitBefore
             lineup2 = .Range.ParagraphFormat.SpaceBefore
             linedo = .Range.ParagraphFormat.LineUnitAfter
             linedo2 = .Range.ParagraphFormat.SpaceAfter
             lnSpcrule = .Range.ParagraphFormat.LineSpacingRule
             lineSpace = .Range.ParagraphFormat.LineSpacing
             
             .Style = ActiveDocument.Styles("正文")
             
             .Range.ListFormat.RemoveNumbers '清除自动编号的代码
             .Range.Font = fnt
             .Range.Font.Name = fntName
             .Range.ParagraphFormat.Alignment = alnMent
             
             .Range.Font.Size = fntsize
             '.Range.ParagraphFormat.LineSpacing = pfmt
             If lineSpace < =1 Then
                lineSpace = 1
             End If
             .Range.ParagraphFormat.LineSpacing = lineSpace
             '.Range.ParagraphFormat = pfmt
             .Range.ParagraphFormat.FirstLineIndent = shSj

            

             .Range.ParagraphFormat.LineUnitBefore = lineup1
             .Range.ParagraphFormat.SpaceBefore = lineup2
             .Range.ParagraphFormat.LineUnitAfter = linedo1
             .Range.ParagraphFormat.SpaceAfter = linedo2
             .Range.ParagraphFormat.LineSpacingRule = lnSpcrule
             .Range.ParagraphFormat.LineSpacing = lineSpace

             
             'End If
         End With
     Next








    '设置页面大小
    'With ActiveDocument.PageSetup
    '        .LineNumbering.Active = False
    '        .Orientation = wdOrientPortrait
    '        .TopMargin = CentimetersToPoints(3.7)
    '        .BottomMargin = CentimetersToPoints(3.5)
    '        .LeftMargin = CentimetersToPoints(2.6)
    '        .RightMargin = CentimetersToPoints(2.6)
   
    'End With

    

    
    With ActiveDocument.Styles("标题 1").Font
        .NameFarEast = "方正小标宋简体"
        .Size = 22
    End With
    With ActiveDocument.Styles("标题 1").ParagraphFormat
        .LeftIndent = CentimetersToPoints(0)
        .RightIndent = CentimetersToPoints(0)
        .SpaceBefore = 0
        .SpaceBeforeAuto = False
        .SpaceAfter = 0
        .SpaceAfterAuto = False
        .LineSpacingRule = wdLineSpaceSingle
        .Alignment = wdAlignParagraphCenter
        .WidowControl = True
        .KeepWithNext = False
        .KeepTogether = False
        .PageBreakBefore = False
        
    End With

    With ActiveDocument.Styles("标题 1")
        .AutomaticallyUpdate = False
        .BaseStyle = "正文"
        .NextParagraphStyle = "标题 2"
    End With



    ActiveDocument.Paragraphs(1).Range.Select    
    Selection.Style = ActiveDocument.Styles("标题 1")

    
    For i = 1 To 10 '循环十次,替换空格
        Selection.WholeStory
        Selection.Find.ClearFormatting
        Selection.Find.Replacement.ClearFormatting
        With Selection.Find
            .Text = "章  "
            .Replacement.Text = "章 "
            .Execute Replace:=wdReplaceAll
        End With
    Next i
    
    '添加分页
    Selection.EndKey Unit:=wdStory
    Selection.InsertBreak Type:=wdPageBreak


    
End Sub

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值