VBA 收集 Word关键字批量处理-Excel版
copy /b 图片.gif /b + 压缩包.zip /b 结果图片.gif
- 20220321
1 修复遍历多级目录时生成文件放错位置的BUG。
2 优化开始按钮,改为切换开始暂停(其实就是结束,再次开始,可以继续处理,只有实时日志会清空重新开始打印)。
3 优化性能,没2000个文档,重启一次Word。(切记要隐藏运行,如果显示界面,未和谐的授权窗口会卡住需要手点跳过)虽然这么做个,但是暂时还不知道变慢的原因,内存CPU都显示正常。
UserForm1(窗体代码)
窗体逻辑主要是:
- 窗体内容初始化。
- 控件事件处理。
Private Sub UserForm_Initialize()
Dim currPath$, currName$
currPath = ThisWorkbook.path & "\"
currName = Left(ThisWorkbook.Name, InStrRev(ThisWorkbook.Name, ".") - 1)
源文件的目录TextBox.Text = currPath & SOURCE_FILE_PATH
完成文件的目录TextBox.Text = currPath & FINISHED_FILE_PATH
失败文件的目录TextBox.Text = currPath & ERROR_FILE_PATH
跳过文件的目录TextBox.Text = currPath & SKIP_FILE_PATH
成功日志TextBox.Text = currPath & currName & SUCCESS_FILE_SUFFIX
失败日志TextBox.Text = currPath & currName & ERROR_FILE_SUFFIX
跳过日志TextBox.Text = currPath & currName & SKIP_FILE_SUFFIX
successLogFile = 成功日志TextBox.Text
errLogFile = 失败日志TextBox.Text
skipLogFile = 跳过日志TextBox.Text
Set myConsole = 日志窗口TextBox
showDoc = 处理时显示文档CheckBox.Value
With Me.WebBrowser1
.Navigate "about:blank"
.Document.Write "<body scroll='no' style='margin: 0;border = 0;'><img id='img' src='https://i-blog.csdnimg.cn/blog_migrate/9af2881993812890d22da660c63057f4.gif' style='width: 100%;height:100%;'></body>"
End With
子目录深度ScrollBar.Min = 0
日志窗口TextBox.Text = "日志窗口:" & vbCrLf & vbCrLf & " 笑 虾" & vbCrLf & "天上游龙水中蛟,不羡高飞入云霄。" & vbCrLf & "生来无事终天笑,未曾到老先弯腰。" & vbCrLf & vbCrLf
End Sub
Private Sub UserForm_Activate()
' Call 刷新目录结构(源文件的目录TextBox.Text, 0)
' 子目录深度ScrollBar.Max = subFolderMaxLeve
子目录深度ScrollBar.Value = 0
End Sub
Private Sub 获取源文件目录Button_Click()
Dim path$, arr() As String
源文件的目录TextBox.Text = 选择目录()
Call 刷新目录结构(源文件的目录TextBox.Text, 子目录深度ScrollBar.Value)
End Sub
Private Sub 成功日志TextBox_Change()
successLogFile = 成功日志TextBox.Text
End Sub
Private Sub 失败日志TextBox_Change()
errLogFile = 失败日志TextBox.Text
End Sub
Private Sub 跳过日志TextBox_Change()
skipLogFile = 跳过日志TextBox.Text
End Sub
Private Sub 子目录深度ScrollBar_Change()
子目录深度TextBox.Value = 子目录深度ScrollBar.Value
Call 刷新目录结构(源文件的目录TextBox.Text, 子目录深度ScrollBar.Value)
End Sub
Private Sub 处理时显示文档CheckBox_Change()
On Error Resume Next
showDoc = 处理时显示文档CheckBox.Value
wordApp.Visible = showDoc
End Sub
Private Sub start()
' 选择要处理的文件所在
If 源文件的目录TextBox.Text = "" Then
源文件的目录TextBox.Text = 选择目录()
End If
If MsgBox("要处理的文件在:" & 源文件的目录TextBox.Text, vbYesNo + vbInformation, "确认源文件目录") <> vbYes Then
开始暂停ToggleButton.Caption = "开 始"
开始暂停ToggleButton.Value = False
Exit Sub
End If
Call 遍历文件夹中对文档的关键字打标记(源文件的目录TextBox.Text, 完成文件的目录TextBox.Text, 失败文件的目录TextBox.Text, 跳过文件的目录TextBox.Text, 日志窗口TextBox)
End Sub
Private Sub 开始暂停ToggleButton_Click()
If 开始暂停ToggleButton.Value Then
开始暂停ToggleButton.Caption = "暂 停"
Debug.Print 开始暂停ToggleButton.Value & "暂 停"
Call start
Else
开始暂停ToggleButton.Caption = "开 始"
Debug.Print 开始暂停ToggleButton.Value & "开 始"
End If
End Sub
Private Sub csdn博客Label_Click()
Shell "cmd /c start https://jerryjin.blog.csdn.net/article/details/123596090", vbHide
End Sub
Private Sub 刷新目录结构(目标文件夹 As String, subLevel As Integer)
Call 更新文件夹结构信息(目标文件夹, subLevel)
子目录深度ScrollBar.Max = subFolderMaxLeve
infoLog ("======================获取目录结构成功======================")
Call infoLog(subFolderString, "", "", "", vbCrLf)
End Sub
业务逻辑
遍历文档,查找替换的业务逻辑都在这。
- 遍历文件使用了
vba
的Dir("目标文件夹")
方法。第一次目录参数,第二次不带参,就可以逐个返回下一文件,直到返回空字符串
结束。 - 移动文件使用了:
Scripting.FileSystemObject
- 输出日志文件用的是
VBA
的Shell "cmd.exe /c echo 日志内容 >> 日志文件", vbHide
,第二个参数vbHide
表示隐藏执行。
Option Explicit
Public Const SOURCE_FILE_PATH As String = "sourceData\" ' 要处理的文件所在
Public Const FINISHED_FILE_PATH As String = "newData\" ' 存完成文件的目录名
Public Const ERROR_FILE_PATH As String = "errorData\" ' 存出错文件的目录名
Public Const SKIP_FILE_PATH As String = "skipData\" ' 存跳过文件的目录名
Public Const DELIMS As String = "," ' 关键字分隔符
Public Const DEFULT_REPLACEMENT_TEXT As String = "^&" ' 默认替换字符
Public Const STYLE_NAME As String = "关键字" ' 样式名
Public Const DEL_FLAG As String = "【del】" ' 获取所有文件夹时使用的过滤删除标记。
Public Const ERROR_FILE_SUFFIX As String = "-Err.log" ' 出错日志后缀
Public Const SKIP_FILE_SUFFIX As String = "-Skip.log" ' 跳过日志后缀
Public Const SUCCESS_FILE_SUFFIX As String = "-Success.log" ' 跳过日志后缀
Public successLogFile As String ' 成功日志
Public errLogFile As String ' 错误日志
Public skipLogFile As String ' 跳过记录的日志
Public myConsole As Object ' 跳过记录的日志
Public showDoc As Boolean ' 显示word
Public subFolderArr() As String ' 需要遍历的目录结构
Public subFolderRelativePathArr() As String ' 需要目录结构相对路径(以原文件目录为基准)
Public subFolderMaxLeve As Integer ' 需要遍历的目录结构最大深度
Public subFolderString As String ' 需要遍历的目录结构(字符串)
Public fs As Object ' 文件系统对象
Public wordApp As Word.Application ' word 对象
Private keyArray() As String ' 需要处理的关键字,载入此数组
Private keyArrLen As Integer ' 需要处理的关键字个数
Sub 遍历文件夹中对文档的关键字打标记(sourceFilePath As String, newPath As String, errPath As String, skipPath As String, logTextBox As Object)
On Error GoTo ErrorHandler
'currPath$,
Dim CurrFile$, CurrFileName$, currDoc As Word.Document, tempFileName As String, pathLen As Integer, path_i As Integer
' --------- 初始化 开始 ----------
' 准备 word 对象
Call clearLog
Call infoLog("1. 初始化 word 对象……")
Set wordApp = 获取wordApp实例()
wordApp.Visible = showDoc
Call infoLog("2. 初始化 word 对象完成!^_^")
' 获取个当前位置信息
' currPath = ThisWorkbook.path & "\"
CurrFileName = ThisWorkbook.Name
Call infoLog("3. 定位当前文档位置成功!")
' 创建文件系统对象
Set fs = CreateObject("Scripting.FileSystemObject")
Call infoLog("4. 获取 FileSystemObject 成功!")
' 准备文件夹:复制文件夹结构
If Dir(newPath, vbDirectory) = vbNullString Then Call 复制文件夹结构(sourceFilePath, newPath) 'MkDir newPath
Call infoLog("5. 成功文件目录准备完毕!")
If Dir(skipPath, vbDirectory) = vbNullString Then Call 复制文件夹结构(sourceFilePath, skipPath) 'MkDir skipPath
Call infoLog("6. 跳过文件目录准备完毕!")
If Dir(errPath, vbDirectory) = vbNullString Then Call 复制文件夹结构(sourceFilePath, errPath) ' MkDir errPath
Call infoLog("7. 失败文件目录准备完毕!")
' 从excel表读取取关键字
keyArray = 获取关键字()
keyArrLen = UBound(keyArray)
Call infoLog("8. 加载关键字数据完成!")
' --------- 初始化 结束 ----------
Call infoLog("9. 开始处理文档……")
UserForm1.WebBrowser1.Visible = True
' -------------------------- 遍历目录 开始 --------------------------
Dim tempNewPath$, tempSkipPath$, tempErrPath$
pathLen = UBound(subFolderArr)
For path_i = 0 To pathLen
' 获取当前目录的成功失败跳过等相关路径
sourceFilePath = subFolderArr(path_i)
tempNewPath = newPath & subFolderRelativePathArr(path_i)
tempSkipPath = skipPath & subFolderRelativePathArr(path_i)
tempErrPath = errPath & subFolderRelativePathArr(path_i)
Call infoLog(sourceFilePath, "【开始处理文件夹】:", "", "", vbCrLf)
CurrFile = Dir(sourceFilePath)
' ------------- 遍历目录中的文件 开始 -------------
Do Until CurrFile = ""
If Right(CurrFile, 5) = ".docx" Or Right(CurrFile, 4) = ".doc" Then
tempFileName = sourceFilePath & CurrFile
Set currDoc = wordApp.Documents.Open(tempFileName, Visible:=showDoc)
' Debug.Print currDoc.Content
' 找到关键字的,另存一份到 newPath 下
If 对关键字打标记(currDoc) Then
currDoc.SaveAs2 Filename:=tempNewPath & CurrFile, FileFormat:=wdFormatXMLDocument
Kill tempFileName
currDoc.Close wdDoNotSaveChanges
Set currDoc = Nothing
successlog tempFileName
UserForm1.成功数量TextBox.Value = UserForm1.成功数量TextBox.Value + 1
Else
currDoc.Close wdDoNotSaveChanges
Set currDoc = Nothing
skiplog tempFileName
Call 移动文件(tempFileName, tempSkipPath & CurrFile)
UserForm1.跳过数量TextBox.Value = UserForm1.跳过数量TextBox.Value + 1
End If
End If
NextFile:
DoEvents
' 如果按下暂停按钮
If UserForm1.开始暂停ToggleButton.Value = False Then
Call infoLog("暂停中。。。。。。")
wordApp.Quit ' 关闭 word
Exit Sub
End If
' 优化性能 每2000次重启一下word
If (0 + UserForm1.成功数量TextBox.Value + UserForm1.跳过数量TextBox.Value + UserForm1.失败数量TextBox.Value) Mod 2000 = 0 Then
Call infoLog("优化性能:Word 重启中。。。。。。")
wordApp.Quit ' 关闭 word
Set wordApp = Nothing
Set wordApp = CreateObject("Word.Application")
wordApp.Visible = showDoc
End If
CurrFile = Dir()
Loop
' ------------- 遍历目录中的文件 结束 -------------
Next
' -------------------------- 遍历目录 结束 --------------------------
Set fs = Nothing
' 处理完毕重置UI
wordApp.Visible = True
UserForm1.WebBrowser1.Visible = False
UserForm1.处理时显示文档CheckBox.Value = True
UserForm1.开始暂停ToggleButton.Caption = "开 始"
UserForm1.开始暂停ToggleButton.Value = False
'wordApp.Quit ' 关闭 word
Call MsgBox("处理完毕,共处理 " & (0 + UserForm1.成功数量TextBox.Value + UserForm1.跳过数量TextBox.Value + UserForm1.失败数量TextBox.Value) & "个文档!", vbOKOnly + vbInformation, "温馨提示")
Exit Sub
ErrorHandler:
UserForm1.失败数量TextBox.Value = UserForm1.失败数量TextBox.Value + 1
errlog "================================================================================"
errlog "【错误文件】" & tempFileName
errlog Err.Number & ":" & Replace(Err.Description, vbLf, " vbCrLf ")
Call 移动文件(tempFileName, tempErrPath & CurrFile)
Resume NextFile
End Sub
Function 对关键字打标记(doc As Word.Document)
On Error GoTo ErrorHandler
Dim i As Integer, edited As Boolean ' 默认未编辑状态false
Call 创建样式(doc, STYLE_NAME)
' 遍历查找关键字,并标示。 keyArray = [0源字符, 1目标字符, 2替换方式, 3高亮]
For i = 0 To keyArrLen
With doc.Content.Find
.ClearFormatting ' 清除上一次的参数
.Replacement.ClearFormatting ' 清除上一次的参数
.Forward = True ' 向前查找
.Wrap = wdFindContinue ' 到达搜索范围的开始或结尾时,继续执行查找操作
.Text = keyArray(i, 0) ' 查找文字
.Replacement.Text = keyArray(i, 1) ' 替换文字
.MatchWildcards = True ' 支持通配符
If keyArray(i, 3) = "是" Then
.Replacement.Style = STYLE_NAME
Else
.Replacement.ClearFormatting
End If
Call .Execute(Replace:=keyArray(i, 2)) ' 执行替换
NextKey:
' 找到了关键字,标记为编辑过。
If .Found Then edited = True
End With
DoEvents
Next
对关键字打标记 = edited
Exit Function
ErrorHandler:
errlog "================================================================================"
errlog "【对关键字打标记出错】" & keyArray(i, 0)
errlog Err.Number & ":" & Replace(Err.Description, vbLf, " vbCrLf ")
Resume NextKey
End Function
Function 创建样式(doc As Word.Document, styleName As String)
On Error Resume Next ' 出错时忽略,继续向下运行。
' 判断样式,不存在则创建
Dim flag As Boolean
flag = doc.Styles(styleName).NameLocal = styleName
If flag Then
Exit Function
End If
doc.Styles.Add Name:=styleName, Type:=wdStyleTypeCharacter
With doc.Styles(styleName).Font
' .NameFarEast = "微软雅黑"
.Bold = True
.Color = wdColorYellow
.Shading.ForegroundPatternColor = wdColorAutomatic
.Shading.BackgroundPatternColor = wdColorRed
End With
End Function
' keyArray = [0源字符, 1目标字符, 2替换方式, 3高亮]
Function 获取关键字() As String()
Dim myRanges As Range, keyArray() As String, arrLen As Integer, i As Integer, j As Integer, dict As Object
' 字段名与列号对应存入字典
Set dict = CreateObject("Scripting.Dictionary")
Call dict.Add("源字符", Range("b1:e1").Find("源字符").Column - 1)
Call dict.Add("目标字符", Range("b1:e1").Find("目标字符").Column - 1)
Call dict.Add("替换方式", Range("b1:e1").Find("替换方式").Column - 1)
Call dict.Add("高亮", Range("b1:e1").Find("高亮").Column - 1)
' 数据行数
arrLen = Range(Range("B2"), Range("B2").End(xlDown)).Rows.Count
' 数据范围
Set myRanges = Worksheets("关键字").Range(Range("B2"), Range("E2").Offset(arrLen - 1, 0))
' 重置动态数组的长度
ReDim keyArray(arrLen - 1, 3) As String
For i = 0 To arrLen - 1
keyArray(i, 0) = myRanges(i + 1, dict.Item("源字符"))
keyArray(i, 1) = myRanges(i + 1, dict.Item("目标字符"))
If myRanges(i + 1, dict.Item("替换方式")) = "首个" Then
keyArray(i, 2) = 1 ' wdReplaceOne 替换遇到的第一个匹配项。
Else
keyArray(i, 2) = 2 ' wdReplaceAll 替换所有匹配项。
End If
keyArray(i, 3) = "是"
Next i
获取关键字 = keyArray
End Function
' 移动文件
Sub 移动文件(sourcePath As String, targetPath As String)
On Error GoTo ErrorHandler
Call fs.moveFile(sourcePath, targetPath)
Error_Handler_Exit:
Exit Sub
ErrorHandler:
errlog "================================================================================"
errlog "【移动文件失败】" & sourcePath
errlog Err.Number & ":" & Replace(Err.Description, vbLf, " vbCrLf ")
Resume Error_Handler_Exit
End Sub
' 获取word应用,失败就创建一个新的
Function 获取wordApp实例()
On Error Resume Next
Set 获取wordApp实例 = GetObject(, "Word.Application")
If Err.Number <> 0 Then
Set 获取wordApp实例 = CreateObject("Word.Application")
End If
End Function
' 写日志
Sub errlog(logMsg As String)
Shell "cmd.exe /c echo " & Format(Now, "YYYY-MM-DD HH:MM:SS") & " 》" & logMsg & " >> " & errLogFile, vbHide
Call infoLog(logMsg, "【失败】:")
End Sub
Sub skiplog(logMsg As String)
Shell "cmd.exe /c echo " & logMsg & " >> " & skipLogFile, vbHide
Call infoLog(logMsg, "【跳过】:")
End Sub
Sub successlog(logMsg As String)
Shell "cmd.exe /c echo " & logMsg & " >> " & successLogFile, vbHide
Call infoLog(logMsg, "【成功】:")
End Sub
Sub infoLog(logMsg As String, Optional logType As String = "【信息】:", Optional logTime As String = "now", Optional logSeparator As String = " ===》 ", Optional logEnd As String = "")
myConsole.Text = myConsole
With myConsole
.SetFocus
.Text = .Text & vbCrLf & logType & VBA.IIf(logTime = "now", Format(Now, "YYYY-MM-DD HH:MM:SS"), logTime) & logSeparator & logMsg & logEnd
.SelStart = Len(.Value)
End With
DoEvents
End Sub
Sub clearLog()
myConsole.Text = ""
End Sub
工具模块
- 遍历文件夹,看了网上的方案感觉效率不太给力,这里直接调
CMD
命令曲线救国了。dir C:\原目录 /b/s *.doc?
- 批量拷贝目录结构,不带文件。
xcopy C:\原目录 C:\目标目录 /t/i
Function 选择目录()
With Application.FileDialog(msoFileDialogFolderPicker)
.InitialFileName = ThisWorkbook.path & "\"
If .Show = -1 Then ' OK返回 -1,Cancel 返回 0
选择目录 = .SelectedItems(1)
Else
选择目录 = ""
End If
End With
End Function
Function 统计字符串出现次数(sourceStr As String, searchStr As String) As Long
On Error GoTo Error_Handler
统计字符串出现次数 = UBound(Split(sourceStr, searchStr))
Error_Handler_Exit:
Exit Function
Error_Handler:
Resume Error_Handler_Exit
End Function
Function 执行cmd命令(cmdStr As String) As String
On Error Resume Next
Dim oShell As Object ' WScript.Shell
Dim oExec As Object ' WScript.Shell的Exec执行结果对象
Set oShell = CreateObject("WScript.Shell")
Set oExec = oShell.Exec("cmd /c " & cmdStr)
执行cmd命令 = oExec.StdOut.ReadAll
oShell.Quit
Set oExec = Nothing
Set oShell = Nothing
End Function
' 刷新【子文件夹结构数组】【子文件夹最大深度】【子文件夹结构字符串】
Function 更新文件夹结构信息(目标文件夹 As String, depth As Integer) As String
On Error Resume Next
Dim arr() As String, arrLen As Integer, baseDepth As String, i As Integer, currDepth As Integer, str As String
' 末尾有 \ 就去掉
目标文件夹 = VBA.IIf(Right(目标文件夹, 1) = "\", Left(目标文件夹, Len(目标文件夹) - 1), 目标文件夹)
' 目标文件夹作为基础深度
baseDepth = 统计字符串出现次数(目标文件夹, "\")
' 执行cmd命令获取所有子文件夹
str = 目标文件夹 & vbCrLf & 执行cmd命令("dir " & 目标文件夹 & " /ad /s /b")
' 按 vbCrLf 切分为数组
arr = Split(str, vbCrLf)
arrLen = UBound(arr)
' 遍历所有目录
For i = 0 To arrLen
currDepth = 统计字符串出现次数(arr(i), "\") - baseDepth
If Len(arr(i)) = 0 Or currDepth > depth Then
arr(i) = DEL_FLAG
Else
arr(i) = VBA.IIf(Right(arr(i), 1) <> "\", arr(i) & "\", arr(i))
End If
' 更新子文件夹最大深度
subFolderMaxLeve = VBA.IIf(currDepth > subFolderMaxLeve, currDepth, subFolderMaxLeve)
Next
' 过滤掉空字符串,得到结果
arr = Filter(arr, DEL_FLAG, False, vbTextCompare)
' 更新子文件夹结构数组
subFolderArr = arr
' 更新子文件夹结构字符串
subFolderString = Join(subFolderArr, vbCrLf)
' 获取相对文件(从根开始,所有目录放在一个数组中)
subFolderRelativePathArr = Split(Replace(subFolderString, 目标文件夹 & "\", ""), vbCrLf)
' 如果为空,给个空字符串,对应根路径。(不然后面取值拼接路径的时候,标越界直接崩)
If CStr(Join(subFolderRelativePathArr, "")) = "" Then
ReDim subFolderRelativePathArr(0) ' 重置动态数组的长度
subFolderRelativePathArr(0) = ""
End If
更新文件夹结构信息 = subFolderString
End Function
Function 复制文件夹结构(原文件夹 As String, 目标文件夹 As String)
Call 执行cmd命令("xcopy " & 原文件夹 & " " & 目标文件夹 & " /t/i")
End Function
Function 移除末尾空行(myString As String)
If Len(myString) > 0 Then
If Right$(myString, 2) = vbCrLf Or Right$(myString, 2) = vbNewLine Then
myString = Left$(myString, Len(myString) - 2)
End If
End If
移除末尾空行 = myString
End Function
性能优化
暂时感觉不出来。。。目前越来越慢的是word,但是又好像有当前这个Excel有关系。
Public CalcState As Long
Public EventState As Boolean
Public PageBreakState As Boolean
' 业务代码开始前执行
Sub OptimizeCode_Begin(app As Object)
On Error Resume Next
app.ScreenUpdating = False
EventState = app.EnableEvents
app.EnableEvents = False
CalcState = app.Calculation
app.Calculation = xlCalculationManual
PageBreakState = ActiveSheet.DisplayPageBreaks
ActiveSheet.DisplayPageBreaks = False
End Sub
' 业务代码结束后执行
Sub OptimizeCode_End(app As Object)
On Error Resume Next
ActiveSheet.DisplayPageBreaks = PageBreakState
app.Calculation = CalcState
app.EnableEvents = EventState
app.ScreenUpdating = True
End Sub
Sheet1(关键字)(工作表按钮事件)
表格中添加了一个按钮,用于打开窗口
Private Sub CommandButton1_Click()
UserForm1.Show
End Sub
ThisWorkbook(工作簿事件)
打开工作簿后自动弹出窗口
Private Sub Workbook_Activate()
UserForm1.Show
End Sub
引用 word
因为声明了word对象,需要引用一下库。
源文件
下载↑↑↑顶部预览图,用解压工具打开即可。
参考资料
MSDN - Learn / Office VBA 参考 / Word / 对象模型 / 查找 Word (对象)
MSDN - Learn / Office VBA 参考 / Word / 对象模型 / Find 对象 / 方法 / Find.Execute 方法 (Word)
Find and Replace using wildcards
How to fit image size on excel WebBrowser control
Word实现的那个旧版 —— VBA 收集 Word关键字批量处理