[UMU WSH 教程](43) WIA 应用实例 - 批量转换图片格式
[UMU WSH 教程](42) FSO 应用实例 - 批量删除文件
UMU WSH 教程代码下载:http://sdrv.ms/ZpPPaS
UMU WSH Git:http://git.oschina.net/umu618/umu-wsh
数据经常 in-in-out-out 难免产生重复,比如上次 UMU 在 Surface RT 上导入 iPhone4S 和 iPhone5 的照片时发生的悲剧:
把 iPhone 4S 的照片导到 #Surface#,然后刷机去越狱,后来又把 iPhone 5 的照片也导进来,没料到两个 iPhone 的文件夹是一样的名字,于是文件混到了一起,尼玛!里面有不少内容不一样文件名一样的照片,所以选择不覆盖,重新复制 5 的照片到另一个文件夹,但 4S 的文件夹里已经混入了好多 5 的照片!还好 #Surface# 支持 VBScript,哥的去重复文件脚本可以发挥作用了!生产力第一名的平板——其实就是给没有生产力的 iOS 设备打工的,哈!悲剧~
本篇要介绍的是使用 Surface RT 支持的 WSH 脚本实现查找重复文件这个任务,其中使用到三个主要对象:FSO(Scripting.FileSystemObject)和 SD(Scripting.Dictionary)大家应该已经熟悉,WII 是 WindowsInstaller.Installer,参考:《[UMU WSH 教程](40) 利用 WindowsInstaller.Installer 对象计算文件 MD5 hash 值》一文,http://hi.baidu.com/umu618/item/0769e3cecd216e3298b4980a
先把思路介绍清楚,以便读者使用其它语言实现:1、计算 Hash 的代价是比较高的,而文件大小是很容易获取的,所以应该先利用文件大小来比较文件,只有大小重复的才需要计算 Hash;2、空文件都是重复的,不用计算 Hash;3、由于要做成通用程序,所以只做了查找,您可以根据代码,自行添加删除带某些特征的重复文件的功能。代码如下:
' 44_FindDuplicates.VBS
' UMU @ 10:23 2013/05/26
' [UMU WSH Tutorial](44) Find duplicate files
Option Explicit

' Title used for the message boxes shown by this script.
Const APP_TITLE = "UMU.Script.Tools.FindDuplicates"

' Script-wide state shared by every procedure below.
Dim fso, wi                        ' Scripting.FileSystemObject / WindowsInstaller.Installer instances
Dim file_name_dictionary           ' file index -> full path
Dim file_size_dictionary           ' file size -> ";"-joined list of file indexes
Dim file_size_and_hash_dictionary  ' "size:hash" -> ";"-joined list of file indexes
Dim file_index, group              ' next file index to assign / number of duplicate groups found

Main
' Script entry point.
'
' Without arguments: explains how to use the script and offers to copy it into
' the user's SendTo folder, then returns.
' With arguments: every argument that is an existing folder is scanned
' recursively, every argument that is an existing file is registered, and
' anything else is ignored. If at least two files share a size, the user is
' asked whether to continue with the hash comparison (FindDuplicates).
'
' The "number of duplicate groups" summary is shown only after FindDuplicates
' has actually run; otherwise `group` would still be unset and the summary
' would display an empty count.
Private Sub Main()
    Set fso = CreateObject("Scripting.FileSystemObject")

    Dim args
    Set args = WScript.Arguments

    If args.Count = 0 Then
        ' Nothing to process: show usage and offer the SendTo installation.
        Dim wsh
        Dim send_to, copy_to

        MsgBox "本程序用来查找重复文件(根据文件内容,非文件名)。" & vbCrLf & _
            "请把要处理的文件或文件夹拖放到本程序的图标上!", _
            vbInformation, APP_TITLE

        Set wsh = CreateObject("WScript.Shell")
        send_to = wsh.SpecialFolders("SendTo")
        copy_to = send_to & "\FindDuplicates.VBE"

        If Not fso.FileExists(copy_to) Then
            If vbOK = MsgBox("本程序用来查找重复文件。" & vbCrLf & _
                "请把要处理的文件或文件夹拖放到本程序的图标上!" & vbCrLf & vbCrLf & _
                "提示:您可以把此文件放在 Sendto 目录里,然后使用右键菜单的“发送到”。" & vbCrLf & _
                "您的 Sendto 目录是 " & send_to & vbCrLf & "按“确定”执行复制操作。", _
                vbOKCancel + vbInformation, APP_TITLE) Then
                fso.CopyFile WScript.ScriptFullName, copy_to
                If vbYes = MsgBox("是否查看 Sendto 目录?", vbQuestion + vbYesNo, APP_TITLE) Then
                    ' Quote the path so that spaces or commas in the profile
                    ' path are not misparsed by Explorer's command line.
                    wsh.Run "%SystemRoot%\explorer.exe /n, /select,""" & copy_to & """"
                End If
            End If
        End If

        Set wsh = Nothing
        Set args = Nothing
        Set fso = Nothing
        Exit Sub
    End If

    Set file_name_dictionary = CreateObject("Scripting.Dictionary")
    Set file_size_dictionary = CreateObject("Scripting.Dictionary")
    file_index = 0
    group = 0

    ' Register every file reachable from the command-line arguments.
    Dim ar
    For Each ar In args
        If fso.FolderExists(ar) Then
            Call AddFolder(ar)
        ElseIf fso.FileExists(ar) Then
            Call AddFile(ar)
        End If
    Next
    Set args = Nothing

    ' One size entry per file means no two files share a size, so no two files
    ' can have identical content and hashing is unnecessary.
    If file_name_dictionary.Count = file_size_dictionary.Count Then
        MsgBox "文件总数:" & file_name_dictionary.Count & vbCrLf & _
            "没有重复的文件", vbInformation, "整个世界清静了!"
    ElseIf MsgBox("文件总数:" & file_name_dictionary.Count & vbCrLf & _
        "文件大小不重复数:" & file_size_dictionary.Count, vbInformation + vbOKCancel, "按确定继续") = vbOK Then
        Call FindDuplicates()
        MsgBox "重复组数:" & group, vbInformation, "整个世界清静了!"
    End If

    Set fso = Nothing
    Set file_name_dictionary = Nothing
    Set file_size_dictionary = Nothing
End Sub
' Registers every file directly inside folder_path, then recurses into each
' of its subfolders.
'   folder_path - path of the folder to scan.
Private Sub AddFolder(ByVal folder_path)
    Dim root, entry, child

    Set root = fso.GetFolder(folder_path)

    ' Files of this folder first ...
    For Each entry In root.Files
        AddFileWithSize entry.Path, entry.Size
    Next

    ' ... then the whole subtree below it.
    For Each child In root.SubFolders
        AddFolder child.Path
    Next
End Sub
' Registers a single file, using the path and size reported by the
' FileSystemObject for file_path.
'   file_path - path of an existing file.
Private Sub AddFile(file_path)
    With fso.GetFile(file_path)
        AddFileWithSize .Path, .Size
    End With
End Sub
' Records one file in the global dictionaries and advances file_index.
'   file_path - path stored under the current file_index.
'   file_size - size used as the grouping key; files with the same size
'               accumulate their indexes in one ";"-separated list.
Private Sub AddFileWithSize(file_path, file_size)
    file_name_dictionary.Add file_index, file_path

    If Not file_size_dictionary.Exists(file_size) Then
        ' First file seen with this size.
        file_size_dictionary.Add file_size, file_index
    Else
        ' Another file with the same size: append its index to the list.
        file_size_dictionary(file_size) = file_size_dictionary(file_size) & ";" & file_index
    End If

    file_index = file_index + 1
End Sub
' Formats int as hexadecimal, left-padded with zeros to 8 digits, and returns
' the four 2-digit byte pairs in reversed order (last pair first).
'   int - the integer value to format.
' Returns an 8-character hexadecimal string.
Private Function BigEndianHex(int)
    Dim padded
    Dim pos
    Dim swapped

    padded = Right("0000000" & Hex(int), 8)

    ' Walk the byte pairs from the end of the string to the start.
    swapped = ""
    For pos = 7 To 1 Step -2
        swapped = swapped & Mid(padded, pos, 2)
    Next

    BigEndianHex = swapped
End Function
' Returns the hash of file_name as one hexadecimal string.
' The hash record comes from the global WindowsInstaller.Installer object
' (wi); each of its integer fields is rendered with BigEndianHex and the
' pieces are concatenated in field order.
'   file_name - path of the file to hash.
Private Function GetFileHash(file_name)
    Dim record
    Dim field_no
    Dim digest

    Set record = wi.FileHash(file_name, 0)

    digest = ""
    For field_no = 1 To record.FieldCount
        digest = digest & BigEndianHex(record.IntegerData(field_no))
    Next

    GetFileHash = digest
    Set record = Nothing
End Function
' Finds files with identical content among the files registered in
' file_size_dictionary and writes the result to "Duplicates.txt" in the
' directory of this script.
'
' Only sizes shared by at least two files are examined. Zero-length files are
' treated as identical without hashing; all other candidates are grouped by
' "size:hash". Every group with more than one member is written to the report
' as a "// Group N, key" header followed by the member paths and a blank line.
'
' Side effects: empties file_size_dictionary, sets the global `group` to the
' number of duplicate groups found, and creates/releases the globals `wi` and
' file_size_and_hash_dictionary.
Private Sub FindDuplicates()
    Dim file_size_array
    Dim file_name_index_array
    Dim ubound_of_file_size_dictionary
    Dim file_index_array
    Dim index
    Dim hash
    Dim key
    Dim i

    Set wi = CreateObject("WindowsInstaller.Installer")
    Set file_size_and_hash_dictionary = CreateObject("Scripting.Dictionary")

    ' UMU: dictionary -> 2 arrays, for quick finding
    file_size_array = file_size_dictionary.Keys
    file_name_index_array = file_size_dictionary.Items
    ubound_of_file_size_dictionary = file_size_dictionary.Count - 1
    file_size_dictionary.RemoveAll

    For i = 0 To ubound_of_file_size_dictionary
        ' A ";" in the index list means at least two files share this size.
        If InStr(file_name_index_array(i), ";") <> 0 Then
            If file_size_array(i) = 0 Then
                ' UMU: empty files are all the same
                file_size_and_hash_dictionary.Add 0, file_name_index_array(i)
            Else
                file_index_array = Split(file_name_index_array(i), ";")
                For Each index In file_index_array
                    ' UMU: CLng() is important - Split returns strings, while
                    ' file_name_dictionary is keyed by numeric file indexes.
                    hash = GetFileHash(file_name_dictionary.Item(CLng(index)))
                    key = file_size_array(i) & ":" & hash
                    If file_size_and_hash_dictionary.Exists(key) Then
                        ' UMU: we've got it!
                        file_size_and_hash_dictionary.Item(key) = file_size_and_hash_dictionary.Item(key) & ";" & index
                    Else
                        file_size_and_hash_dictionary.Add key, index
                    End If
                Next
            End If
        End If
    Next
    Erase file_size_array
    Erase file_name_index_array

    ' UMU: dictionary -> 2 arrays, for quick finding
    Dim file_and_hash_array
    Dim ubound_of_file_size_and_hash_dictionary
    file_and_hash_array = file_size_and_hash_dictionary.Keys
    file_name_index_array = file_size_and_hash_dictionary.Items
    ubound_of_file_size_and_hash_dictionary = file_size_and_hash_dictionary.Count - 1
    file_size_and_hash_dictionary.RemoveAll

    ' The report is written next to the script itself.
    Dim file
    Dim report_path
    report_path = fso.BuildPath(fso.GetParentFolderName(WScript.ScriptFullName), "Duplicates.txt")

    ' Overwrite any previous report and write it as Unicode: an ANSI text
    ' stream fails on WriteLine as soon as a path contains a character that
    ' the system code page cannot represent.
    Set file = fso.CreateTextFile(report_path, True, True)

    group = 0
    For i = 0 To ubound_of_file_size_and_hash_dictionary
        ' Keys with a single member are unique files; skip them.
        If InStr(file_name_index_array(i), ";") <> 0 Then
            group = group + 1
            file.WriteLine "// Group " & group & ", " & file_and_hash_array(i)
            file_index_array = Split(file_name_index_array(i), ";")
            For Each index In file_index_array
                file.WriteLine file_name_dictionary.Item(CLng(index))
            Next
            file.WriteLine ""
        End If
    Next
    file.Close
    Set file = Nothing

    Erase file_and_hash_array
    Erase file_name_index_array
    Set wi = Nothing
    Set file_size_and_hash_dictionary = Nothing
End Sub