文件IO是一个耗时操作,要尽量避免频繁读取磁盘。
而我们需要分析磁盘的占用空间,无法避免需要读取,但是期望对单个文件只通过一次文件IO操作,不重复读取,最终目标是要获取到某个目录下所有文件夹的信息。
所以需要一种方式可以仅进行一轮磁盘操作,就能获取到对应目录下的所有信息。
为了达到这个目标,做了以下尝试:
- 使用自定义的树结构来模拟文件夹结构
- 通过一次全局IO操作获取所有原始磁盘数据,原始数据保存在内存中
- 后续只操作内存,完成目录下所有子文件夹信息获取
CHFileTool.showFileInfo(rootPath: NSHomeDirectory())
核心计算类:
import Foundation
/// Selects which per-file size metric a scan collects.
enum CHFileToolSizeType {
    /// Measure each entry by the space allocated for it on disk.
    case byAllocatedSize

    /// Measure each entry by its size in bytes.
    case byBiteSize
}
/// Analyses how much space the folders below a root directory occupy.
///
/// The raw entries are collected first; the tree construction, the folder size
/// totals and the sorting afterwards all operate on that in-memory list.
class CHFileTool: NSObject {
    /// Scans `rootPath`, builds the size tree, prints one randomly chosen directory
    /// followed by each of its ancestors, and returns the root's total size.
    ///
    /// - Parameters:
    ///   - rootPath: Directory to analyse.
    ///   - sizeType: Size metric to collect; defaults to `.byAllocatedSize`.
    /// - Returns: The sum of the sizes of the root's direct children.
    @discardableResult
    static func showFileInfo(rootPath: String, sizeType: CHFileToolSizeType = .byAllocatedSize) -> Int {
        // Step 1: collect the raw entries with the collector matching the metric.
        let entries: [CHFileModel]
        switch sizeType {
        case .byBiteSize:
            // Byte-size based collection.
            entries = getOriginFileInfo(rootPath: rootPath)
        case .byAllocatedSize:
            // Allocated-size based collection.
            entries = getOriginFileInfoByTotleSize(rootPath: rootPath)
        }

        // Step 2: build the tree; folder sizes are accumulated while building.
        let rootModel = buildRootNode(rootPath: rootPath, originFileArray: entries)

        // Step 3: gather every directory node, sorted.
        let directories = printAnalyzeInfo(rootModel: rootModel)

        // Spot check: print a random directory and then walk up through its parents.
        var cursor: CHFileModel? = directories.randomElement()!
        while let node = cursor {
            print(node)
            cursor = node.parentNode
        }

        return rootModel.fileSize
    }

    /// Builds the tree for `originFileArray` beneath a fresh root node.
    ///
    /// The root's `filePath` is the last component of `rootPath`; its size is the
    /// sum of its direct children once every entry has been inserted.
    private static func buildRootNode(rootPath: String, originFileArray: [CHFileModel]) -> CHFileModel {
        let rootModel = CHFileModel()
        rootModel.filePath = (rootPath as NSString).lastPathComponent
        rootModel.fileType = .directory
        rootModel.fileSize = 0

        for entry in originFileArray {
            buildTree(parenetNode: rootModel,
                      currentNode: entry,
                      pathComponents: (entry.filePath as NSString).pathComponents)
        }

        for child in rootModel.subNode.values {
            rootModel.fileSize += child.fileSize
        }
        return rootModel
    }

    /// Collects every directory node below (and including) `rootModel`.
    ///
    /// The list is sorted by path first and by descending size afterwards, so the
    /// returned order is the size order. Printing is currently disabled.
    @discardableResult
    private static func printAnalyzeInfo(rootModel: CHFileModel) -> [CHFileModel] {
        var dirArray: [CHFileModel] = []
        getAllDirectory(rootNode: rootModel, dirArray: &dirArray)

        // Ordered by path.
        dirArray.sort { $0.filePath < $1.filePath }
        // Ordered by size, largest first.
        dirArray.sort { $0.fileSize > $1.fileSize }

        return dirArray
    }
}
// MARK: - Raw data collection
// Each collector returns one flat CHFileModel per entry found below the root.
// The two approaches give similar totals; the original author notes that approach 2
// is closer to the real disk usage.
extension CHFileTool {
    /// Approach 1: collect entries using the byte size reported in the file attributes.
    ///
    /// - Parameter rootPath: Directory whose sub-paths are listed.
    /// - Returns: One model per sub-path, with `filePath` relative to `rootPath`.
    ///   Returns an empty array when the directory listing fails.
    private static func getOriginFileInfo(rootPath: String) -> [CHFileModel] {
        let fileManager = FileManager.default
        guard let subpaths = try? fileManager.subpathsOfDirectory(atPath: rootPath) else {
            return []
        }

        var fileModelArray: [CHFileModel] = []
        fileModelArray.reserveCapacity(subpaths.count)

        for subpath in subpaths {
            let fullPath = (rootPath as NSString).appendingPathComponent(subpath)

            // A single unreadable entry (removed meanwhile, no permission, ...) must not
            // abort the whole scan: log it and skip it, like the allocated-size
            // collector does for entries whose resource values cannot be read.
            guard let attributes = try? fileManager.attributesOfItem(atPath: fullPath) else {
                print("文件信息获取失败, \(fullPath)")
                continue
            }
            guard let fileType = attributes[.type] as? FileAttributeType else {
                fatalError("文件类型获取失败, \(attributes)")
            }

            let fileModel = CHFileModel()
            fileModel.fileSize = attributes[.size] as? Int ?? 0
            fileModel.filePath = subpath
            if fileType == .typeRegular {
                fileModel.fileType = .file
            } else if fileType == .typeDirectory {
                fileModel.fileType = .directory
            }
            // Any other file type keeps the model's initial type.

            fileModelArray.append(fileModel)
        }
        return fileModelArray
    }

    /// Approach 2: collect entries using the space allocated for them on disk.
    ///
    /// Author's notes: the allocated size is usually slightly larger than the byte
    /// size because of block alignment, and can be smaller for files compressed by
    /// the file system; it reflects real usage better and was measured to be faster.
    ///
    /// - Parameter rootPath: Directory to enumerate.
    /// - Returns: One model per enumerated URL whose resource values could be read,
    ///   with `filePath` relative to `rootPath`. Returns an empty array when no
    ///   enumerator can be created.
    private static func getOriginFileInfoByTotleSize(rootPath: String) -> [CHFileModel] {
        let keysArray: [URLResourceKey] = [
            .isDirectoryKey,
            .isRegularFileKey,
            .fileAllocatedSizeKey,
            .totalFileAllocatedSizeKey,
        ]
        let keysSet = Set(keysArray)
        let directoryURL = URL(filePath: rootPath)

        guard let enumerator = FileManager.default.enumerator(at: directoryURL,
                                                              includingPropertiesForKeys: keysArray) else {
            return []
        }

        var fileModelArray: [CHFileModel] = []
        for case let contentItemURL as URL in enumerator {
            guard let resourceValues = try? contentItemURL.resourceValues(forKeys: keysSet) else {
                continue
            }

            let fileModel = CHFileModel()
            fileModel.filePath = pathRelativeToRoot(contentItemURL.relativePath, rootPath: rootPath)

            if resourceValues.isDirectory == true {
                fileModel.fileType = .directory
            } else if resourceValues.isRegularFile == true {
                fileModel.fileType = .file
            }
            // Anything else keeps the model's initial type.

            // Prefer the total allocated size; fall back to the plain allocated size.
            // When neither is available the model keeps its initial size.
            if let allocatedSize = resourceValues.totalFileAllocatedSize ?? resourceValues.fileAllocatedSize {
                fileModel.fileSize = allocatedSize
            }

            fileModelArray.append(fileModel)
        }
        return fileModelArray
    }

    /// Removes the leading "<rootPath>/" from `fullPath`.
    ///
    /// Only the prefix is stripped. Removing every occurrence would corrupt nested
    /// paths that repeat the root string (root "/a", path "/a/b/a/c" must become
    /// "b/a/c"). A path that does not start with the root is returned unchanged.
    private static func pathRelativeToRoot(_ fullPath: String, rootPath: String) -> String {
        let prefix = rootPath.hasSuffix("/") ? rootPath : rootPath + "/"
        guard let prefixRange = fullPath.range(of: prefix, options: .anchored) else {
            return fullPath
        }
        return String(fullPath[prefixRange.upperBound...])
    }
}
// MARK: - Tree helpers
extension CHFileTool {
    /// Appends `rootNode` (when it is a directory) and every directory below it
    /// to `dirArray`, visiting a node before its children.
    private static func getAllDirectory(rootNode: CHFileModel, dirArray: inout [CHFileModel] ) {
        if rootNode.fileType == .directory {
            dirArray.append(rootNode)
        }
        for child in rootNode.subNode.values {
            getAllDirectory(rootNode: child, dirArray: &dirArray)
        }
    }

    /// Inserts `currentNode` into the tree below `parenetNode`.
    ///
    /// Walks `pathComponents` one level at a time. An existing node on the path has
    /// `currentNode.fileSize` added to its size; a missing node is created with that
    /// size, linked to its parent and stored under its component name. A created
    /// node is a directory while more components follow; the node for the last
    /// component takes `currentNode.fileType`.
    private static func buildTree(parenetNode: CHFileModel, currentNode: CHFileModel, pathComponents: [String]) {
        var parent = parenetNode
        var remaining = pathComponents[...]

        while let name = remaining.first {
            let next: CHFileModel
            if let existing = parent.subNode[name] {
                // Known path segment: just add the entry's size.
                existing.fileSize += currentNode.fileSize
                next = existing
            } else {
                // Unknown path segment: create and register it.
                let created = CHFileModel()
                created.fileSize = currentNode.fileSize
                created.filePath = parent.filePath + "/" + name
                created.parentNode = parent
                created.fileType = remaining.count > 1 ? .directory : currentNode.fileType
                parent.subNode[name] = created
                next = created
            }
            parent = next
            remaining = remaining.dropFirst()
        }
    }
}
数据结构定义:
import Foundation
/// Kind of entry a `CHFileModel` describes; the raw value is the case name.
enum CHFileType: String {
    /// Placeholder used until a concrete kind is assigned.
    case none

    /// A file.
    case file

    /// A directory.
    case directory
}
/// One node of the file tree: a file or a directory with its size and links
/// to its parent and children.
class CHFileModel: NSObject {
    /// Path of the entry.
    var filePath: String = ""

    /// Size of the entry.
    var fileSize: Int = 0

    /// Kind of the entry; `.none` until it has been assigned.
    var fileType: CHFileType = .none

    /// Parent node.
    ///
    /// Held weakly: the parent already owns this node through `subNode`, so a
    /// strong back-reference would form a retain cycle and the tree would never
    /// be released. Keep a reference to the root for as long as parents are needed.
    weak var parentNode: CHFileModel?

    /// Child nodes keyed by path component; stays empty for files.
    var subNode: [String: CHFileModel] = [:]

    /// Prints and returns this node's description followed by the description of
    /// each direct child, every child prefixed with a tab.
    @discardableResult
    func showOneLevelInfo() -> String {
        var result = description
        for child in subNode.values {
            result.append("\t" + child.description)
        }
        print(result)
        return result
    }

    /// Path, human-readable size and type of the node, terminated by a newline.
    override var description: String {
        "\(filePath) 文件大小: \(readAbleFileSize(CGFloat(fileSize))) 文件类型: \(fileType.rawValue)\n"
    }
}
辅助工具文件:
import Foundation
// CGFloat on the left-hand side, Int on the right-hand side.

/// Multiplies a `CGFloat` by an `Int` after converting the integer to `CGFloat`.
public func * (left: CGFloat, right: Int) -> CGFloat {
    let rhs = CGFloat(right)
    return left * rhs
}

/// Adds an `Int` to a `CGFloat` after converting the integer to `CGFloat`.
public func + (left: CGFloat, right: Int) -> CGFloat {
    let rhs = CGFloat(right)
    return left + rhs
}

/// Subtracts an `Int` from a `CGFloat` after converting the integer to `CGFloat`.
public func - (left: CGFloat, right: Int) -> CGFloat {
    let rhs = CGFloat(right)
    return left - rhs
}
/// Divides a `CGFloat` by an `Int` after converting the integer to `CGFloat`.
///
/// - Returns: `left / right`, or `CGFloat.nan` when `right` is zero.
public func / (left: CGFloat, right: Int) -> CGFloat {
    guard right != 0 else {
        return CGFloat.nan
    }
    // Divide here; multiplying would return left * right instead of the quotient.
    return left / CGFloat(right)
}
// Int on the left-hand side, CGFloat on the right-hand side.

/// Multiplies an `Int` by a `CGFloat` after converting the integer to `CGFloat`.
public func * (left: Int, right: CGFloat) -> CGFloat {
    let lhs = CGFloat(left)
    return lhs * right
}

/// Adds a `CGFloat` to an `Int` after converting the integer to `CGFloat`.
public func + (left: Int, right: CGFloat) -> CGFloat {
    let lhs = CGFloat(left)
    return lhs + right
}

/// Subtracts a `CGFloat` from an `Int` after converting the integer to `CGFloat`.
public func - (left: Int, right: CGFloat) -> CGFloat {
    let lhs = CGFloat(left)
    return lhs - right
}

/// Divides an `Int` by a `CGFloat` after converting the integer to `CGFloat`.
///
/// - Returns: The quotient, or `CGFloat.nan` when `right` equals zero.
public func / (left: Int, right: CGFloat) -> CGFloat {
    guard right != 0 else {
        return CGFloat.nan
    }
    return CGFloat(left) / right
}
找资料的时候发现,有2种方式计算文件的磁盘大小
- 方案1:通过文件占用的字节数获取
  - 对每个文件的 att[FileAttributeKey.size] 进行累加,即上文方法1中的字节数累加;
  - 这种方式累加得到的字节数是 1.79G
- 方案2:通过磁盘上分配的空间来获取文件占用大小
  - 使用 totalFileAllocatedSize,实测这种方式与“显示简介”中的磁盘占用数值一致;
  - 这种方式获取出来的是 1.91G

实测方案2获取的大小更真实,并且性能更好。
完整代码地址:
https://github.com/guochaoshun/directoryTool
参考链接:
https://www.itguest.com/post/feidce2b6.html
https://gist.github.com/NikolaiRuhe/408cefb953c4bea15506a3f80a3e5b96
https://github.com/NikolaiRuhe/NRFoundation/blob/master/NRFoundation/NRFileManager.m
import Foundation
public extension FileManager {
    /// Returns the allocated size of a directory and everything inside it.
    ///
    /// The file system offers no direct way to ask for this value, so the whole
    /// hierarchy is enumerated and the sizes of its items are summed. The result is
    /// roughly the number of bytes that would be freed on the volume if the
    /// directory were deleted.
    ///
    /// - Note: Some oddities are not taken into account (symbolic links, directory
    ///   metadata, hard links, ...).
    /// - Parameter directoryURL: Directory to measure.
    /// - Returns: The accumulated allocated size of the enumerated items.
    /// - Throws: The error reported during enumeration, or the error raised while
    ///   reading an item's size.
    func allocatedSizeOfDirectory(at directoryURL: URL) throws -> UInt64 {
        // Remember the first enumeration error; returning false stops the traversal.
        var traversalError: Error?

        // Every item below the directory is visited, subdirectories included.
        let directoryEnumerator = enumerator(
            at: directoryURL,
            includingPropertiesForKeys: Array(allocatedSizeResourceKeys),
            options: [],
            errorHandler: { _, error in
                traversalError = error
                return false
            }
        )!

        // Running total of the item sizes.
        var totalBytes: UInt64 = 0

        for entry in directoryEnumerator {
            // Stop as soon as the error handler has recorded a failure.
            guard traversalError == nil else { break }

            let entryURL = entry as! URL
            totalBytes += try entryURL.regularFileAllocatedSize()
        }

        // Surface the failure recorded by the error handler, if any.
        if let traversalError {
            throw traversalError
        }
        return totalBytes
    }
}
/// Resource values read for every item when measuring allocated sizes.
fileprivate let allocatedSizeResourceKeys: Set<URLResourceKey> = [
    .totalFileAllocatedSizeKey,
    .fileAllocatedSizeKey,
    .isRegularFileKey,
]

fileprivate extension URL {
    /// Returns the allocated size of the item at this URL.
    ///
    /// - Returns: `0` for anything that is not a regular file. Otherwise the total
    ///   allocated size, falling back to the plain allocated size, and to `0` when
    ///   neither value is available.
    /// - Throws: The error raised while reading the resource values.
    func regularFileAllocatedSize() throws -> UInt64 {
        let values = try resourceValues(forKeys: allocatedSizeResourceKeys)

        // Only regular files are measured.
        guard values.isRegularFile == true else {
            return 0
        }

        // First choice: the most comprehensive figure for what the file uses on
        // disk (metadata, file-system level compression, block size).
        if let totalAllocated = values.totalFileAllocatedSize {
            return UInt64(totalAllocated)
        }

        // Fallback: the allocated size without metadata and compression.
        if let allocated = values.fileAllocatedSize {
            return UInt64(allocated)
        }
        return 0
    }
}