树结构实战,获取文件夹大小

文件IO是一个耗时操作,要尽量避免频繁读取磁盘。

而我们需要分析磁盘的占用空间,读取磁盘无法避免,但期望对每个文件只进行一次文件IO操作、不重复读取,最终目标是获取某个目录下所有文件夹的大小信息。

所以需要一种方式可以仅进行一轮磁盘操作,就能获取到对应目录下的所有信息。

为了达到这个目标,做了以下尝试:

  • 使用自定义的树结构来模拟文件夹结构
  • 通过一次全局IO操作获取所有原始磁盘数据,原始数据保存在内存中
  • 后续只操作内存,完成目录下所有子文件夹信息获取

CHFileTool.showFileInfo(rootPath: NSHomeDirectory())

核心计算类: 

import Foundation

/// Selects which size metric is used when collecting file sizes.
enum CHFileToolSizeType {
    /// Compute sizes from the space allocated on disk.
    case byAllocatedSize
    /// Compute sizes from the logical byte count.
    case byBiteSize
}

/// Builds an in-memory tree of a directory hierarchy from one pass over the disk
/// and derives per-directory sizes from that tree.
class CHFileTool: NSObject {

    /// Scans `rootPath`, builds the file tree, prints one randomly chosen directory together
    /// with its chain of ancestors, and returns the accumulated size of the root node.
    ///
    /// - Parameters:
    ///   - rootPath: Directory to analyse.
    ///   - sizeType: Size metric to use; defaults to `.byAllocatedSize`.
    /// - Returns: The root node's accumulated `fileSize`.
    @discardableResult
    static func showFileInfo(rootPath: String, sizeType: CHFileToolSizeType = .byAllocatedSize) -> Int {

        // 1. Collect the raw information for every entry in a single pass.
        // The switch is exhaustive on purpose: a newly added size type becomes a compile
        // error here instead of silently producing an empty scan.
        let fileModelArray: [CHFileModel]
        switch sizeType {
        case .byBiteSize:
            fileModelArray = getOriginFileInfo(rootPath: rootPath)
        case .byAllocatedSize:
            fileModelArray = getOriginFileInfoByTotleSize(rootPath: rootPath)
        }

        // 2. Build the tree and accumulate directory sizes.
        let rootModel = buildRootNode(rootPath: rootPath, originFileArray: fileModelArray)

        // 3. Collect the directory nodes, largest first.
        let dirArray = printAnalyzeInfo(rootModel: rootModel)

        // Spot-check: print a random directory and walk up through its ancestors.
        if var currentNode = dirArray.randomElement() {
            print(currentNode)
            while let parentNode = currentNode.parentNode {
                print(parentNode)
                currentNode = parentNode
            }
        }
        return rootModel.fileSize
    }

    /// Builds the tree for `originFileArray` underneath a fresh root node.
    ///
    /// - Parameters:
    ///   - rootPath: Scanned directory; only its last path component is used as the root's path.
    ///   - originFileArray: Flat list of entries whose `filePath` is relative to the root.
    /// - Returns: The root node, with `fileSize` set to the sum of its direct children.
    private static func buildRootNode(rootPath: String, originFileArray: [CHFileModel]) -> CHFileModel {
        let rootModel = CHFileModel()
        rootModel.fileSize = 0
        rootModel.filePath = (rootPath as NSString).lastPathComponent
        rootModel.fileType = .directory

        for model in originFileArray {
            let pathComponents = (model.filePath as NSString).pathComponents
            buildTree(parenetNode: rootModel, currentNode: model, pathComponents: pathComponents)
        }

        // buildTree accumulates sizes into every node below the root, so the root total
        // is the sum of its direct children.
        rootModel.fileSize = rootModel.subNode.values.reduce(0) { $0 + $1.fileSize }
        return rootModel
    }

    /// Collects every directory node below (and including) `rootModel`.
    ///
    /// Despite the name, nothing is printed here; the caller decides what to output.
    ///
    /// - Returns: Directory nodes ordered by size, largest first; equal sizes are ordered
    ///   by path ascending.
    @discardableResult
    private static func printAnalyzeInfo(rootModel: CHFileModel) -> [CHFileModel] {
        var dirArray = [CHFileModel]()
        getAllDirectory(rootNode: rootModel, dirArray: &dirArray)

        // One comparator with an explicit tie-break replaces sorting by name and then by
        // size, which depended on the second sort being stable.
        dirArray.sort { pre, next in
            if pre.fileSize != next.fileSize {
                return pre.fileSize > next.fileSize
            }
            return pre.filePath < next.filePath
        }
        return dirArray
    }
}

// Single-pass collection of raw file information into a flat array.
// Method 1 reads logical byte sizes; method 2 reads the space allocated on disk.
extension CHFileTool {

    /// Method 1: collects every entry below `rootPath` using its logical byte size.
    ///
    /// Entries whose attributes cannot be read are skipped instead of aborting the process,
    /// because files may disappear or be unreadable between listing and inspection.
    ///
    /// - Parameter rootPath: Directory to scan.
    /// - Returns: One model per readable entry, with `filePath` relative to `rootPath`.
    ///   Empty when the directory itself cannot be listed.
    private static func getOriginFileInfo(rootPath: String) -> [CHFileModel] {
        let fileManager = FileManager.default
        guard let subpaths = try? fileManager.subpathsOfDirectory(atPath: rootPath) else {
            return []
        }

        var fileModelArray = [CHFileModel]()
        fileModelArray.reserveCapacity(subpaths.count)

        for subpath in subpaths {
            let fullPath = (rootPath as NSString).appendingPathComponent(subpath)
            guard let attributes = try? fileManager.attributesOfItem(atPath: fullPath) else {
                continue
            }

            let fileModel = CHFileModel()
            fileModel.fileSize = (attributes[.size] as? NSNumber)?.intValue ?? 0
            fileModel.filePath = subpath

            // Anything that is neither a regular file nor a directory keeps the
            // model's default type.
            let fileType = attributes[.type] as? FileAttributeType
            if fileType == FileAttributeType.typeRegular {
                fileModel.fileType = .file
            } else if fileType == FileAttributeType.typeDirectory {
                fileModel.fileType = .directory
            }
            fileModelArray.append(fileModel)
        }
        return fileModelArray
    }

    /// Method 2: collects every entry below `rootPath` using the space allocated on disk.
    ///
    /// Author's notes: a file usually occupies slightly more disk space than its byte count
    /// because of allocation alignment, while system-compressed files can occupy less.
    /// Either way the on-disk figure is the more realistic one, and this method was
    /// measured to be faster.
    ///
    /// - Parameter rootPath: Directory to scan.
    /// - Returns: One model per entry whose resource values could be read, with `filePath`
    ///   relative to `rootPath`. Empty when no enumerator can be created.
    private static func getOriginFileInfoByTotleSize(rootPath: String) -> [CHFileModel] {
        let keysArray: [URLResourceKey] = [
            .isDirectoryKey,
            .isRegularFileKey,
            .fileAllocatedSizeKey,
            .totalFileAllocatedSizeKey,
        ]
        let keysSet = Set(keysArray)

        let directoryURL = URL(filePath: rootPath)
        guard let enumerator = FileManager.default.enumerator(at: directoryURL,
                                                              includingPropertiesForKeys: keysArray) else {
            return []
        }

        // Only a leading root prefix may be removed. Replacing every occurrence would also
        // eat matching segments deeper in the path (root "/a": "/a/b/a/c" must become
        // "b/a/c", not "bc").
        let rootPrefix = rootPath.hasSuffix("/") ? rootPath : rootPath + "/"

        var fileModelArray = [CHFileModel]()
        for case let contentItemURL as URL in enumerator {
            guard let resourceValues = try? contentItemURL.resourceValues(forKeys: keysSet) else {
                continue
            }

            let fileModel = CHFileModel()
            let itemPath = contentItemURL.relativePath
            if itemPath.hasPrefix(rootPrefix) {
                fileModel.filePath = String(itemPath.dropFirst(rootPrefix.count))
            } else {
                fileModel.filePath = itemPath
            }

            // Anything that is neither a directory nor a regular file keeps the default type.
            if resourceValues.isDirectory == true {
                fileModel.fileType = .directory
            } else if resourceValues.isRegularFile == true {
                fileModel.fileType = .file
            }

            // Prefer the total allocated size and fall back to the plain allocated size;
            // the model's default size is kept when neither is available.
            if let allocatedSize = resourceValues.totalFileAllocatedSize ?? resourceValues.fileAllocatedSize {
                fileModel.fileSize = allocatedSize
            }
            fileModelArray.append(fileModel)
        }
        return fileModelArray
    }

}

// MARK: - Tree helpers
extension CHFileTool {

    /// Appends `rootNode` and every directory node beneath it to `dirArray`.
    ///
    /// A node is appended before its children are visited; non-directory nodes are
    /// visited but never appended.
    private static func getAllDirectory(rootNode: CHFileModel, dirArray: inout [CHFileModel] ) {
        if rootNode.fileType == .directory {
            dirArray.append(rootNode)
        }
        for child in rootNode.subNode.values {
            getAllDirectory(rootNode: child, dirArray: &dirArray)
        }
    }


    /// Inserts `currentNode` into the tree below `parenetNode`, following `pathComponents`.
    ///
    /// Every node along the path has `currentNode.fileSize` added to its size. Missing nodes
    /// are created on the way down: intermediate ones as directories, the final one with
    /// `currentNode`'s type.
    private static func buildTree(parenetNode: CHFileModel, currentNode: CHFileModel, pathComponents: [String]) {
        var parent = parenetNode
        let lastIndex = pathComponents.count - 1

        for (index, component) in pathComponents.enumerated() {
            if let existing = parent.subNode[component] {
                // Known path segment: just add this entry's size.
                existing.fileSize += currentNode.fileSize
                parent = existing
                continue
            }

            // Unknown path segment: create it, attach it to the parent, then descend.
            let created = CHFileModel()
            created.fileSize = currentNode.fileSize
            created.filePath = parent.filePath + "/" + component
            created.parentNode = parent
            created.fileType = index < lastIndex ? .directory : currentNode.fileType
            parent.subNode[component] = created
            parent = created
        }
    }

}

数据结构定义:


import Foundation

/// Kind of entry a tree node represents.
enum CHFileType: String {
    // Placeholder
    case none
    // Regular file
    case file
    // Directory
    case directory
}

/// A node of the file tree: a file or a directory together with its child nodes.
class CHFileModel: NSObject {

    // File path
    var filePath: String = ""
    // File size
    var fileSize: Int = 0
    // File type
    var fileType: CHFileType = .none

    // Parent node. Held weakly: the parent already owns this node through `subNode`,
    // so a strong back-reference would form a retain cycle and leak the whole tree.
    weak var parentNode: CHFileModel?

    // Child nodes keyed by path component; stays empty for files.
    var subNode: [String: CHFileModel] = [:]

    /// Prints and returns this node's description followed by one tab-indented
    /// description per direct child.
    @discardableResult
    func showOneLevelInfo() -> String {
        var result = description
        for child in subNode.values {
            result.append("\t" + child.description)
        }
        print(result)
        return result
    }


    /// One-line summary containing the path, the formatted size and the type,
    /// terminated by a newline.
    override var description: String {
        return "\(filePath) 文件大小: \(readAbleFileSize(CGFloat(fileSize))) 文件类型: \(fileType.rawValue)\n"
    }
}

辅助工具文件: 

import Foundation

// CGFloat on the left, Int on the right

/// Multiplies a `CGFloat` by an `Int`, converting the integer operand to `CGFloat` first.
public func * (left: CGFloat, right: Int) -> CGFloat {
    let rhs = CGFloat(right)
    return left * rhs
}


/// Adds an `Int` to a `CGFloat`, converting the integer operand to `CGFloat` first.
public func + (left: CGFloat, right: Int) -> CGFloat {
    let rhs = CGFloat(right)
    return left + rhs
}


/// Subtracts an `Int` from a `CGFloat`, converting the integer operand to `CGFloat` first.
public func - (left: CGFloat, right: Int) -> CGFloat {
    let rhs = CGFloat(right)
    return left - rhs
}


/// Divides a `CGFloat` by an `Int`, converting the integer operand to `CGFloat` first.
///
/// - Returns: `left / right`, or `CGFloat.nan` when `right` is zero.
public func / (left: CGFloat, right: Int) -> CGFloat {
    guard right != 0 else {
        return .nan
    }
    // The previous implementation multiplied here, so `10 / 4` evaluated to 40.
    return left / CGFloat(right)
}

// Int on the left, CGFloat on the right

/// Multiplies an `Int` by a `CGFloat`, converting the integer operand to `CGFloat` first.
public func * (left: Int, right: CGFloat) -> CGFloat {
    let lhs = CGFloat(left)
    return lhs * right
}


/// Adds a `CGFloat` to an `Int`, converting the integer operand to `CGFloat` first.
public func + (left: Int, right: CGFloat) -> CGFloat {
    let lhs = CGFloat(left)
    return lhs + right
}


/// Subtracts a `CGFloat` from an `Int`, converting the integer operand to `CGFloat` first.
public func - (left: Int, right: CGFloat) -> CGFloat {
    let lhs = CGFloat(left)
    return lhs - right
}


/// Divides an `Int` by a `CGFloat`, converting the integer operand to `CGFloat` first.
///
/// - Returns: `left / right`, or `CGFloat.nan` when `right` equals zero.
public func / (left: Int, right: CGFloat) -> CGFloat {
    guard right != 0 else {
        return .nan
    }
    let numerator = CGFloat(left)
    return numerator / right
}

找资料的时候发现,有2种方式计算文件的磁盘大小

  • 方案1:通过文件的占用的字节数获取
    • 使用 att[FileAttributeKey.size] 获取每个文件的字节数,然后逐个累加;
    • 这种方式获取出字节数累加是1.79G
  • 方案2: 通过磁盘上分配的空间来获取文件占用大小

    • 使用 totalFileAllocatedSize,实测这种方式得到的结果与「显示简介」中的磁盘占用数值一致;

    • 这种方式获取出来的是1.91G

实测方案2, 获取的大小更真实, 并且性能更好。

完整代码地址:

https://github.com/guochaoshun/directoryTool

参考链接:

https://www.itguest.com/post/feidce2b6.html

https://gist.github.com/NikolaiRuhe/408cefb953c4bea15506a3f80a3e5b96

https://github.com/NikolaiRuhe/NRFoundation/blob/master/NRFoundation/NRFileManager.m



import Foundation

public extension FileManager {

    /// Calculate the allocated size of a directory and all its contents on the volume.
    ///
    /// As there's no simple way to get this information from the file system the method
    /// has to crawl the entire hierarchy, accumulating the overall sum on the way.
    /// The resulting value is roughly equivalent with the amount of bytes
    /// that would become available on the volume if the directory would be deleted.
    ///
    /// - note: There are a couple of oddities that are not taken into account (like symbolic links, meta data of
    /// directories, hard links, ...).
    ///
    /// - Parameter directoryURL: Directory whose contents are summed up.
    /// - Returns: Sum of the allocated sizes of all regular files found below `directoryURL`.
    /// - Throws: `CocoaError(.fileReadUnknown)` when no enumerator can be created, the first
    ///   error reported during enumeration, or any error thrown while reading an item's
    ///   resource values.
    func allocatedSizeOfDirectory(at directoryURL: URL) throws -> UInt64 {

        // The error handler simply stores the error and stops traversal.
        var enumeratorError: Error? = nil
        func errorHandler(_: URL, error: Error) -> Bool {
            enumeratorError = error
            return false
        }

        // We have to enumerate all directory contents, including subdirectories.
        // A missing enumerator is reported through the throwing channel callers already
        // handle, instead of crashing on a force-unwrap.
        guard let enumerator = self.enumerator(at: directoryURL,
                                               includingPropertiesForKeys: Array(allocatedSizeResourceKeys),
                                               options: [],
                                               errorHandler: errorHandler) else {
            throw CocoaError(.fileReadUnknown, userInfo: [NSURLErrorKey: directoryURL])
        }

        // We'll sum up content size here:
        var accumulatedSize: UInt64 = 0

        // Perform the traversal; the pattern match replaces the forced `as! URL` cast.
        for case let contentItemURL as URL in enumerator {

            // Bail out on errors from the errorHandler.
            if enumeratorError != nil { break }

            // Add up individual file sizes.
            accumulatedSize += try contentItemURL.regularFileAllocatedSize()
        }

        // Rethrow errors from errorHandler.
        if let error = enumeratorError { throw error }

        return accumulatedSize
    }
}


/// Resource keys needed to decide whether an item is a regular file and to read
/// its allocated size.
fileprivate let allocatedSizeResourceKeys: Set<URLResourceKey> = [
    .isRegularFileKey,
    .fileAllocatedSizeKey,
    .totalFileAllocatedSizeKey,
]


fileprivate extension URL {

    /// Returns the allocated size of the item at this URL, or 0 when it is not a regular file.
    ///
    /// - Throws: Any error raised while reading the resource values.
    func regularFileAllocatedSize() throws -> UInt64 {
        let values = try resourceValues(forKeys: allocatedSizeResourceKeys)

        // Only regular files contribute; an unknown file kind counts as "not regular".
        guard values.isRegularFile == true else {
            return 0
        }

        // Prefer the most comprehensive figure (metadata, file-system level compression and
        // block size); fall back to the plain allocated size, which excludes metadata and
        // compression, and finally to 0.
        let allocatedBytes = values.totalFileAllocatedSize ?? values.fileAllocatedSize ?? 0
        return UInt64(allocatedBytes)
    }
}

  • 11
    点赞
  • 6
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值