取证应用程序

最新推荐文章于 2024-08-23 19:57:08 发布

zwlww1

最新推荐文章于 2024-08-23 19:57:08 发布

阅读量553

点赞数

本文链接：https://blog.csdn.net/zwlww1/article/details/79121494

版权

Pfish.py

# -*- coding:UTF-8 -*-
#
# p-fish : Python File System Hash Program
# Author: C. Hosmer
# July 2013
# Version 1.0
#
import logging           #Python Standard Library Logger
import time                #Python Standard Library time functions
import sys                  #Python Library System specific parameters
import _pfish             #_pfish Support Function Module 



if __name__ == '__main__':
    # Release identifier reported in the log and in the welcome banner.
    PFISH_VERSION = '1.0'

    # Turn on logging.
    # Records go to pFishLog.log, each one prefixed with its date and time.
    # DEBUG is the lowest level, so every message sent to the logger is kept.
    logging.basicConfig(filename='pFishLog.log', level=logging.DEBUG, format='%(asctime)s %(message)s')

    # Process the command line arguments.
    # The function lives in the _pfish module, hence the prefix. It must run
    # first: it creates the argument namespace that DisplayMessage() reads.
    _pfish.ParseCommandLine()

    # Record the starting time (seconds since the epoch) so the elapsed
    # processing time can be reported at the end of the scan.
    startTime = time.time()

    # Record the welcome message. The log always receives it; the screen
    # only shows it when the verbose option was selected.
    logging.info('')
    logging.info('Welcome to p-fish version ' + PFISH_VERSION + ' ...New Scan Started')
    logging.info('')
    # Organisation, investigator name, case number and other case-related
    # details could be recorded here as well.
    _pfish.DisplayMessage('Welcome to p-fish ...version ' + PFISH_VERSION)

    # Record some information regarding the system.
    logging.info('System: ' + sys.platform)
    logging.info('Version: ' + sys.version)

    # Traverse the file system directories and hash the files.
    # WalkPath() returns the number of files processed successfully.
    filesProcessed = _pfish.WalkPath()

    # Record the end time and calculate the duration in seconds.
    endTime = time.time()
    duration = endTime - startTime
    logging.info('Files Processed: ' + str(filesProcessed))
    logging.info('Elapsed Time: ' + str(duration) + ' seconds')
    logging.info('')
    logging.info('Program Terminated Normally')
    logging.info('')

    _pfish.DisplayMessage('程序运行:  ' + str(duration) + ' 秒')
    _pfish.DisplayMessage("Program End")

_Pfish.py

#-*- coding: UTF-8 -*-
#
# pfish support functions, where all the real work gets done
#

# DisplayMessage()     ParseCommandLine()    WalkPath()
# HashFile()           class _CSVWriter
# ValidateDirectory()    ValidateDirectoryWritable()
#

import os
import stat
import time
import hashlib
import argparse
import csv
import logging

log = logging.getLogger('main._pfish')

#
# Name: ParseCommandLine() Function
#
# Desc: Process and validate the command line arguments
#                  using the Python Standard Library module argparse
#
# Input: none
#
# Actions:
#                Uses the standard library argparse to process the command line and
#                establishes a global variable gl_args where any of the functions can
#                obtain argument information
#
def ParseCommandLine():
    """Parse and validate the command line and publish the result as globals.

    Sets two module-level names for the other functions in this module:

    * ``gl_args``     - the namespace returned by argparse
    * ``gl_hashType`` - 'MD5', 'SHA256', 'SHA512' or 'Unknown'

    Invalid or missing arguments are reported to the user by argparse.
    """
    # The text is passed as ``description``. Given positionally it would be
    # taken as ``prog`` and replace the program name in the usage line.
    parser = argparse.ArgumentParser(description='Python file system hashing ..p-fish')

    # With this switch DisplayMessage() prints to standard output;
    # without it the program runs silently.
    parser.add_argument('-v', '--verbose', help='allows progress message to be displayed', action='store_true')

    # Hash algorithm selection: mutually exclusive, and exactly one is required.
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('--md5', help='specifies MD5 algorithm', action='store_true')
    group.add_argument('--sha256', help='specifies SHA256 algorithm', action='store_true')
    group.add_argument('--sha512', help='specifies SHA512 algorithm', action='store_true')

    # argparse has no built-in directory validation, so the ``type`` callables
    # do it: rootPath must exist and be readable, reportPath must exist and
    # be writable.
    parser.add_argument('-d', '--rootPath', type=ValidateDirectory, required=True, help="specify the root path for  hashing")
    parser.add_argument('-r', '--reportPath', type=ValidateDirectoryWritable, required=True, help="specify the path for reports and logs will be written")

    # Global objects holding the validated arguments, available to every
    # function within this module.
    global gl_args
    global gl_hashType

    gl_args = parser.parse_args()

    # The flag of the selected algorithm is True after parsing.
    if gl_args.md5:
        gl_hashType = 'MD5'
    elif gl_args.sha256:
        gl_hashType = 'SHA256'
    elif gl_args.sha512:
        gl_hashType = 'SHA512'
    else:
        # Defensive only: the required group should make this unreachable.
        gl_hashType = "Unknown"
        # Use the module logger, like the rest of this module.
        log.error('Unknown Hash Type Specified')

    DisplayMessage("Command line processed: Successfully")

# End ParseCommandLine ====================================================
#
# Name: WalkPath() Function
#
# Desc: Walk the path specified on the command line using the Python Standard Library module os
#
# Input: none, uses command line arguments
#
# Actions:
#                  Uses the standard library module os to traverse the directory structure starting at the root
#                  path specified by the user. For each file discovered, WalkPath will call the function HashFile()
#                  to perform the file hashing
# 

def WalkPath():
    """Hash every file below the root path and write the CSV report.

    Reads the module globals ``gl_args`` and ``gl_hashType``, so
    ParseCommandLine() must have been called first.

    Returns:
        The number of files for which HashFile() returned True.
    """
    processCount = 0    # files hashed and written to the report
    errorCount = 0      # files skipped or failed

    # Build the report name with os.path.join so the file is created inside
    # reportPath even when the user omitted the trailing path separator.
    reportFile = os.path.join(gl_args.reportPath, 'fileSystemReport.csv')
    oCVS = _CSVWriter(reportFile, gl_hashType)

    # Record the root path in the log.
    log.info('Root Path: ' + gl_args.rootPath)

    try:
        # os.walk visits the root path and all of its sub-directories.
        for root, dirs, files in os.walk(gl_args.rootPath):
            for fileName in files:
                # Full path of the file; the bare name is passed as well.
                fname = os.path.join(root, fileName)
                if HashFile(fname, fileName, oCVS) is True:
                    processCount += 1
                else:
                    errorCount += 1
    finally:
        # Close the report even if the traversal is interrupted by an error.
        oCVS.writerClose()

    # The caller only receives the success count, so record the rest here.
    log.info('Files Skipped or Failed: ' + str(errorCount))
    return processCount

#End WalkPath ===============================================================
#
# Name: HashFile Function
#
# Desc: Processes a single file, which includes performing a hash of the file and the extraction of metadata
#                  regarding the file processed, using Python Standard Library modules hashlib, os, and time
#
# Input: theFile = the full path of the file
#           simpleName = just the filename itself
#           o_result = the CSV writer object that receives the result row
#
# Actions:
#                 Attempts to hash the file and extract metadata. Calls o_result.writeCSVRow() for successfully hashed files
#
def HashFile(theFile, simpleName, o_result):
    """Hash one file, collect its metadata and append a row to the report.

    Args:
        theFile: full path of the file to process.
        simpleName: the bare file name, used in log messages and the report.
        o_result: report writer; its writeCSVRow() method receives the row.

    Returns:
        True when the file was hashed and its row written. False when the
        file was skipped or could not be processed; the reason is logged.
    """
    # Guard clauses: only existing, regular, non-link files are hashed.
    if not os.path.exists(theFile):
        log.warning('[' + repr(simpleName) + ', Path does NOT exist' + ']')
        return False
    if os.path.islink(theFile):
        log.warning('[' + repr(simpleName) + ', Skipped Link NOT a File' + ']')
        return False
    if not os.path.isfile(theFile):
        log.warning('[' + repr(simpleName) + ', Skipped NOT a File' + ']')
        return False

    # Open read-only in binary mode. The open can still fail, for example
    # because of permissions or a locked file.
    try:
        f = open(theFile, 'rb')
    except IOError:
        log.warning('Open Failed: ' + theFile)
        return False

    # NOTE: the whole file is read into memory before it is hashed.
    try:
        rd = f.read()
    except IOError:
        log.warning('Read Failed: ' + theFile)
        return False
    finally:
        # Always release the handle, on success as well as on failure.
        f.close()

    # The file could be read, so query its attributes: mode, owner, group,
    # size and the three time stamps.
    (mode, ino, dev, nlink, uid, gid, size, atime, mtime, ctime) = os.stat(theFile)

    DisplayMessage("Processing File: " + theFile)

    fileSize = str(size)

    # NOTE: per the os.stat documentation ctime is the metadata change time
    # on Unix and the creation time on Windows.
    modifiedTime = time.ctime(mtime)
    accessTime = time.ctime(atime)
    createdTime = time.ctime(ctime)

    ownerID = str(uid)
    groupID = str(gid)

    # Select the hash algorithm requested on the command line.
    if gl_args.md5:
        hasher = hashlib.md5()
    elif gl_args.sha256:
        hasher = hashlib.sha256()
    elif gl_args.sha512:
        hasher = hashlib.sha512()
    else:
        # No digest can be produced, so there is no row to write.
        log.error('Hash not Selected')
        print("==========================================")
        return False

    hasher.update(rd)
    hashValue = hasher.hexdigest().upper()

    # Write one row to the report; the mode is written as the raw integer.
    o_result.writeCSVRow(simpleName, theFile, fileSize, modifiedTime, accessTime, createdTime, hashValue, ownerID, groupID, mode)
    return True
# End HashFile Function====================================================
#
# Name: ValidateDirectory Function
#
#
# Desc: Function that will validate a directory path as existing and readable. Used for argument 
#           validation only
#
# Input: a directory path string
#
# Actions:
#                if valid will return the Directory String
#                if invalid it will raise an ArgumentTypeError within argparse
#                which will in turn be reported by argparse to the user
#
def ValidateDirectory(theDir):
    """Return theDir unchanged when it is an existing, readable directory.

    Intended as an argparse ``type`` callable.

    Raises:
        argparse.ArgumentTypeError: the path is not a directory, or the
            directory is not readable.
    """
    if os.path.isdir(theDir):
        # The directory exists; it must also be readable.
        if not os.access(theDir, os.R_OK):
            raise argparse.ArgumentTypeError('Directory is not readable')
        return theDir
    raise argparse.ArgumentTypeError('Directory does not exist')
#End ValidateDirectory ==============================================
#
# Name: ValidateDirectoryWritable Function
#
# Desc: Function that will validate a directory path as existing and writable. Used for 
#             argument validation only
#
# Input: a directory path String
#
# Actions:
#                       if valid will return the Directory String
#
#                       if invalid it will raise an ArgumentTypeError within argparse which will
#                       in turn be reported by argparse to the user
#
def ValidateDirectoryWritable(theDir):
    """Return theDir unchanged when it is an existing, writable directory.

    Intended as an argparse ``type`` callable.

    Raises:
        argparse.ArgumentTypeError: the path is not a directory, or the
            directory is not writable.
    """
    if os.path.isdir(theDir):
        # The directory exists; it must also be writable.
        if not os.access(theDir, os.W_OK):
            raise argparse.ArgumentTypeError('Directory is not writable')
        return theDir
    raise argparse.ArgumentTypeError('Directory does not exist')
## End ValidateDirectoryWritable====================================================
#
# Name: DisplayMessage() Function
#
#
# Desc: Displays the message if the verbose command line option is present
#
# Input: message type string
#
# Actions:
#                  Uses the standard library print function to display the message
#                
def DisplayMessage(msg):
    """Print msg to standard output, but only in verbose mode.

    Reads the ``verbose`` flag from the module global ``gl_args``.
    """
    if not gl_args.verbose:
        return
    print(msg)
## End DisplayMessage====================================================
#
# Class: _CSVWriter
#
#
# Desc: Handles all methods related to comma separated value operations
#

#
# Methods constructor :              Initializes the CSV File
#                  writeCSVRow: Writes a single row to the csv file
#                  writerClose:     Close the CSV File
#
class _CSVWriter:
    """Write the hashing report as a CSV file with every field quoted."""

    def __init__(self, fileName, hashType):
        """Create the report file and write its header row.

        Args:
            fileName: path of the CSV file to create.
            hashType: label used as the heading of the hash column.

        Raises:
            Exception: whatever the open or the header write raised. It is
                logged as 'CSV File Failure' and re-raised, so the caller
                never receives a half-initialised writer.
        """
        import sys  # local import: only needed to pick the open mode

        self.csvFile = None
        self.writer = None
        try:
            # The csv module wants a binary file on Python 2 and a text
            # file opened with newline='' on Python 3.
            if sys.version_info[0] < 3:
                self.csvFile = open(fileName, 'wb')
            else:
                self.csvFile = open(fileName, 'w', newline='')
            self.writer = csv.writer(self.csvFile, delimiter=',', quoting=csv.QUOTE_ALL)
            # Header row naming every column of the report.
            self.writer.writerow(('File', 'Path', 'Size', 'modified Time', 'Access Time', 'Created Time', hashType, 'Owner', 'Group', 'Mode'))
        except Exception:
            log.error('CSV File Failure')
            # Do not leak the handle when the failure came after the open.
            if self.csvFile is not None:
                self.csvFile.close()
            raise

    def writeCSVRow(self, fileName, filePath, fileSize, mTime, aTime, cTime, hashVal, own, grp, mod):
        """Append one record to the report, in header column order."""
        self.writer.writerow((fileName, filePath, fileSize, mTime, aTime, cTime, hashVal, own, grp, mod))

    def writerClose(self):
        """Close the report file."""
        self.csvFile.close()