# Pfish.py
# -*- coding:UTF-8 -*-
#
# p-fish : Python File System Hash Program
# Author: C. Hosmer
# July 2013
# Version 1.0
#
import logging #Python Standard Library Logger
import time #Python Standard Library time functions
import sys #Python Library System specific parameters
import _pfish #_pfish Support Function Module
if __name__ == '__main__':
    # Software version, reported in the log and in the welcome banner
    PFISH_VERSION = '1.0'

    # Turn on Logging
    # Records are stored in the file pFishLog.log. The level is DEBUG, the
    # lowest level, so every message sent to the logger is kept, and each
    # record is stamped with its date and time.
    logging.basicConfig(filename='pFishLog.log', level=logging.DEBUG, format='%(asctime)s %(message)s')

    # Process the Command Line Arguments
    # The _pfish prefix is required because the function lives in the _pfish
    # module. Control returns here only when parsing succeeds; otherwise the
    # user is sent a message and the program exits.
    _pfish.ParseCommandLine()

    # Record the Starting Time
    # time.time() is the number of seconds since the epoch; it is captured
    # now so the processing time can be reported at the end (from a forensic
    # point of view, time is a key factor in an investigation).
    startTime = time.time()

    # Record the Welcome Message
    # The scan start is always written to the log; the on-screen copy sent
    # through DisplayMessage() only appears when the verbose option is set.
    logging.info('')
    logging.info('Welcome to p-fish version ' + PFISH_VERSION + ' ...New Scan Started')
    logging.info('')
    # Organisation, investigator name, case number and other case related
    # information could be recorded here.
    _pfish.DisplayMessage('Welcome to p-fish ...version ' + PFISH_VERSION)

    # Record some information regarding the System
    logging.info('System: ' + sys.platform)
    logging.info('Version: ' + sys.version)

    # Traverse the file system directories and hash the files
    # The walk starts at the root path given on the command line and returns
    # the number of files that were processed successfully.
    filesProcessed = _pfish.WalkPath()

    # Record the end time and calculate the duration
    # endTime - startTime is the number of seconds the hashing run took; it
    # could be converted to days, hours, minutes and so on.
    endTime = time.time()
    duration = endTime - startTime

    logging.info('Files Processed: ' + str(filesProcessed))
    logging.info('Elapsed Time: ' + str(duration) + ' seconds')
    logging.info('')
    logging.info('Program Terminated Normally')
    logging.info('')

    _pfish.DisplayMessage('程序运行: ' + str(duration) + ' 秒')
    _pfish.DisplayMessage("Program End")
# _pfish.py
#-*- coding: UTF-8 -*-
#
# pfish support functions, where all the real work gets done
#
# DisplayMessage() ParseCommandLine() WalkPath()
# HashFile() class _CSVWriter
# ValidateDirectory() ValidateDirectoryWritable()
#
import os
import stat
import time
import hashlib
import argparse
import csv
import logging
log = logging.getLogger('main._pfish')
#
# Name: ParseCommandLine() Function
#
# Desc: Process and Validate the command line arguments
# use Python Standard Library module argparse
#
# Input: none
#
# Actions:
# Uses the standard library argparse to process the command line and
# establishes a global variable gl_args where any of the functions can
# obtain argument information
#
def ParseCommandLine():
    """Process and validate the command line arguments.

    Defines the options with argparse, parses the command line and stores
    the outcome in two module-level globals that the other functions of this
    module read:

        gl_args     - the parsed argument namespace
        gl_hashType - 'MD5', 'SHA256' or 'SHA512' ("Unknown" as a fallback)

    Input: none
    Returns: None. When the arguments are invalid, argparse reports the
    problem to the user and terminates the program.
    """
    global gl_args
    global gl_hashType

    # The text is passed by keyword: the first positional parameter of
    # ArgumentParser is ``prog``, so a positional string would replace the
    # program name in the usage line instead of describing the program.
    parser = argparse.ArgumentParser(description='Python file system hashing ..p-fish')

    # With this option every DisplayMessage() call is shown on standard
    # output; without it the program runs silently.
    parser.add_argument('-v', '--verbose', help='allows progress message to be displayed', action='store_true')

    # Setup a group where the selection is mutually exclusive and required:
    # exactly one hash type must be chosen, otherwise the program terminates.
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('--md5', help='specifies MD5 algorithm', action='store_true')
    group.add_argument('--sha256', help='specifies SHA256 algorithm', action='store_true')
    group.add_argument('--sha512', help='specifies SHA512 algorithm', action='store_true')

    # Root path where the directory walk starts
    parser.add_argument('-d', '--rootPath', type=ValidateDirectory, required=True, help="specify the root path for hashing")

    # Directory the generated report file is written to
    parser.add_argument('-r', '--reportPath', type=ValidateDirectoryWritable, required=True, help="specify the path for reports and logs will be written")

    # The ``type`` callables make argparse validate both paths: rootPath has
    # to exist and be readable, reportPath has to exist and be writable.
    # argparse has no built-in directory validation, hence the two
    # Validate... functions of this module.

    # Create a global object to hold the validated arguments; these are then
    # available to all the functions within the _pfish.py module.
    gl_args = parser.parse_args()

    # The flag of the option chosen from the exclusive group is set to True
    if gl_args.md5:
        gl_hashType = 'MD5'
    elif gl_args.sha256:
        gl_hashType = 'SHA256'
    elif gl_args.sha512:
        gl_hashType = 'SHA512'
    else:
        # Defensive fallback; the required group above should make argparse
        # reject a command line that selects no hash type.
        gl_hashType = "Unknown"
        log.error('Unknown Hash Type Specified')

    DisplayMessage("Command line processed: Successfully")
    return
# End ParseCommandLine ====================================================
#
# Name: WalkPath() Function
#
# Desc: Walk the path specified on the command line using the Python Standard Library module os
#
# Input: none, uses command line arguments
#
# Actions:
# Uses the standard library module os to traverse the directory structure starting at the root
# path specified by the user. For each file discovered, WalkPath will call the function HashFile()
# to perform the file hashing
#
def WalkPath():
    """Walk the root path from the command line and hash every file found.

    Input: none; reads the global gl_args (rootPath, reportPath) and the
    global gl_hashType.

    Creates the report file 'fileSystemReport.csv' inside the report
    directory, calls HashFile() for each file below the root path and closes
    the report when the walk ends, even if the walk raises.

    Returns the number of files that were processed successfully.
    """
    # Number of files hashed successfully / not hashed
    processCount = 0
    errorCount = 0

    # Initialise the CSV report. os.path.join inserts the path separator
    # when reportPath is given without a trailing one; plain concatenation
    # would produce '<dir>fileSystemReport.csv' next to the directory.
    reportFile = os.path.join(gl_args.reportPath, 'fileSystemReport.csv')
    oCVS = _CSVWriter(reportFile, gl_hashType)

    # Record the value of the root path in the log file
    log.info('Root Path: ' + gl_args.rootPath)

    try:
        # Create a loop that processes all the files starting at the root
        # path; all sub-directories will also be processed
        for root, _dirs, files in os.walk(gl_args.rootPath):
            # For each file obtain the file name and call HashFile()
            for fileName in files:
                # File name joined with the directory it was found in
                fname = os.path.join(root, fileName)

                # HashFile() writes the result row through the CSV writer
                # and returns True only when the file was hashed
                if HashFile(fname, fileName, oCVS) is True:
                    processCount += 1
                else:
                    errorCount += 1
    finally:
        # The report is closed once all directories and files are handled,
        # and also when the walk is interrupted by an exception
        oCVS.writerClose()

    # Make the number of files that could not be processed visible as well
    log.info('Files Not Processed: ' + str(errorCount))

    return processCount
#End WalkPath ===============================================================
#
# Name: HashFile Function
#
# Desc: Processes a single file which includes performing a hash of the file and the extraction of metadata
# regarding the file processed; uses Python Standard Library modules hashlib, os, and time
#
# Input: theFile = the full path of the file
# simpleName = just the filename itself
# o_result = CSV writer object that receives the result row
#
# Actions:
# Attempts to hash the file and extract metadata. Calls o_result.writeCSVRow() for successfully hashed files
#
def HashFile(theFile, simpleName, o_result):
    """Hash one file, collect its metadata and add a row to the report.

    Input:  theFile    = the full path of the file
            simpleName = just the file name itself
            o_result   = report writer; its writeCSVRow() method is called
                         once when the file was hashed successfully

    The hash algorithm is taken from the global gl_args (md5 / sha256 /
    sha512 flags).

    Returns True when the file was hashed and written to the report and
    False in every other case. Each skipped or failed file is recorded in
    the log, after which the caller can carry on with the next file.
    """
    # Verify that the path is valid
    if not os.path.exists(theFile):
        log.warning('[' + repr(simpleName) + ', Path does NOT exist' + ']')
        return False

    # Verify that the path is not a symbolic link
    if os.path.islink(theFile):
        log.warning('[' + repr(simpleName) + ', Skipped Link NOT a File' + ']')
        return False

    # Verify that the file is real
    if not os.path.isfile(theFile):
        log.warning('[' + repr(simpleName) + ', Skipped NOT a File' + ']')
        return False

    # Select the hash algorithm requested on the command line
    if gl_args.md5:
        hasher = hashlib.md5()
    elif gl_args.sha256:
        hasher = hashlib.sha256()
    elif gl_args.sha512:
        hasher = hashlib.sha512()
    else:
        # Without an algorithm there is no hash value to report
        log.error('Hash not Selected')
        return False

    # Attempt to open the file read-only in binary mode; this can fail
    # because of permissions, a locked file or a damaged file
    try:
        f = open(theFile, 'rb')
    except IOError:
        # if open fails report the error
        log.warning('Open Failed: ' + theFile)
        return False

    # Attempt to read the file. It is fed to the hash in fixed-size chunks
    # so that a very large file never has to fit in memory at once.
    chunkSize = 1024 * 1024
    try:
        chunk = f.read(chunkSize)
        while chunk:
            hasher.update(chunk)
            chunk = f.read(chunkSize)
    except IOError:
        # if read fails report the error
        log.warning('Read Failed: ' + theFile)
        return False
    finally:
        # The file is closed on success and on failure alike
        f.close()

    # The file could be opened and read, so query its attributes: owner,
    # group, size, MAC times and mode are recorded in the CSV file.
    # NOTE(review): the stat is taken after the read, as in the original
    # code, so the access time may reflect this program's own read.
    try:
        (mode, ino, dev, nlink, uid, gid, size, atime, mtime, ctime) = os.stat(theFile)
    except OSError:
        log.warning('Stat Failed: ' + theFile)
        return False

    # Print the file name (verbose mode only)
    DisplayMessage("Processing File: " + theFile)

    # Size of the file in bytes
    fileSize = str(size)

    # MAC times. NOTE: st_ctime is the creation time on Windows but the time
    # of the last metadata change on Unix, although the report column is
    # titled 'Created Time'.
    modifiedTime = time.ctime(mtime)
    accessTime = time.ctime(atime)
    createdTime = time.ctime(ctime)

    ownerID = str(uid)
    groupID = str(gid)

    # The report carries the digest as upper-case hexadecimal
    hashValue = hasher.hexdigest().upper()

    # File processing completed. The separator goes through DisplayMessage()
    # so that it honours the verbose option like every other console message.
    DisplayMessage("==========================================")

    # Write one row to the report; the mode is written as the raw integer
    o_result.writeCSVRow(simpleName, theFile, fileSize, modifiedTime, accessTime, createdTime, hashValue, ownerID, groupID, mode)
    return True
# End HashFile Function====================================================
#
# Name: ValidateDirectory Function
#
#
# Desc: Function that will validate a directory path as existing and readable. Used for argument
# validation only
#
# Input: a directory path string
#
# Actions:
# if valid will return the Directory String
# if invalid it will raise an ArgumentTypeError within argparse
# which will in turn be reported by argparse to the user
#
def ValidateDirectory(theDir):
    """Return theDir unchanged when it is an existing, readable directory.

    Meant to be used as an argparse ``type`` callable.

    Raises argparse.ArgumentTypeError when the path is not a directory or
    when the directory is not readable.
    """
    if os.path.isdir(theDir):
        # The directory exists; it also has to be readable
        if not os.access(theDir, os.R_OK):
            raise argparse.ArgumentTypeError('Directory is not readable')
        return theDir

    # Not a directory at all
    raise argparse.ArgumentTypeError('Directory does not exist')
#End ValidateDirectory ==============================================
#
# Name: ValidateDirectoryWritable Function
#
# Desc: Function that will validate a directory path as existing and writable. Used for
# argument validation only
#
# Input: a directory path String
#
# Actions:
# if valid will return the Directory String
#
# if invalid it will raise an ArgumentTypeError within argparse which will
# in turn be reported by argparse to the user
#
def ValidateDirectoryWritable(theDir):
    """Return theDir unchanged when it is an existing, writable directory.

    Meant to be used as an argparse ``type`` callable.

    Raises argparse.ArgumentTypeError when the path is not a directory or
    when the directory is not writable.
    """
    # First test: the string has to name a directory that really exists
    isDirectory = os.path.isdir(theDir)
    if not isDirectory:
        raise argparse.ArgumentTypeError('Directory does not exist')

    # Second test: the directory has to grant write permission
    isWritable = os.access(theDir, os.W_OK)
    if not isWritable:
        raise argparse.ArgumentTypeError('Directory is not writable')

    return theDir
## End ValidateDirectoryWritable====================================================
#
# Name: DisplayMessage() Function
#
#
# Desc: Displays the message if the verbose command line option is present
#
# Input: message type string
#
# Actions:
# Uses the standard library print function to display the message
#
def DisplayMessage(msg):
    """Print msg to standard output, but only in verbose mode.

    The verbose flag is read from the module-level gl_args object that
    ParseCommandLine() creates; when the flag is off nothing is printed.

    Input: msg = the message to display
    Returns: None
    """
    # Guard clause: stay silent unless the verbose option was given
    if not gl_args.verbose:
        return
    print(msg)
## End DisplayMessage====================================================
#
# Class: _CSVWriter
#
#
# Desc: Handles all methods related to comma separated value operations
#
#
# Methods constructor : Initializes the CSV File
# writeCSVRow: Writes a single row to the csv file
# writerClose: Close the CSV File
#
class _CSVWriter(object):
    """Writes the hashing report as a comma separated value file.

    Methods:
        constructor : opens the report file and writes the header row
        writeCSVRow : writes a single row to the report
        writerClose : closes the report file
    """

    def __init__(self, fileName, hashType):
        """Open fileName for writing and write the header row.

        fileName = path of the report file to create
        hashType = name of the hash algorithm, used as the title of the
                   hash column

        Raises IOError/OSError when the report cannot be created; the
        failure is logged first. The error is passed on because an object
        without an open file would only fail later, in writeCSVRow() or
        writerClose(), with an unrelated AttributeError.
        """
        import sys  # local import: needed only for the version check below

        try:
            # Open the output file the way the csv module expects on the
            # running interpreter: text mode with newline='' on Python 3,
            # binary mode on Python 2.
            if sys.version_info[0] >= 3:
                # backslashreplace keeps a file name that cannot be encoded
                # from aborting the whole report
                self.csvFile = open(fileName, 'w', newline='', encoding='utf-8', errors='backslashreplace')
            else:
                self.csvFile = open(fileName, 'wb')

            # Create the writer object, every field quoted
            self.writer = csv.writer(self.csvFile, delimiter=',', quoting=csv.QUOTE_ALL)

            # Write the header row made of all the column names
            self.writer.writerow(('File', 'Path', 'Size', 'modified Time', 'Access Time', 'Created Time', hashType, 'Owner', 'Group', 'Mode'))
        except (IOError, OSError):
            # Record the failure in the log, then let the caller see it
            log.error('CSV File Failure')
            raise

    def writeCSVRow(self, fileName, filePath, fileSize, mTime, aTime, cTime, hashVal, own, grp, mod):
        """Write the record of one successfully hashed file to the report.

        The values are written in the order of the header row.
        """
        self.writer.writerow((fileName, filePath, fileSize, mTime, aTime, cTime, hashVal, own, grp, mod))

    def writerClose(self):
        """Close the report file."""
        self.csvFile.close()