需求:
在本地文件中,查找书单《信息安全从业者书单》中列出的书籍。
原理:
遍历 README.md 中的每本书,并通过 Everything SDK 在本地逐一查找。
1、计算文件CRC32
因为只需要确定本地文件的唯一性,而CRC32的计算效率比md5和sha1更高,所以这里计算CRC32。
#!/usr/bin/env python
#-*- coding:utf-8 -*-
import zlib
import os
block_size = 1024 * 1024
def crc32_simple(filepath):
    """Return the CRC32 of the first ``block_size`` bytes of a file.

    Only the leading block is read, so this is a cheap fingerprint of the
    start of the file rather than a checksum of its full contents.

    Args:
        filepath: Path of the file to read (opened in binary mode).

    Returns:
        The CRC32 as a non-negative integer. Returns 0 if any exception is
        raised while opening, reading or hashing; the exception text is
        printed in that case. Note that 0 is also the CRC32 of empty data.
    """
    try:
        with open(filepath, 'rb') as f:
            head = f.read(block_size)
        # Python 2's zlib.crc32 may return a signed value; masking makes the
        # result unsigned on every version, matching crc32_file.
        return zlib.crc32(head, 0) & 0xFFFFFFFF
    except Exception as e:
        # Best-effort helper: report the problem and fall back to 0.
        print(str(e))
        return 0
def crc32_file(filepath):
    """Return the CRC32 of an entire file, read in ``block_size`` chunks.

    Args:
        filepath: Path of the file to read (opened in binary mode).

    Returns:
        The CRC32 as a non-negative decimal integer. Returns 0 if any
        exception is raised while opening or reading the file; the exception
        text is printed in that case. An empty file also yields 0.
    """
    crc = 0
    try:
        # The context manager closes the file even when a read fails.
        with open(filepath, 'rb') as fd:
            while True:
                chunk = fd.read(block_size)
                if not chunk:  # EOF (or the file was empty)
                    break
                crc = zlib.crc32(chunk, crc)
    except Exception as e:
        # Best-effort helper: report the problem and fall back to 0,
        # in the same way crc32_simple does.
        print(str(e))
        return 0
    # Python 2's zlib.crc32 may return a signed value; masking yields the
    # unsigned result on every version without consulting sys.version_info.
    return crc & 0xFFFFFFFF
2、文件大小自动变换单位
递归实现:根据文件大小(bytes)自动换算到合适的单位['B', 'KB', 'MB', 'GB', 'TB', 'PB']。eg : 16473740 bytes --> 约 15.7 MB(输出保留小数点后三位)
def FormatSize(size):
    """Format a byte count using the largest fitting binary unit.

    The value is expressed in the largest unit of B/KB/MB/GB/TB/PB
    (1024-based) for which it is at least 1, followed by three decimal
    places, e.g. ``16473740`` -> ``'15.710 MB'``. The fraction is truncated,
    not rounded. Values of 1024 PB or more stay in PB.

    Args:
        size: Size in bytes, as an integer.

    Returns:
        A string of the form ``'<integer>.<3 digits> <unit>'``.
    """
    # The raw byte count is echoed to stdout, as the function has always done.
    print(size)
    units = ['B', 'KB', 'MB', 'GB', 'TB', 'PB']

    def pick_unit(divisor, level):
        # Recursively step up one unit (x1024) while the value expressed in
        # the current unit is still >= 1024 and a larger unit exists.
        if size // divisor >= 1024 and level + 1 < len(units):
            return pick_unit(divisor * 1024, level + 1)
        return divisor, level

    divisor, level = pick_unit(1, 0)
    integer, rest = divmod(size, divisor)
    # Convert the remainder (a fraction of `divisor`) into decimal
    # thousandths; integer arithmetic keeps the result exact and in 0..999.
    thousandths = rest * 1000 // divisor
    return '{}.{:03d} {}'.format(integer, thousandths, units[level])
3、调用Everything SDK,通过everything64.dll来完成交互。
import ctypes
import datetime
import struct
#dll imports
everything_dll = ctypes.WinDLL (r"./Everything64.dll")
everything_dll.Everything_GetResultDateModified.argtypes = [ctypes.c_int,ctypes.