Basic
mtime + checksum + directory traversal
Code
#!/usr/bin/env python
'''
monitor a directory
print the changed files in the directory
strategy: mtime + checksum
'''
import sys
import os
import stat
import cPickle as pickle
def print_usage():
    '''
    Print the command-line synopsis, one line per supported action,
    followed by the default for the optional [dir] argument.
    '''
    usage_lines = (
        'monitor.py init [dir]',
        'monitor.py check [dir]',
        'monitor.py refresh [dir]',
        '[dir] defaults to .',
    )
    for usage_line in usage_lines:
        print(usage_line)
def main(argv=None):
    '''
    Command-line entry point: parse the arguments and run one action.

    argv: argument list without the program name, either [action] or
          [action, dir]. When None, sys.argv[1:] is used.
    action is one of 'init', 'check' or 'refresh'; dir defaults to '.'.

    Returns 0 after running the action, or 1 after printing the usage
    text when the arguments are not understood.
    '''
    # The signature advertises argv=None, so honour it instead of
    # failing on len(None).
    if argv is None:
        argv = sys.argv[1:]

    if len(argv) not in (1, 2):
        print_usage()
        return 1

    action = argv[0]
    directory = argv[1] if len(argv) == 2 else '.'

    if action not in ('init', 'check', 'refresh'):
        print_usage()
        return 1

    monitor = Monitor(os.path.abspath(directory))
    # action was validated above, so the last branch can only be 'refresh'.
    if action == 'init':
        monitor.init_dir()
    elif action == 'check':
        monitor.check_dir()
    else:
        monitor.refresh_dir()
    return 0
class Monitor(object):
    '''
    Report added, changed and removed files under one directory tree.

    init_dir/refresh_dir pickle one FileCache record per file (path,
    mtime, checksum) into a '.cache' file inside the monitored directory.
    check_dir walks the tree again and compares it with those records.
    '''

    # Name of the snapshot file; files with this name are never tracked.
    CACHE_NAME = '.cache'

    def __init__(self, directory):
        '''
        directory: root of the tree to monitor (relative or absolute).
        '''
        # FileCache records are keyed by absolute path, so the walk root
        # has to be absolute as well; with a relative root no walked path
        # would ever match a cached one.
        self.directory = os.path.abspath(directory)

    def _cache_path(self):
        '''Return the full path of the snapshot file.'''
        return os.path.join(self.directory, self.CACHE_NAME)

    def _walk_files(self):
        '''
        Yield the full path of every file under the directory
        (recursively), skipping any file named '.cache'.
        '''
        for dirpath, _dirnames, filenames in os.walk(self.directory):
            for name in filenames:
                if name == self.CACHE_NAME:
                    continue
                yield os.path.join(dirpath, name)

    def _load_snapshot(self, cache_path):
        '''
        Read the pickled FileCache records from cache_path.

        Returns a dict {filepath: (mtime, checksum)}. Objects in the
        stream that are not FileCache instances are ignored.
        '''
        snapshot = {}
        # NOTE(security): unpickling runs code chosen by whoever wrote the
        # file, so a tampered .cache is dangerous. Only check directories
        # whose .cache you trust.
        with open(cache_path, 'rb') as cache_file:
            unpickler = pickle.Unpickler(cache_file)
            while True:
                try:
                    entry = unpickler.load()
                except EOFError:
                    # Normal end of the stream of dumped records.
                    break
                except Exception as err:
                    # Damaged cache: keep the records read so far, but
                    # say so instead of silently reporting bogus results.
                    sys.stderr.write(
                        'warning: %s is damaged (%s); run refresh\n'
                        % (cache_path, err))
                    break
                if isinstance(entry, FileCache):
                    snapshot[entry.filepath] = (entry.mtime, entry.checksum)
        return snapshot

    def construct_cache(self):
        '''
        Build a FileCache record for every file in the tree and dump the
        records, one pickle after another, into the '.cache' file.
        '''
        # Collect everything first; the old snapshot is only overwritten
        # once the whole tree has been read successfully.
        entries = []
        for path in self._walk_files():
            print('dealing with %s' % path)
            entries.append(FileCache(path))

        with open(self._cache_path(), 'wb') as cache_file:
            pickler = pickle.Pickler(cache_file)
            for entry in entries:
                pickler.dump(entry)

    def init_dir(self):
        '''
        Initialise the directory: cache the mtime and checksum of all
        files and dump the cache to '.cache' in the directory.
        '''
        print('init_dir')
        self.construct_cache()
        print('init %s success' % self.directory)

    def check_dir(self):
        '''
        Compare the tree with the stored snapshot and print one line per
        difference: '[ADD]:', '[CHANGED]:' or '[REMOVE]:' plus the path.

        A file counts as changed only when both its mtime and its
        checksum differ from the snapshot; the checksum is computed only
        when the mtime differs. If there is no '.cache' file, a message
        is printed and nothing else is done. Returns None.
        '''
        cache_path = self._cache_path()
        if not os.path.isfile(cache_path):
            print('%s has not been initialized yet' % self.directory)
            return

        snapshot = self._load_snapshot(cache_path)

        for path in self._walk_files():
            if path not in snapshot:
                print('[ADD]: %s' % path)
                continue
            # Remove the record as it is matched; whatever is left over
            # after the walk no longer exists on disk.
            saved_mtime, saved_checksum = snapshot.pop(path)
            if os.stat(path)[stat.ST_MTIME] == saved_mtime:
                continue
            if md5_file(path) != saved_checksum:
                print('[CHANGED]: %s' % path)

        # Sorted so the report order does not depend on dict ordering.
        for path in sorted(snapshot):
            print('[REMOVE]: %s' % path)

    def refresh_dir(self):
        '''
        Rebuild the snapshot so that the current state of the tree
        becomes the new baseline for check_dir.
        '''
        print('refresh_dir')
        self.construct_cache()
        print('refresh %s success' % self.directory)
def md5_file(filename):
    '''
    Return the hexadecimal MD5 digest of the contents of a file.

    filename: path of the file to hash.
    Errors from opening or reading the file propagate to the caller.
    '''
    import hashlib

    digest = hashlib.md5()
    # Binary mode hashes the exact bytes on disk (text mode may translate
    # line endings or need a decodable encoding). Fixed-size chunks keep
    # memory bounded even for a large file without newlines. The with
    # block closes the handle, which the previous version never did.
    with open(filename, 'rb') as stream:
        for chunk in iter(lambda: stream.read(65536), b''):
            digest.update(chunk)
    return digest.hexdigest()
class FileCache(object):
    '''
    Record of one file as stored in the cache: its absolute path, the
    mtime read from os.stat and the checksum returned by md5_file.
    '''

    def __init__(self, f):
        '''
        f: path of the file to record.
        '''
        absolute_path = os.path.abspath(f)
        self.filepath = absolute_path
        stat_result = os.stat(f)
        # Tuple-style access on purpose: check_dir compares against the
        # value obtained in exactly the same way.
        self.mtime = stat_result[stat.ST_MTIME]
        self.checksum = md5_file(f)

    def __str__(self):
        '''Return path, mtime and checksum concatenated without separators.'''
        pieces = (self.filepath, str(self.mtime), str(self.checksum))
        return ''.join(pieces)
if __name__ == '__main__':
    # Hand main()'s return value to the shell as the exit status, so a
    # usage error (1) can be told apart from a successful run (0).
    sys.exit(main(sys.argv[1:]))
Test
chenqi@chenqi-OptiPlex-760:~/mypro/python/monitorDir$ ./monitor.py init
init_dir
dealing with /home/chenqi/mypro/python/monitorDir/monitor.py
dealing with /home/chenqi/mypro/python/monitorDir/monitor.py~
dealing with /home/chenqi/mypro/python/monitorDir/test1/1.txt
dealing with /home/chenqi/mypro/python/monitorDir/test1/2.txt
dealing with /home/chenqi/mypro/python/monitorDir/test1/test_sub1/5.txt
dealing with /home/chenqi/mypro/python/monitorDir/test1/test_sub1/6.txt
dealing with /home/chenqi/mypro/python/monitorDir/test2/4.txt
dealing with /home/chenqi/mypro/python/monitorDir/test2/3.txt
init /home/chenqi/mypro/python/monitorDir success
chenqi@chenqi-OptiPlex-760:~/mypro/python/monitorDir$ touch monitor.py
chenqi@chenqi-OptiPlex-760:~/mypro/python/monitorDir$ ./monitor.py check
chenqi@chenqi-OptiPlex-760:~/mypro/python/monitorDir$ touch 1.txt
chenqi@chenqi-OptiPlex-760:~/mypro/python/monitorDir$ ./monitor.py check
[ADD]: /home/chenqi/mypro/python/monitorDir/1.txt
chenqi@chenqi-OptiPlex-760:~/mypro/python/monitorDir$ ./monitor.py refresh
refresh_dir
dealing with /home/chenqi/mypro/python/monitorDir/1.txt
dealing with /home/chenqi/mypro/python/monitorDir/monitor.py
dealing with /home/chenqi/mypro/python/monitorDir/monitor.py~
dealing with /home/chenqi/mypro/python/monitorDir/test1/1.txt
dealing with /home/chenqi/mypro/python/monitorDir/test1/2.txt
dealing with /home/chenqi/mypro/python/monitorDir/test1/test_sub1/5.txt
dealing with /home/chenqi/mypro/python/monitorDir/test1/test_sub1/6.txt
dealing with /home/chenqi/mypro/python/monitorDir/test2/4.txt
dealing with /home/chenqi/mypro/python/monitorDir/test2/3.txt
refresh /home/chenqi/mypro/python/monitorDir success
chenqi@chenqi-OptiPlex-760:~/mypro/python/monitorDir$ rm 1.txt
chenqi@chenqi-OptiPlex-760:~/mypro/python/monitorDir$ ./monitor.py check
[REMOVE]: /home/chenqi/mypro/python/monitorDir/1.txt
chenqi@chenqi-OptiPlex-760:~/mypro/python/monitorDir$