在 Linux 下使用 Python 脚本找出指定目录下的重复文件,并作出处理。

#!/usr/bin/python3
#-*- coding: UTF-8 -*-
import hashlib
import sys
import os
import fnmatch
# Size constant (8192); NOTE(review): presumably the number of bytes to read
# per step when hashing files in chunks -- confirm where it is meant to be used.
CHUNK_SIZE = 8192

# Collect the paths of all files under a directory tree whose names match the wanted patterns.
def find_specific_files(root, formats=None):
    """Return the paths of files under *root* whose names match *formats*.

    The tree is walked with ``os.walk``; within each directory the matches
    are grouped by pattern, in the order the patterns are given.

    Args:
        root: Directory to search recursively.
        formats: Iterable of ``fnmatch``-style filename patterns. When
            omitted, the default set ``*.txt, *.bak, *.log, *.py, *.sh``
            is used.

    Returns:
        A list of paths (each directory path joined with the file name).
        Every file appears at most once, even if it matches several patterns.
    """
    if formats is None:
        formats = ['*.txt', '*.bak', '*.log', '*.py', '*.sh']
    else:
        # Materialize once: the patterns are re-used for every directory.
        formats = list(formats)

    file_path = []
    # Use a distinct name for the walked directory so the ``root``
    # parameter is not shadowed inside the loop.
    for dirpath, _dirnames, filenames in os.walk(root):
        seen = set()
        for pattern in formats:
            for filename in fnmatch.filter(filenames, pattern):
                # Overlapping patterns must not list the same file twice,
                # otherwise it would later look like its own duplicate.
                if filename in seen:
                    continue
                seen.add(filename)
                file_path.append(os.path.join(dirpath, filename))
    return file_path

# Compute the MD5 hex digest of a file's contents.
def get_chunk(filename, chunk_size=None):
    """Return the MD5 hex digest of the raw bytes stored in *filename*.

    The file is opened in binary mode and hashed in fixed-size pieces, so
    the result does not depend on the locale's text encoding, the file may
    contain arbitrary (non-text) bytes, and files that differ only in their
    line endings produce different digests.

    Args:
        filename: Path of the file to hash.
        chunk_size: Number of bytes to read per step. Defaults to the
            module-level ``CHUNK_SIZE``.

    Returns:
        The digest as a lowercase hexadecimal string.

    Raises:
        ValueError: If the effective chunk size is not positive.
        OSError: If the file cannot be opened or read.
    """
    size = CHUNK_SIZE if chunk_size is None else chunk_size
    if size <= 0:
        # read(0) would return b'' immediately and hash nothing.
        raise ValueError("chunk_size must be a positive integer")

    d = hashlib.md5()
    with open(filename, 'rb') as f:
        # iter() with a sentinel stops at the first empty read (EOF).
        for block in iter(lambda: f.read(size), b''):
            d.update(block)
    return d.hexdigest()

def main(argv=None):
    """Print every file whose contents duplicate an earlier file.

    The directory to scan is the first command-line argument. Each matching
    file found by ``find_specific_files`` is hashed with ``get_chunk``; the
    first file seen with a given checksum is remembered, and every later
    file with the same checksum is reported against it on stdout.

    Args:
        argv: Argument list without the program name. Defaults to
            ``sys.argv[1:]``.

    Raises:
        SystemExit: If no directory is given or the argument is not a
            directory.
    """
    # Read the arguments without mutating the global sys.argv.
    args = sys.argv[1:] if argv is None else list(argv)
    directory = args[0] if args else ""
    if not directory:
        raise SystemExit("missing directory parameters")
    if not os.path.isdir(directory):
        raise SystemExit("{0} is not a directory".format(directory))

    record = {}  # checksum -> first path seen with that checksum
    for path in find_specific_files(directory):
        try:
            checksum = get_chunk(path)
        except (OSError, ValueError) as err:
            # One unreadable or undecodable file (UnicodeDecodeError is a
            # ValueError) should not abort the whole scan.
            print('skipping {0}: {1}'.format(path, err), file=sys.stderr)
            continue
        if checksum in record:
            print('{0} and {1} contents are the same'.format(record[checksum], path))
        else:
            record[checksum] = path


if __name__ == '__main__':
    main()

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值