python文本去重_Python文件去重工具

#!/usr/bin/env python

# -*- coding: utf-8 -*-

import argparse
import hashlib
import os
import sys
import tempfile

def print_dedu(groups=None):
    """Print every group of files that share the same MD5 digest.

    Args:
        groups: Optional mapping of MD5 hex digest -> list of file paths.
            When omitted, the module-level ``dedu_dict`` is used.

    A group with fewer than two paths is not a duplicate and is skipped.
    For each remaining group the output is one ``md5:<digest>`` line, one
    line per path, and a trailing blank line.
    """
    if groups is None:
        groups = dedu_dict

    for file_md5, filelist in groups.items():
        if len(filelist) < 2:
            continue
        # Single-argument print(...) behaves identically on Python 2 and 3.
        print("md5:{}".format(file_md5))
        for filename in filelist:
            print("{}".format(filename))
        print("")

def exec_dedu(groups=None):
    """Delete duplicate files, keeping exactly one copy per MD5 group.

    Args:
        groups: Optional mapping of MD5 hex digest -> list of file paths.
            When omitted, the module-level ``dedu_dict`` is used.

    For every group with two or more paths, the last path in the list is
    kept and all earlier ones are removed with ``os.remove``. Each removal
    is announced as ``rm <path>``. A removal that fails with ``OSError`` is
    reported and skipped so the remaining files are still processed.
    The mapping passed in is not modified.
    """
    if groups is None:
        groups = dedu_dict

    for file_md5, filelist in groups.items():
        if len(filelist) < 2:
            continue
        print("md5:{}".format(file_md5))
        # Slice instead of pop(): the last entry is the survivor, and the
        # caller's list is left intact.
        for filename in filelist[:-1]:
            print("rm {}".format(filename))
            try:
                os.remove(filename)
            except OSError as err:
                print("[WARN]failed to remove {}: {}".format(filename, err))
        print("")

# ---- command-line handling and output-file setup ----
parser = argparse.ArgumentParser(description="This is a de-duplicate tool")
# nargs="?" makes the positional optional; without it argparse always
# requires a value and default="." can never take effect.
parser.add_argument("dir", nargs="?", default=".",
                    help="target directory")
parser.add_argument("-s", "--safe", "--scan",
                    action="store_true", dest="not_delete",
                    help="scan directory only ,don't delete file")
parser.add_argument("-o", "--output", type=argparse.FileType("w"), default=None,
                    help="output of scan result")
args = parser.parse_args()

print("[INFO]dir:{}".format(args.dir))
if args.not_delete:
    print("[INFO]we are in safe mode.")
not_delete = args.not_delete

# check parameters
# Validate the directory before creating the temporary log file so a bad
# path does not leave a stray dedu-*.log behind.
if not os.path.isdir(args.dir):
    print("dir{} is not exists!".format(args.dir))
    sys.exit(-1)
target_dir = args.dir

# Use the file given with -o/--output, otherwise fall back to a fresh
# temporary log file.
if args.output:
    output_fifo = args.output
    output_filename = args.output.name
else:
    fd, output_filename = tempfile.mkstemp(prefix="dedu-", suffix=".log")
    output_fifo = os.fdopen(fd, "w")
print("[INFO]output filename:{}".format(output_filename))

# NOTE(review): this looks like a leftover placeholder line; kept so the
# output file content is unchanged - confirm whether it can be dropped.
output_fifo.write("Hello World\n")

# let's traverse_directory

def md5(fname, chunk_size=4096):
    """Return the hexadecimal MD5 digest of the file at ``fname``.

    The file is opened in binary mode and fed to the hash in pieces of
    ``chunk_size`` bytes, so the whole file is never held in memory.

    Args:
        fname: Path of the file to hash.
        chunk_size: Number of bytes read per iteration (default 4096).

    Returns:
        The digest as a lowercase hex string.

    Adapted from
    http://stackoverflow.com/questions/3431825/generating-a-md5-checksum-of-a-file
    """
    # Named ``digest`` rather than ``hash`` to avoid shadowing the builtin.
    digest = hashlib.md5()
    with open(fname, "rb") as f:
        # iter(callable, sentinel) keeps reading until read() returns b"".
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# ---- scan the tree, report duplicates, optionally delete them ----
# Maps an MD5 hex digest to the list of paths whose content has that digest.
dedu_dict = {}

output_abspath = os.path.abspath(output_filename)
for dirpath, subdirList, subfileList in os.walk(target_dir):
    for filename in subfileList:
        full_filename = os.path.join(dirpath, filename)
        # A symlink hashes the same as its target; treating the pair as
        # duplicates could delete the real file and keep a dangling link.
        if os.path.islink(full_filename):
            continue
        # Never treat the report file itself as a de-duplication candidate.
        if os.path.abspath(full_filename) == output_abspath:
            continue
        try:
            file_md5 = md5(full_filename)
        except (IOError, OSError) as err:
            # One unreadable file should not abort the whole scan.
            print("[WARN]skip unreadable file {}: {}".format(full_filename, err))
            continue
        dedu_dict.setdefault(file_md5, []).append(full_filename)

print_dedu()

# Persist the scan result to the output file (the -o/--output help text
# promises this), then close it. This runs before exec_dedu() so the
# report lists every duplicate that was found.
try:
    for file_md5, filelist in dedu_dict.items():
        if len(filelist) < 2:
            continue
        output_fifo.write("md5:{}\n".format(file_md5))
        for filename in filelist:
            output_fifo.write("{}\n".format(filename))
        output_fifo.write("\n")
finally:
    output_fifo.close()

if not not_delete:
    exec_dedu()

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值