'''
功能:
1.查询所有md5
2.找出重复值
3.根据重复值输出 文件目录(parent_path)+文件名(server_filename)
a = [99,1,2,1,3,4]
# 集合存储重复数据
b=set()
for i in a:
if a.count(i)>1:
b.update([i])
dict ={}
# 遍历重复数据
for j in b:
list = []
for A in range(len(a)):
if j == a[A]:
list.append(A)
dict[j] = list
print(dict)
'''
import sqlite3,time
def repeats():
    """Look up the location of one file for every md5 stored more than once.

    Opens the SQLite database ``hxz.db`` in the current working directory,
    collects every md5 value that appears in more than one ``cache_file``
    row, and fetches the ``(parent_path, server_filename)`` pair of the first
    row SQLite returns for each of those md5 values.

    The number of collected rows is printed before and after blank rows are
    dropped.

    Returns:
        list: one ``(parent_path, server_filename)`` tuple per duplicated
        md5, in the order in which each md5 was first seen repeated. Rows in
        which every column is NULL, empty or whitespace-only are left out.
    """

    def _is_blank(value):
        # NULL, "" and whitespace-only strings all count as "empty".
        return value is None or (isinstance(value, str) and not value.strip())

    db = sqlite3.connect('hxz.db')
    try:
        c = db.cursor()
        c.execute('SELECT md5 from cache_file')

        # Single pass: an md5 already in `seen` is a duplicate.  A dict is
        # used for `duplicated` so the order of first repetition is kept.
        seen = set()
        duplicated = {}
        for (md5,) in c.fetchall():
            if md5 is None:
                # Rows without an md5 cannot be matched against each other.
                continue
            if md5 in seen:
                duplicated[md5] = None
            else:
                seen.add(md5)
        md5_l = list(duplicated)
        print(type(md5_l))

        paths = []
        for md5 in md5_l:
            # Bound parameter instead of string concatenation, so an md5
            # containing a quote cannot break (or alter) the statement.
            c.execute(
                'SELECT parent_path, server_filename FROM cache_file WHERE md5=?',
                (md5,),
            )
            paths.append(c.fetchone())
    finally:
        # Release the connection even when a query fails.
        db.close()

    print('paths长度是:', len(paths))
    paths = [
        row for row in paths
        if row is not None and not all(_is_blank(value) for value in row)
    ]
    print('去空后的paths长度是:', len(paths))
    return paths
# Run the duplicate scan as soon as this file is executed.
repeats()
数据库(右键打开位置,找到users点进去,第一个.db即文件列表,内含md5),复制到同一目录:
'''
功能:
1.查询所有md5
2.找出重复值
3.根据重复值输出 文件目录(parent_path)+文件名(server_filename)
a = [99,1,2,1,3,4]
# 集合存储重复数据
b=set()
for i in a:
if a.count(i)>1:
b.update([i])
dict ={}
# 遍历重复数据
for j in b:
list = []
for A in range(len(a)):
if j == a[A]:
list.append(A)
dict[j] = list
print(dict)
'''
import sqlite3,time
def repeats():
    """Return one file name for every md5 stored more than once.

    Opens the SQLite database ``hxz.db`` in the current working directory,
    collects every md5 value that appears in more than one ``cache_file``
    row, and fetches the ``server_filename`` of the first row SQLite returns
    for each of those md5 values.

    The number of collected rows is printed before and after blank rows are
    dropped.

    Returns:
        list: one ``(server_filename,)`` tuple per duplicated md5, in the
        order in which each md5 was first seen repeated. Rows whose file
        name is NULL, empty or whitespace-only are left out.
    """

    def _is_blank(value):
        # NULL, "" and whitespace-only strings all count as "empty".
        return value is None or (isinstance(value, str) and not value.strip())

    db = sqlite3.connect('hxz.db')
    try:
        c = db.cursor()
        c.execute('SELECT md5 from cache_file')

        # Single pass: an md5 already in `seen` is a duplicate.  A dict is
        # used for `duplicated` so the order of first repetition is kept.
        seen = set()
        duplicated = {}
        for (md5,) in c.fetchall():
            if md5 is None:
                # Rows without an md5 cannot be matched against each other.
                continue
            if md5 in seen:
                duplicated[md5] = None
            else:
                seen.add(md5)
        md5_l = list(duplicated)
        print(type(md5_l))

        paths = []
        for md5 in md5_l:
            # Bound parameter instead of string concatenation, so an md5
            # containing a quote cannot break (or alter) the statement.
            c.execute(
                'SELECT server_filename FROM cache_file WHERE md5=?',
                (md5,),
            )
            paths.append(c.fetchone())
    finally:
        # Release the connection even when a query fails.
        db.close()

    print('paths长度是:', len(paths))
    paths = [
        row for row in paths
        if row is not None and not all(_is_blank(value) for value in row)
    ]
    print('去空后的paths长度是:', len(paths))
    return paths
##repeats()