上文 用memory_profiler 监控内存耗用写了一个遍历目录夹下所有文件的脚本,约有26万个文件,用时90秒左右。
现在想要将文件名、路径存入数据库, 要求有新增文件时,就写入数据,有减少的文件时,就从数据库删除。
一、 第一步,将文件写入数据库。
1. 上代码:
root@WJL-SH4031667: # cat setFileToSql.py
python hljs 32行
from os import walk
from os.path import join,getmtime
import sqlite3
@profile
def scan():
dbName,tblName = "test.db","fmanages"
conn = sqlite3.connect(dbName)
cursor = conn.cursor()
cursor.execute('create table {}(id varchar(20) primary key, fname varchar(30), path varchar(80), tm FLOAT, tag varchar(80))'.format(tblName))
ret = {}
for path, _, files in walk("./"):
for f in files:
if not f.endswith(""):
continue
filename = join(path, f)
try:
cursor.execute("insert into {} (fname, path,tm, tag) values (\'{}\',\'{}\',\'{}\', \'study\')".format(tblName, filename.rstrip(), path.rstrip(), getmtime(filename)))
except:
continue
cursor.close()
conn.commit()
conn.close()
if __name__ == "__main__":
scan()
2. 测试运行用时和内存使用情况
root@WJL-SH4031667: # time python -m memory_profiler setFileToSql.py
python hljs 26行
Line # Mem usage Increment Line Contents
================================================
5 29.820 MiB 29.820 MiB @profile
6 def scan():
7 29.824 MiB 0.004 MiB dbName,tblName = "test.db","fmanages"
8 29.984 MiB 0.160 MiB conn = sqlite3.connect(dbName)
9 29.988 MiB 0.004 MiB cursor = conn.cursor()
10 30.473 MiB 0.484 MiB cursor.execute('create table {}(id varchar(20) primary key, fname varchar(30), path varchar(80), tm FLOAT, tag varchar(80))'.format(tblName))
12 37.578 MiB -48094.824 MiB for path, _, files in walk("./"):
13 37.578 MiB -479957.762 MiB for f in files:
14 37.578 MiB -431869.512 MiB if not f.endswith(""):
15 continue
20 37.578 MiB -431869.512 MiB try:
21 37.578 MiB -431869.469 MiB cursor.execute("insert into {} (fname, path,tm, tag) values (\'{}\',\'{}\',\'{}\', \'study\')".format(tblName, f.rstrip(), path.rstrip(), getmtime(f)))
22 37.578 MiB -431727.805 MiB except:
23 37.578 MiB -431727.809 MiB continue
24
25 31.953 MiB -5.625 MiB cursor.close()
26 31.953 MiB 0.000 MiB conn.commit()
27 31.953 MiB 0.000 MiB conn.close()
python -m memory_profiler setFileToSql.py 94.52s user 253.09s system 99% cpu 5:49.69 total
3. 优化
从上看出,使用数据库用了内存30M左右,相比上文 用memory_profiler 监控内存耗用, 字典方式存储时用的100M,少了70M,字典果然是吃粮大货。想提高sqlite的写入速度?但和上文比对之后,两者时间差不多,优化的空间就不大了。user态用时90秒的时间,都用在了IO的耗时上。
4. 查看生成的数据库文件大小(23万72条记录,42M左右)
root@WJL-SH4031667:# ls -lh test.db
-rwxrwxrwx 1 root root 42M Nov 1 09:01 test.db
root@WJL-SH4031667: # sqlite3 test.db
sqlite3 hljs 3行
sqlite> select count(*) from fmanages;
230072
二、第二步,增加和删除文件后,对应的数据库的记录能够相应的增删。
1. 主要是新增了两个集合的对比,第一个集合是前一次的数据库中所有文件名,第二个集合是现有的文件名集合。两个集合的相减就能得出增/减的文件:
python hljs 15行
#导出原有的文件记录
extRet = dict(cursor.execute('select fname,tm from {}'.format(tblName)).fetchall())
#将上次的文件名集合和这一次的文件名集合弄出来
prekeys = extRet.keys()
nowkeys = ret.keys()
#检查删除的文件
delFile = prekeys - nowkeys
...
#检查新增的文件
newFile = nowkeys - prekeys
...
2. 上代码
root@WJL-SH4031667: # cat setFileToSql.py
python hljs 67行
from os import walk
from os.path import join,getmtime
import sqlite3
@profile
def scan():
dbName,tblName = "test.db","fmanages"
conn = sqlite3.connect(dbName)
cursor = conn.cursor()
ret,extRet = {},{}
i = 1
for path, _, files in walk("./"):
for f in files:
if not f.endswith(""):
continue
filename = join(path, f)
ret[filename] = getmtime(filename)
i += 1
###############以下为新增代码,比较重要的是两个集合还删增############
#检查是否存在table
try:
cursor.execute('create table {}(id varchar(20) primary key, fname varchar(30), path varchar(80), tm FLOAT, tag varchar(80))'.format(tblName))
except:
extRet = dict(cursor.execute('select fname,tm from {}'.format(tblName)).fetchall())
#将上次的文件名集合和这一次的文件名集合弄出来
prekeys = extRet.keys()
nowkeys = ret.keys()
#检查删除的文件
delFile = prekeys - nowkeys
print("删除的文件{}".format(delFile))
#检查新增的文件
newFile = nowkeys - prekeys
print("新增的文件{}".format(newFile))
try:
for filename in delFile:
cursor.execute("delete from {} where fname=\'{}\'".format(tblName, filename))
print("删除的文件{}".format(filename))
except:
pass
try:
for filename in newFile:
cursor.execute("insert into {} (fname, path,tm, tag) values (\'{}\',\'new\',\'{}\', \'study\')".format(tblName, filename.rstrip(), getmtime(filename)))
print("新增的文件{}".format(filename))
except:
pass
############################################
cursor.close()
conn.commit()
conn.close()
return ret
if __name__ == "__main__":
scan()
3. 增加删减功能之后,观察内存使用情况(多了20M左右,总用时多了1分钟)
(首先去删除一个文件)
python hljs 61行
Filename: setFileToSql.py
Line # Mem usage Increment Line Contents
================================================
6 29.824 MiB 29.824 MiB @profile
7 def scan():
8 29.828 MiB 0.004 MiB dbName,tblName = "test.db","fmanages"
9 29.988 MiB 0.160 MiB conn = sqlite3.connect(dbName)
10 29.992 MiB 0.004 MiB cursor = conn.cursor()
11 29.992 MiB 0.000 MiB ret,extRet = {},{}
12 #\u68C0\u67E5\u662F\u5426\u5B58\u5728table
13 29.992 MiB 0.000 MiB try:
14 30.477 MiB 0.484 MiB cursor.execute('create table {}(id varchar(20) primary key, fname varchar(30), path varchar(80), tm FLOAT, tag varchar(80))'.format(tblName))
15 except:
16 extRet = dict(cursor.execute('select fname,tm from {}'.format(tblName)).fetchall())
17
18 30.477 MiB 0.000 MiB i = 1
19 97.188 MiB -5072.457 MiB for path, _, files in walk("./"):
20 97.188 MiB -35328.441 MiB for f in files:
21 97.188 MiB -31115.449 MiB if not f.endswith(""):
22 continue
23
24
25 97.188 MiB -31114.238 MiB filename = join(path, f)
26 97.188 MiB -31074.410 MiB ret[filename] = getmtime(filename)
27 97.188 MiB -31111.219 MiB i += 1
28
29 #\u5C06\u4E0A\u6B21\u7684\u6587\u4EF6\u540D\u96C6\u5408\u548C\u8FD9\u4E00\u6B21\u7684\u6587\u4EF6\u540D\u96C6\u5408\u5F04\u51FA\u6765
30 97.188 MiB 0.000 MiB prekeys = extRet.keys()
31 97.188 MiB 0.000 MiB nowkeys = ret.keys()
32
33 #\u68C0\u67E5\u5220\u9664\u7684\u6587\u4EF6
34 97.188 MiB 0.000 MiB delFile = prekeys - nowkeys
35 97.195 MiB 0.008 MiB print("\u5220\u9664\u7684\u6587\u4EF6{}".format(delFile))
36
37 #\u68C0\u67E5\u65B0\u589E\u7684\u6587\u4EF6
38 111.578 MiB 14.383 MiB newFile = nowkeys - prekeys
39 114.059 MiB 2.480 MiB print("\u65B0\u589E\u7684\u6587\u4EF6{}".format(newFile))
40
41 114.059 MiB 0.000 MiB try:
42 114.059 MiB 0.000 MiB for filename in delFile:
43 cursor.execute("delete from {} where fname=\'{}\'".format(tblName, filename))
44 print("\u5220\u9664\u7684\u6587\u4EF6{}".format(filename))
45 except:
46 pass
47
48
49 114.059 MiB 0.000 MiB try:
50 114.109 MiB 0.000 MiB for filename in newFile:
51 114.109 MiB 0.043 MiB cursor.execute("insert into {} (fname, path,tm, tag) values (\'{}\',\'new\',\'{}\', \'study\')".format(tblName, filename.rstrip(), getmtime(filename)))
52 114.109 MiB 0.008 MiB print("\u65B0\u589E\u7684\u6587\u4EF6{}".format(filename))
53 114.109 MiB 0.000 MiB except:
54 114.109 MiB 0.000 MiB pass
55
56 114.109 MiB 0.000 MiB cursor.close()
57 114.109 MiB 0.000 MiB conn.commit()
58 114.109 MiB 0.000 MiB conn.close()
59 114.109 MiB 0.000 MiB return ret
python -m memory_profiler setFileToSql.py 95.17s user 289.77s system 97% cpu 6:32.95 total
4. 观察数据库记录数目变化--230071(23万71条记录,确实比上次少了一个)
root@WJL-SH4031667:# sqlite3 test.db
sqlite3 hljs 3行
sqlite> select count(*) from fmanages;
230071
5. 效果图
enter description here