1.代码示例如下:
import os
import sqlite3
import operator
from collections import OrderedDict
import matplotlib.pyplot as plt
def parse(url):
    """Return the host part of *url* with a leading ``www.`` removed.

    The host is taken as the text after the first ``//`` up to the next
    ``/`` (or the end of the string).

    Args:
        url: URL string such as ``"https://www.example.com/page"``.

    Returns:
        The host (e.g. ``"example.com"``), or ``None`` when *url* contains
        no ``//`` separator; in that case an error message is printed.
    """
    try:
        after_scheme = url.split('//')[1]
    except IndexError:
        # A parenthesised single-argument print behaves the same on
        # Python 2 and Python 3.
        print("URL format error!")
        return None
    domain = after_scheme.split('/', 1)[0]
    # Strip only a leading "www."; str.replace would also delete that text
    # from the middle of hosts such as "awww.example.com".
    if domain.startswith("www."):
        domain = domain[len("www."):]
    return domain
def analyze(results):
    """Ask the user how to present *results*, then print or plot them.

    Typing ``c`` prints one ``site count`` line per entry, ``p`` shows a
    matplotlib bar chart, and any other answer prints a notice and exits
    via ``quit()``.

    Args:
        results: Mapping of site name to visit count. Entries are shown in
            the mapping's own iteration order, so pass an ordered mapping
            to control the ordering.
    """
    # raw_input only exists on Python 2; fall back to input on Python 3.
    try:
        read_line = raw_input
    except NameError:
        read_line = input
    prompt = read_line("[.] Type <c> to print or <p> to plot\n[>] ")
    if prompt == "c":
        # Iterate the argument, not the module-level sites_count_sorted, so
        # the function reports whatever mapping it was actually given.
        for site, count in results.items():
            print("%s %s" % (site, count))
    elif prompt == "p":
        positions = range(len(results))
        # Materialise the dict views: on Python 3 they are not sequences.
        plt.bar(positions, list(results.values()), align='edge')
        # One call sets tick positions, labels and label rotation together.
        plt.xticks(positions, list(results.keys()), rotation=45)
        plt.show()
    else:
        print("[.] Uh?")
        quit()
# Path to the user's Chrome profile directory (Windows layout).
# Built with os.path.join instead of a backslash literal: the non-raw string
# contained "\U", which Python 3 rejects as a malformed unicode escape.
data_path = os.path.join(
    os.path.expanduser('~'),
    "AppData", "Local", "Google", "Chrome", "User Data", "Default"
)
history_db = os.path.join(data_path, 'history')

# Query the database. The join yields one row per entry in `visits` that
# matches a row in `urls`, so counting rows per domain counts visits.
# NOTE(review): Chrome may hold a lock on this file while it is running;
# confirm, and copy the file first if the query fails with a lock error.
c = sqlite3.connect(history_db)
try:
    cursor = c.cursor()
    select_statement = "SELECT urls.url, urls.visit_count FROM urls, visits WHERE urls.id = visits.url;"
    cursor.execute(select_statement)
    results = cursor.fetchall()  # list of (url, visit_count) tuples
finally:
    # Always release the database handle, even if the query raises.
    c.close()

# Tally the number of result rows per domain.
sites_count = {}
for url, _ in results:
    domain = parse(url)
    if domain is None:
        # parse() could not extract a host; do not count it under None.
        continue
    sites_count[domain] = sites_count.get(domain, 0) + 1

# Most visited domains first.
sites_count_sorted = OrderedDict(
    sorted(sites_count.items(), key=operator.itemgetter(1), reverse=True)
)
analyze(sites_count_sorted)
效果展示:
备注:主要用到sqlite和使用第三方python包matplotlib绘图
参考资料来源于:
<<使用Python分析谷歌浏览器Chrome的历史记录>>
http://mp.weixin.qq.com/s?src=3&timestamp=1461695187&ver=1&signature=wnZn1UtWreFWjQbpWweZXp6RRvmmKwW1-Kud3x6OF0fyEUbGoUrfVJkdthk7m33uuO9xMwJhkqhA*Pyih0SCe2VZ2WBo--lqw20BbuZ57E07qwQHjCPUoX5eYgn*5zSAKIQfR3hHefsU2zdYvxWy7vm-Duq7m32TIKMl0y6eFA8=