文件信息分析
具体要求
对本机所有文件的信息进行搜索和收集,并绘制以下各项数据分布的直方图:
- 文件大小分布的直方图
- 文件创建时间和修改时间的直方图
- 文件类型的直方图
python 代码实现
import os
import time
import matplotlib.pyplot as plt
# import numpy as np
# from matplotlib import font_manager
# Parallel accumulators filled by getFileInfo(): position i in every list
# describes the same file.
n_list, s_list = [], []    # file names / sizes as reported by os.path.getsize
tc_list, tm_list = [], []  # formatted ctime / mtime strings
tp_list = []               # file extensions as returned by os.path.splitext
def timeShift(timestamp):
    """
    Format a timestamp as a local-time string.

    :param timestamp: seconds since the epoch
    :return: the local time rendered as ``YYYY-MM-DD HH:MM:SS``
    """
    return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(timestamp))
def getFileInfo(fPath):
    """
    Walk a directory tree and record name, size, ctime, mtime and extension
    of every regular file found.

    Results are appended to the module-level lists ``n_list``, ``s_list``,
    ``tc_list``, ``tm_list`` and ``tp_list``; the five lists stay aligned
    (index i refers to the same file in each of them).

    Note: ``os.path.getctime`` is the creation time on Windows but the time
    of the last metadata change on Unix-like systems.

    :param fPath: root directory to scan
    :return: None
    """
    for root, _dirs, files in os.walk(fPath):
        for file in files:
            full_path = os.path.join(root, file)
            # Skip entries that are not regular files (e.g. broken symlinks)
            # and keep scanning; returning here would silently drop every
            # file that has not been visited yet.
            if not os.path.isfile(full_path):
                continue
            try:
                size = os.path.getsize(full_path)
                created = timeShift(os.path.getctime(full_path))
                modified = timeShift(os.path.getmtime(full_path))
            except (OSError, OverflowError):
                # The file vanished, is inaccessible, or carries a timestamp
                # the platform cannot convert; skip it as a whole so the
                # five lists never get out of step.
                continue
            n_list.append(file)
            s_list.append(size)
            tc_list.append(created)
            tm_list.append(modified)
            tp_list.append(os.path.splitext(full_path)[1])
# Ask the user for a directory and walk it, collecting the file information
# into the module-level lists.
findPath = input("请输入需要查找的文件路径:")
getFileInfo(findPath)
# 对列表中保存的数据进行测试
# for i in n_list:
# print(i)
#
# for i in s_list:
# print(i)
#
# for i in tc_list:
# print(i)
#
# for i in tm_list:
# print(i)
#
# for i in tp_list:
# print(i)
# Per-bucket counters behind the four charts:
#   sum1 - file size, sum2 - creation time, sum3 - modification time
#   (four buckets each); sum4 - file type (five buckets).
sum1 = [0] * 4
sum2 = [0] * 4
sum3 = [0] * 4
sum4 = [0] * 5
def size_Sort():
    """
    Tally the collected file sizes into the four slots of ``sum1``.

    Slots 0-2 count sizes in [0, 500), [500, 2000) and [2000, 10000);
    slot 3 receives every size that falls in none of those ranges.
    """
    ranges = ((0, 500), (500, 2000), (2000, 10000))
    for size in s_list:
        # First matching range wins; no match means the overflow slot.
        slot = next(
            (k for k, (low, high) in enumerate(ranges) if low <= size < high),
            len(ranges),
        )
        sum1[slot] += 1
# 测试
# size_Sort()
# for i in sum1:
# print(i)
def timec_Sort():
    """
    Tally the collected creation times into the four slots of ``sum2``.

    Slots 0-2 count timestamps in June, July and August 2019; slot 3
    receives every timestamp outside those three months.
    """
    edges = (
        '2019-06-01 00:00:00',
        '2019-07-01 00:00:00',
        '2019-08-01 00:00:00',
        '2019-09-01 00:00:00',
    )
    windows = list(zip(edges, edges[1:]))
    for created in tc_list:
        # The entries are fixed-width "YYYY-MM-DD HH:MM:SS" strings, so plain
        # string comparison orders them chronologically.
        hits = [k for k, (start, stop) in enumerate(windows) if start <= created < stop]
        sum2[hits[0] if hits else 3] += 1
# 测试
# timec_Sort()
# for i in sum2:
# print(i)
def timem_Sort():
    """
    Tally the collected modification times into the four slots of ``sum3``.

    Slots 0-2 count timestamps in June, July and August 2019; slot 3
    receives every timestamp outside those three months.
    """
    edges = (
        '2019-06-01 00:00:00',
        '2019-07-01 00:00:00',
        '2019-08-01 00:00:00',
        '2019-09-01 00:00:00',
    )
    windows = list(zip(edges, edges[1:]))
    for modified in tm_list:
        # The entries are fixed-width "YYYY-MM-DD HH:MM:SS" strings, so plain
        # string comparison orders them chronologically.
        hits = [k for k, (start, stop) in enumerate(windows) if start <= modified < stop]
        sum3[hits[0] if hits else 3] += 1
# 测试
# timem_Sort()
# for i in sum3:
# print(i)
def type_class():
    """
    Tally the collected file extensions into the five slots of ``sum4``.

    Slots, in order: images, source/web files, text (including files with
    no extension), video, and everything else. Matching is
    case-insensitive, so ``.JPG`` is counted with ``.jpg`` instead of
    falling into the "other" slot.
    """
    categories = (
        {'.jpg', '.png', '.gif'},
        {'.js', '.css', '.py', '.htm'},
        {'.txt', '.xml', ''},
        {'.avi', '.flv', '.mp4', '.wmv'},
    )
    for ext in tp_list:
        # File systems preserve the case the user typed; normalise first.
        ext = ext.lower()
        for slot, members in enumerate(categories):
            if ext in members:
                sum4[slot] += 1
                break
        else:
            # No category matched: count it as "other".
            sum4[len(categories)] += 1
# 测试
# type_class()
# for i in sum4:
# print(i)
def size_tu():
    """
    Draw a bar chart of the file-size distribution.

    Runs ``size_Sort()`` to fill ``sum1`` and then shows one bar per size
    bucket.
    """
    size_Sort()
    # Label each bar with the range size_Sort() actually counts; the last
    # bucket is open-ended, so a fixed upper bound would be misleading.
    name_list = ['<500', '500-2000', '2000-10000', '>=10000']
    plt.bar(range(len(sum1)), sum1,
            tick_label=name_list)
    plt.show()
def timec_tu():
    """
    Draw a bar chart of the file creation-time distribution.

    Runs ``timec_Sort()`` to fill ``sum2`` and then shows one bar per time
    bucket.
    """
    timec_Sort()
    # The first three bars are the months starting on the given date; the
    # fourth collects every file outside June-August 2019, so it is labelled
    # as a catch-all rather than as a date.
    name_list = ['2019-06-01', '2019-07-01', '2019-08-01', 'other']
    plt.bar(range(len(sum2)), sum2,
            tick_label=name_list)
    plt.show()
def timem_tu():
    """
    Draw a bar chart of the file modification-time distribution.

    Runs ``timem_Sort()`` to fill ``sum3`` and then shows one bar per time
    bucket.
    """
    timem_Sort()
    # The first three bars are the months starting on the given date; the
    # fourth collects every file outside June-August 2019, so it is labelled
    # as a catch-all rather than as a date.
    name_list = ['2019-06-01', '2019-07-01', '2019-08-01', 'other']
    plt.bar(range(len(sum3)), sum3,
            tick_label=name_list)
    plt.show()
def type_tu():
    """
    Draw a bar chart of the file-type distribution.

    Runs ``type_class()`` to fill ``sum4`` and then shows one bar per
    category (image, code, text, video, other).
    """
    type_class()
    # The tick labels are Chinese, so select a font that can render them and
    # turn off the Unicode minus sign.
    plt.rcParams['font.sans-serif'] = ['SimHei']
    plt.rcParams['axes.unicode_minus'] = False
    labels = ['图片', '编程', '文本', '视频', '其他']
    positions = range(len(sum4))
    plt.bar(positions, sum4, tick_label=labels)
    plt.show()
# Render the four charts one after another: size, creation time,
# modification time, file type.
for _draw_chart in (size_tu, timec_tu, timem_tu, type_tu):
    _draw_chart()
运行结果
- 图1
- 图2