数据库中的每条记录都有一个名为 year 的字段,现在需要统计每个 year 值出现的次数,并画出直方图。
代码
# 获取数据库中所有的year,构成一个集合
import os
import sqlite3
from collections import Counter
import matplotlib.pyplot as plt
# 从数据库中获取所有文章的年代(保留重复值),写入 year_set.txt
def get_year_set():
    """Dump the ``year`` column of ``table1`` into ``year_set.txt``.

    Reads every row of ``table1`` from the SQLite database
    ``../allmessage.db`` (path relative to the current working directory)
    and writes each ``year`` value, converted with ``str()`` and followed
    by a comma, onto a single line of ``year_set.txt``.  Duplicates are
    kept; an existing ``year_set.txt`` is replaced.

    Returns:
        -1 if the database file does not exist, otherwise None.
    """
    dbname = "../allmessage.db"
    if not os.path.exists(dbname):
        print("The database is not exist!")
        return -1
    filename = "year_set.txt"
    if os.path.exists(filename):
        print("The "+filename+" is exist, and the programming is removing!")
        os.remove(filename)
    # Fetch everything first so the connection is released before any file
    # I/O happens, and is released even when the query raises.
    conn = sqlite3.connect(dbname)
    try:
        cursor = conn.cursor()
        try:
            cursor.execute("select year from table1")
            rows = cursor.fetchall()
        finally:
            cursor.close()
    finally:
        conn.close()
    # The trailing comma after the last value is intentional: it is the
    # format produced for every value, including the final one.
    with open(filename, "w") as f:
        f.writelines(str(row[0]) + "," for row in rows)
# 统计每个年代出现的次数,写入 year_count.txt
def get_year_count():
    """Count how often each year occurs in ``year_set.txt``.

    Reads the first line of ``year_set.txt`` (comma-separated values),
    tallies the values and writes one ``year,count`` line per distinct
    value to ``year_count.txt``, replacing any previous file.  Values are
    written in order of first appearance (``Counter`` preserves insertion
    order on Python 3.7+).

    Returns:
        -1 if ``year_set.txt`` does not exist, otherwise None.
    """
    filename = "year_set.txt"
    if not os.path.exists(filename):
        print("The "+filename+" is not exists")
        return -1
    with open(filename, "r") as f:
        years = f.readline().strip("\n").split(",")
    # The input is expected to end with a trailing comma, so the last field
    # produced by split() is an empty leftover, not a real value.
    years = years[:-1]
    year_counts = Counter(years)
    with open("year_count.txt", "w") as f_write:
        for year, count in year_counts.items():
            f_write.write(str(year) + "," + str(count) + "\n")
# 按照年代的大小进行排序
def _year_sort_key(entry):
    """Order (year, count) pairs by numeric year; non-numeric years sort last."""
    year = entry[0]
    try:
        return (0, int(year), year)
    except ValueError:
        return (1, 0, year)


def sort_year_count():
    """Sort ``year_count.txt`` by year and write ``year_count_sort.txt``.

    Each input line is ``year,count``.  An empty year is written as ``0``.
    Lines are ordered by the numeric value of the year, so that e.g. ``985``
    comes before ``1999``; years that are not integers are placed after all
    numeric ones, in string order.  Blank lines are skipped.  Any existing
    ``year_count_sort.txt`` is replaced.

    Returns:
        -1 if ``year_count.txt`` does not exist, otherwise None.
    """
    filename_read = "year_count.txt"
    if not os.path.exists(filename_read):
        print("The file is not exists!")
        return -1
    entries = []
    with open(filename_read, "r") as f_read:
        for line in f_read:
            fields = line.strip("\n").split(",")
            if fields == [""]:
                # Blank line: nothing to record.
                continue
            # Records without a year are reported under year 0.
            year = fields[0] if fields[0] != '' else '0'
            entries.append((year, fields[1]))
    entries.sort(key=_year_sort_key)
    with open("year_count_sort.txt", "w") as f_write:
        for year, count in entries:
            f_write.write(str(year) + "," + str(count) + "\n")
# 得到直方图
def get_graph():
    """Plot the year/count bar chart and save it as ``year_count.png``.

    Reads ``year,count`` lines from ``year_count_sort.txt``, converts both
    fields to ``int`` and draws them as a bar chart with a logarithmic
    count axis.  Blank lines in the input are skipped.

    Returns:
        -1 if ``year_count_sort.txt`` does not exist, otherwise None.

    Raises:
        ValueError: if a year or count field is not an integer.
    """
    filename = "year_count_sort.txt"
    if not os.path.exists(filename):
        print("The filename is not exists!")
        return -1
    x = []
    y = []
    with open(filename, "r") as f:
        for line in f:
            fields = line.strip("\n").split(",")
            if fields == [""]:
                continue
            x.append(int(fields[0]))
            y.append(int(fields[1]))
    # log=True: counts are shown on a logarithmic scale.
    plt.bar(x, y, alpha=.5, log=True, color='g')
    plt.xlabel("Year")
    plt.ylabel("Count")
    # Fixed viewing window; years outside 1935-2015 are not displayed.
    plt.xlim(1935, 2015)
    # NOTE(review): a lower limit of 0 cannot be represented on a log axis;
    # confirm how the installed matplotlib version treats it.
    plt.ylim(0, 200761)
    plt.title("Year-Count")
    plt.savefig("year_count.png", format="png")
if __name__ == '__main__':
    # Run the pipeline stages in order: dump years, count them, sort the
    # counts, then draw the chart.
    for stage in (get_year_set, get_year_count, sort_year_count, get_graph):
        stage()
效果图:
喜欢 (0)or分享 (0)