数据挖掘研讨课结束了,这门课的考核方法是每个同学根据班里面同学的课堂表现打分,然后老师再取截断平均值作为最后的分数。
于是我就想,能否用python进行自动化打分呢,答案是肯定的。
老师发给我们的课堂发言记录是pdf格式,首先我在网上下载了迅捷pdf转word工具,由于这个不付费只能一次转5页,所以我又下载了迅捷pdf编辑器,一次删除5页图片,最后将数据汇总到txt中。
通过观察可以很容易发现,记录中记录同学信息的格式是(xxx, xxx,xxx),于是可以使用python将所有同学信息提取出来,并且统计每个同学的回答频率,最后根据回答频率给出分数。
打分表是一张xlsx格式的表格,那里面详细记录了同学学号,姓名和班级信息,将上述过程计算得到的分数和信息录入数据库,然后再根据表格的学号在数据库中查询成绩,再录入表格中,这样,就大功告成啦。
python使用到的库:
xlrd xls读取库
xlwt xls写入库
pymysql python操作mysql库
xlutils 表格复制库
具体代码如下:
#coding:utf-8
import xlrd
import xlwt
from xlutils.copy import copy
import pymysql.cursors
#提取出()的内容
def extract():
with open("学生记录.txt","w") as data:
with open("数据挖掘课程记录.txt", "r") as file:
lines = file.readlines() #读取每一行
for line in lines: #从每一行中读取出()内容
if(line.find("(") != -1):
tuple = line.split("(")[1].split(")")[0]
data.write(tuple) #些许数据文件
data.write("\n")
#对提取的数据进行数据清洗
#返回回答频率字典name
def cleanout():
with open("学生记录.txt","r") as data:
#统计每个同学出现次数
name = {}
lines = data.readlines()
for line in lines:
print(line)
if(line.find(",") != -1):
st = line.split(",")[1]
print(st)
if st in name: #如果已经记录了这个名字就将频率加一
print("在")
name[st] = name[st] + 1
else: #否则就加入字典
print("不在")
name[st] = 1
print(name)
#接着写入成绩文件
return name
#将数据录入数据库中
def entering(name = {}):
#连接数据库
connection = pymysql.connect(
host = "localhost",
user = "root",
password = "root",
db = "mark"
)
data = []
try:
with connection.cursor() as cur:
with open("学生记录.txt","r") as file:
lines = file.readlines()
for line in lines:
if(line.find(",") != -1):
data = []
data.append(line.split(",")[0])
data.append(line.split(",")[1])
data.append(line.split(",")[2])
num = name[data[1]]
print(name[data[1]])
if(num == 1):
grad = 94
elif(num == 2):
grad = 96
elif(num == 3):
grad = 98
elif(num == 4):
grad = 100
print(grad)
print(data[0])
print(data[1])
print(data[2])
id = int(data[0])
sel = "SELECT * FROM grade WHERE id = %s"
sul = cur.execute(sel, id)
if(sul == 0):
sql = "INSERT INTO grade values(%s,%s,%s,%s)"
cur.execute(sql, (id,data[1],data[2],grad))
connection.commit() #提交事务
finally:
connection.close()
#接着按照数据库的成绩写入excel表格中
def wtexcel():
connection = pymysql.connect(
host = "localhost",
user = "root",
password = "root",
db = "mark"
)
try:
with connection.cursor() as cur:
file = xlrd.open_workbook("2018秋打分表weiweihit@163.com.xlsx")
table = file.sheets()[0]
nrows = table.nrows
wb = copy(file)
sheet = wb.get_sheet(0)
for row in range(1, nrows):
#print(row)
id = int(table.cell(row, 1).value)
sel = "SELECT grade FROM grade WHERE id = %s"
sul = cur.execute(sel, id)
grad = cur.fetchone()
if(sul == 0): #没有记录就是75分
sheet.write(row, 4, 75)
else:
sheet.write(row, 4, grad[0])
wb.save("打分表.xls")
finally:
connection.close()
if __name__=="__main__":
#extract()
#cleanout()
#name = {}
#name = cleanout()
#entering(name)
wtexcel()