goal
The problem can be simplified as follows.
I have a 2-dimensional data set (a list of rows).
The first two items of each row are the student's name and ID.
The next three numbers in the row are the unit scores of the student's exam.
The last number is the total score of the exam.
>>> row1 = ["sue","2016211032",10,20,30,60]
>>> row2 = ["li","2017212323",30,20,40,90]
>>> data = []
>>> data.append(row1)
>>> data.append(row2)
>>> data
[['sue', '2016211032', 10, 20, 30, 60], ['li', '2017212323', 30, 20, 40, 90]]
- Now I want to compute the mean of each unit score and the mean of the students' total scores,
- and get the percentage of students whose total score is between 90 and 100, 80 and 90, 70 and 80, 60 and 70, and below 60, and then visualize these data.
tools
method
1. create the pandas DataFrame from the 2-dimensional list.
>>> import numpy as np
>>> import pandas as pd
>>> data = pd.DataFrame(data)
>>> data
0 1 2 3 4 5
0 sue 2016211032 10 20 30 60
1 li 2017212323 30 20 40 90
2. set the column names and the index
>>> #set the names of columns
>>> data = pd.DataFrame(data,columns=["name","id","Q1","Q2","Q3","Total"])
>>> #set the index using the name and id columns
>>> data.set_index(['name','id'])
Q1 Q2 Q3 Total
name id
sue 2016211032 10 20 30 60
li 2017212323 30 20 40 90
3. get the statistics of the DataFrame
def getAnalyzeResult(data):
    """Summarise exam scores for the bar chart and the distribution pie chart.

    Parameters
    ----------
    data : list of rows
        Each row is ``[name, id, Q1, ..., Qk, Total]``. The number of
        question columns ``k`` is inferred from the width of the first row
        when ``data`` is a non-empty list/tuple of lists/tuples; for any other
        input the historical eight-question layout (11 columns) is assumed.

    Returns
    -------
    dict
        ``{}`` when there are no rows. Otherwise a dict with:

        * ``'mins'``, ``'maxs'``, ``'medians'``, ``'means'`` -- one value per
          score column, in column order (Q1 .. Qk, Total);
        * ``'percentageNames'`` -- labels of the total-score bands;
        * ``'percentageValues'`` -- number of rows whose ``Total`` falls in
          each band (raw counts; the pie chart derives the percentages).

        All numbers are native Python ``int``/``float`` so that printing the
        lists in a template yields plain numeric literals.
    """
    # Derive the column layout from the data instead of hard-coding Q1..Q8.
    questionCount = 8
    if isinstance(data, (list, tuple)) and data:
        firstRow = data[0]
        if isinstance(firstRow, (list, tuple)) and len(firstRow) >= 3:
            questionCount = len(firstRow) - 3
    columns = (
        ["name", "id"]
        + ["Q%d" % number for number in range(1, questionCount + 1)]
        + ["Total"]
    )

    frame = pd.DataFrame(data, columns=columns)
    if frame.empty:
        # Nothing to analyse; an empty dict is falsy, so callers can test it.
        return {}

    def toNative(value):
        # NumPy scalars expose .item(); since NumPy 2 their repr is e.g.
        # ``np.int64(10)``, which is not a valid literal once a list is printed.
        return value.item() if hasattr(value, "item") else value

    scoreSeries = [frame[column] for column in columns[2:]]

    result = {}
    result['mins'] = [toNative(min(series)) for series in scoreSeries]
    result['maxs'] = [toNative(max(series)) for series in scoreSeries]
    result['medians'] = [float(np.median(series)) for series in scoreSeries]
    result['means'] = [float(np.mean(series)) for series in scoreSeries]

    # (label, inclusive lower bound, exclusive upper bound); None = unbounded.
    bands = [
        ("90+", 90, None),
        ("80~90", 80, 90),
        ("70~80", 70, 80),
        ("60~70", 60, 70),
        ("60-", None, 60),
    ]
    total = frame["Total"]
    counts = []
    for _label, lower, upper in bands:
        inBand = pd.Series(True, index=frame.index)
        if lower is not None:
            inBand &= total >= lower
        if upper is not None:
            inBand &= total < upper
        counts.append(int(inBand.sum()))

    result['percentageNames'] = [label for label, _lower, _upper in bands]
    result['percentageValues'] = counts
    return result
4. visualize with Django and Echarts
in views.py
@login_required
def analyzeScores(request):
    """Render the score-analysis page for the requesting user.

    Reads the download data from ``request.user.getDownloadData()``
    (presumably a teacher account -- confirm against the user model),
    summarises it with ``getAnalyzeResult``, prints the summary to stdout,
    and renders ``main/analyzeScores.html`` with it under the ``result`` key.
    """
    result = getAnalyzeResult(request.user.getDownloadData())
    print(result)
    return render(request, 'main/analyzeScores.html', {'result': result})
in analyzeScores.html
{% extends "main/base.html" %}
{% load static %}
{# Renders the analysis in `result` as an ECharts bar chart (min/mean/max per column) and a pie chart (score distribution). #}
{% block title %}
Analyze Scores
{% endblock %}
{% block mainbody %}
{% if result %}
<!-- Prepare DOM containers with an explicit size (width and height) for ECharts -->
<div id="barChart" style="margin:0 auto;width: 1000px;height:400px;"></div>
<div id="pieChart" style="margin:0 auto;width: 1000px;height:400px;"></div>
<script type="text/javascript">
    // Initialise the ECharts instance on the prepared DOM element
    var barChart = echarts.init(document.getElementById('barChart'));
    var mins = {{ result.mins }};
    var means = {{ result.means }};
    var maxs = {{ result.maxs }};
    // Specify the chart's options and data
    var optionForBarChart = {
        title: {
            text: '2016级'
        },
        legend: {
            data:['min','mean','max']
        },
        grid: {
            left: '3%',
            right: '4%',
            bottom: '3%',
            containLabel: true
        },
        tooltip : {
            trigger: 'axis',
            axisPointer : {            // Axis pointer; takes effect when triggered by an axis
                type : 'shadow'        // Defaults to a line; options: 'line' | 'shadow'
            }
        },
        xAxis: {
            type : 'category',
            data: ["Q1","Q2","Q3","Q4","Q5","Q6","Q7","Q8","total"]
        },
        yAxis: {
            type : 'value'
        },
        series: [
            {
                name: 'min',
                type: 'bar',
                stack: 'min',
                data: mins
            },
            {
                name: 'mean',
                type: 'bar',
                stack: 'mean',
                data: means
            },
            {
                name: 'max',
                type: 'bar',
                stack: 'max',
                data: maxs
            },
        ]
    };
    // Render the chart using the options and data specified above
    barChart.setOption(optionForBarChart);

    var pieChart = echarts.init(document.getElementById('pieChart'));
    var percentageValues = {{ result.percentageValues | safe }};
    var percentageNames = {{ result.percentageNames | safe }};
    // Build one plain {name, value} record per score band for the pie series.
    // (A Set is a collection type, not a record, so an object literal is used.)
    var percentages = [];
    for (var i = 0; i < percentageValues.length; i++)
    {
        percentages.push({
            name: percentageNames[i],
            value: percentageValues[i]
        });
    }
    var optionForPieChart = {
        backgroundColor: 'white',
        title: {
            text: 'Distribution Pie',
            left: 'center',
            top: 20,
            textStyle: {
                color: 'grey'
            }
        },
        tooltip : {
            trigger: 'item',
            formatter: "{a} <br/>{b} : {c} ({d}%)"
        },
        visualMap: {
            show: false,
            min: 0,
            max: 100,
            inRange: {
                colorLightness: [0, 1]
            }
        },
        series : [
            {
                name:'distribution',
                type:'pie',
                radius : '55%',
                center: ['50%', '50%'],
                // Sort ascending by count (sorts the array in place)
                data:percentages.sort(function (a, b) { return a.value - b.value; }),
                roseType: 'radius',
                label: {
                    normal: {
                        textStyle: {
                            color:"black"
                        }
                    }
                },
                labelLine: {
                    normal: {
                        lineStyle: {
                            color: 'grey'
                        },
                        smooth: 0.2,
                        length: 10,
                        length2: 20
                    }
                },
                itemStyle: {
                    normal: {
                        color: '#c23531',
                        shadowBlur: 200,
                        shadowColor: 'rgba(0, 0, 0, 0.5)'
                    }
                },
                animationType: 'scale',
                animationEasing: 'elasticOut',
                animationDelay: function (idx) {
                    return Math.random() * 200;
                }
            }
        ]
    };
    pieChart.setOption(optionForPieChart);
</script>
{% else %}
<p>You have no papers processed now, go to process</p>
{% endif %}
<!-- Button trigger modal -->
{% endblock %}