对 Udemy 在线网络课程进行数据分析与可视化展现,数据包含了2011年到2017年的所有课程数据,每个课程数据包含了课程名称、价格、学科、评论数量、订阅数量等信息。用到了 pandas 和 echarts。
数据来源: Udemy Courses - Kaggle
开发流程
利用 Jupyter Notebook 对数据进行预处理、EDA和导出数据
- 加载数据,查看数据基本信息,必要的数据预处理部分
- 探索性数据分析,设定问题,找出数据包含的信息
- 与可视化阶段同步进行,根据 Echarts 配置参数中 data 等属性的需要,用 Python 导出合适的 JavaScript 数组/对象数据,方便 Echarts 使用
利用 Echarts库 编写可视化组件
- 新建 HTML 文件 index.html,初始化基本信息,引入必要的 JS 库
- 确定可视化组件的内容,确定布局位置,这些都在 HTML 文件中完成
- 新建 main.js 文件,使用 Echarts 配置各种所需图表,为了整洁性,封装成函数
- 需要的数据全部封装到 data.js 文件中,方便 main.js 引用
- 反复打开 index.html 进行测试,确保功能都正常显示
可视化组件
- 课程 免费/付费 数量,饼图
- 各类别课程 免费/付费 数量 柱状图
- 各类别课程的价格分布
- 各等级 免费/付费 数量
- 价格的 cdf、柱状图、BOX图分布
- 各年份的各类别发布量
- 各课程类别的课程关键字云图
运行
直接浏览器打开 index.html 文件即可,支持 file 协议。
源码
源数据有 3684 条,因此 data.js 文件比较大
index.html
<!DOCTYPE html>
<html lang="zh-CN">
<head>
  <meta http-equiv="Content-Type" content="text/html;charset=UTF-8">
  <title>Udemy 课程数据可视化</title>
  <!-- NOTE(review): ECharts comes from cdn.bootcss.com while the word-cloud
       plugin comes from jsDelivr; consider serving both from one trusted CDN. -->
  <script src="https://cdn.bootcss.com/echarts/4.7.0/echarts.min.js"></script>
  <!-- Word-cloud series plugin; loaded after ECharts because it extends it. -->
  <script src="https://cdn.jsdelivr.net/npm/echarts-wordcloud@1.1.3/dist/echarts-wordcloud.min.js"></script>
  <style>
    #head h2 {
      color: firebrick;
      text-align: center;
      font-weight: bold;
    }
  </style>
</head>
<body>
  <div id="head">
    <h2>Udemy 课程数据可视化</h2>
  </div>
  <div id="main">
    <!-- Chart 1: paid vs. free course counts -->
    <div id="chart-1">
      <div id="echart-1" style="height: 350px"></div>
    </div>
    <!-- Chart 2: paid vs. free course counts per subject -->
    <div>
      <div id="echart-2" style="height: 350px"></div>
    </div>
    <!-- Chart 3: price distribution per subject -->
    <div>
      <div id="echart-3" style="height: 350px"></div>
    </div>
    <!-- Chart 4: paid vs. free course counts per difficulty level -->
    <div>
      <div id="echart-4" style="height: 350px"></div>
    </div>
    <!-- Chart 5: price CDF, histogram and box plot -->
    <div>
      <div id="echart-5" style="height: 350px"></div>
    </div>
    <!-- Chart 6: courses published per year, stacked by subject -->
    <div>
      <div id="echart-6" style="height: 350px"></div>
    </div>
    <!-- Chart 7: one course-title word cloud per subject -->
    <div>
      <div id="echart-7-1" style="height: 400px"></div>
      <div id="echart-7-2" style="height: 400px"></div>
      <div id="echart-7-3" style="height: 400px"></div>
      <div id="echart-7-4" style="height: 400px"></div>
    </div>
  </div>
  <!-- data.js defines the global data constants that main.js reads, so it loads first.
       Both scripts sit at the end of <body> so the chart containers already exist. -->
  <script src="data.js"></script>
  <script src="main.js"></script>
</body>
</html>
main.js
/** udemy 课程数据可视化 公共 js 文件 */
/** Echarts Dom 操作 */
/**
 * Chart 1: ring-shaped pie chart comparing the number of paid and free courses.
 *
 * Renders into the DOM element with id "echart-1" using the global `echarts`
 * object. Takes no arguments and returns nothing.
 *
 * NOTE(review): the counts below (3544 paid / 600 free) do not add up to the
 * same totals as the per-subject and per-level bar charts in this file
 * (3368 paid / 310 free). Confirm which figures match the source data.
 */
function echart_1() {
  const chart = echarts.init(document.getElementById('echart-1'));
  const option = {
    title: {
      left: '20%',
      text: '免费/付费 课程数量对比',
    },
    // A pie series has no axes, so an axis-triggered tooltip never fires.
    // Item triggering shows the tooltip when a slice is hovered.
    tooltip: {
      trigger: 'item',
    },
    legend: {},
    series: [
      {
        name: '在线课程数量',
        type: 'pie',
        // Inner and outer radius: a non-zero inner radius yields a ring.
        radius: ['50%', '75%'],
        avoidLabelOverlap: false,
        // Labels are hidden by default and drawn in the ring's centre ...
        label: {
          show: false,
          position: 'center',
        },
        // ... and only become visible for the slice being hovered.
        emphasis: {
          label: {
            show: true,
            fontSize: 30,
            fontWeight: 'bold',
          },
        },
        labelLine: {
          show: false,
        },
        data: [
          { value: 3544, name: '付费课程' },
          { value: 600, name: '免费课程' },
        ],
      },
    ],
  };
  chart.setOption(option);
}
/**
 * Chart 2: grouped bar chart of paid vs. free course counts for each subject.
 *
 * Renders into the DOM element with id "echart-2" using the global `echarts`
 * object. All figures are hard-coded. Takes no arguments and returns nothing.
 */
function echart_2() {
  const container = document.getElementById('echart-2');
  const chart = echarts.init(container);

  // One entry per bar series: [legend name, counts in x-axis category order].
  const countsByKind = [
    ['付费', [1099, 568, 634, 1067]],
    ['免费', [96, 35, 46, 133]],
  ];

  // Each series also marks its own maximum and minimum bar.
  const toBarSeries = ([name, data]) => ({
    name,
    type: 'bar',
    data,
    markPoint: {
      data: [
        { type: 'max', name: '最大值' },
        { type: 'min', name: '最小值' },
      ],
    },
  });

  chart.setOption({
    title: { left: '20%', text: '各主题的 免费/付费 课程数量' },
    legend: {},
    tooltip: {
      trigger: 'axis',
      // Highlight the hovered category with a shaded band instead of a line.
      axisPointer: { type: 'shadow' },
    },
    xAxis: [
      {
        type: 'category',
        name: '主题',
        data: ['Business Finance', 'Graphic Design', 'Musical Instruments', 'Web Development'],
      },
    ],
    yAxis: [{ type: 'value', name: '数量' }],
    series: countsByKind.map(toBarSeries),
  });
}
/**
 * Chart 3: scatter (bubble) chart of course counts per price and subject.
 *
 * Renders into the DOM element with id "echart-3". Reads the globals
 * `SUBJECT_PRICE_COUNT_ARRAY`, `SUBJECT_PRICE_COUNT` and `SUBJECTS`.
 * Takes no arguments and returns nothing.
 */
function echart_3() {
  const container = document.getElementById('echart-3');
  const chart = echarts.init(container);

  // Swap the first two fields of every row so each point is [x, y, size].
  // Presumably the source rows are [subject, price, count]. Verify in data.js.
  const points = SUBJECT_PRICE_COUNT_ARRAY.map((row) => [row[1], row[0], row[2]]);

  const priceAxis = {
    type: 'category',
    data: SUBJECT_PRICE_COUNT['price'],
    name: '价格',
    axisLine: { show: false },
  };

  const subjectAxis = {
    type: 'category',
    data: SUBJECTS,
    axisLine: { show: false },
    boundaryGap: false,
    // Dashed guide line across the plot for every category on this axis.
    splitLine: {
      show: true,
      lineStyle: { color: '#999', type: 'dashed' },
    },
    name: '主题',
  };

  const bubbleSeries = {
    name: '课程数量',
    type: 'scatter',
    areaStyle: {},
    data: points,
    // Bubble size is the third field of the point, scaled down by three.
    symbolSize: (val) => val[2] / 3,
    // Stagger the entry animation by 5 ms per point.
    animationDelay: (idx) => idx * 5,
    tooltip: {},
  };

  chart.setOption({
    title: { left: 'center', text: '各主题课程的价格分布' },
    tooltip: {
      trigger: 'item',
      axisPointer: { type: 'shadow' },
    },
    xAxis: priceAxis,
    yAxis: [subjectAxis],
    series: [bubbleSeries],
  });
}
/**
 * Chart 4: grouped bar chart of paid vs. free course counts per difficulty level.
 *
 * Renders into the DOM element with id "echart-4" using the global `echarts`
 * object. All figures are hard-coded. Takes no arguments and returns nothing.
 */
function echart_4() {
  // Builds one bar series that also marks its own maximum and minimum bar.
  function barSeries(name, data) {
    return {
      name: name,
      type: 'bar',
      data: data,
      markPoint: {
        data: [
          { type: 'max', name: '最大值' },
          { type: 'min', name: '最小值' },
        ],
      },
    };
  }

  const levels = ['All Levels', 'Beginner Level', 'Intermediate Level', 'Expert Level'];
  const chart = echarts.init(document.getElementById('echart-4'));

  chart.setOption({
    title: {
      left: '20%',
      text: '各课程难度的 免费/付费 课程数量',
    },
    tooltip: {
      trigger: 'axis',
      // Shade the hovered category rather than drawing a pointer line.
      axisPointer: { type: 'shadow' },
    },
    legend: {},
    xAxis: [{ type: 'category', data: levels, name: '课程难度' }],
    yAxis: [{ type: 'value', name: '数量' }],
    // Counts are listed in the same order as `levels`.
    series: [
      barSeries('付费', [1807, 1112, 391, 58]),
      barSeries('免费', [122, 158, 30, 0]),
    ],
  });
}
/**
 * Chart 5: three side-by-side views of the course price distribution
 * (a CDF line, a count-per-price-bucket bar chart and a box plot).
 *
 * Renders into the DOM element with id "echart-5". Reads the globals
 * `PRICE_CDF`, `PRICE_COUNT` and `SUBJECT_PRICE_COUNT`.
 * Takes no arguments and returns nothing.
 */
function echart_5() {
  const chart = echarts.init(document.getElementById('echart-5'));

  // Pair each CDF x value with the y value at the same index: [x, y].
  const cdfPoints = PRICE_CDF.x.map((x, i) => [x, PRICE_CDF.y[i]]);

  const option = {
    title: {
      left: '20%',
      text: '在线课程的价格分布如何',
    },
    legend: {},
    tooltip: {
      trigger: 'axis',
      axisPointer: { type: 'shadow' },
    },
    // One grid per sub-chart, laid out left to right. `left` is the
    // documented grid offset property; `x` is only a legacy alias for it.
    grid: [
      { left: '5%', width: '25%' },
      { left: '30%', width: '40%' },
      { left: '75%', width: '20%' },
    ],
    // Axis N and series N are bound to grid N through the index properties.
    xAxis: [
      {
        type: 'category',
        data: SUBJECT_PRICE_COUNT.price,
        gridIndex: 0,
        name: '价格',
      },
      {
        type: 'category',
        data: PRICE_COUNT.x,
        gridIndex: 1,
        name: '价格区间',
      },
      {
        type: 'category',
        data: ['价格'],
        splitArea: { show: false },
        splitLine: { show: false },
        gridIndex: 2,
      },
    ],
    yAxis: [
      { type: 'value', gridIndex: 0, name: '占总百分比' },
      { type: 'value', gridIndex: 1, name: '数量' },
      { type: 'value', gridIndex: 2, name: '价格' },
    ],
    series: [
      {
        name: 'CDF 指数',
        type: 'line',
        data: cdfPoints,
        xAxisIndex: 0,
        yAxisIndex: 0,
      },
      {
        name: '普通统计 指数',
        type: 'bar',
        data: PRICE_COUNT.y,
        xAxisIndex: 1,
        yAxisIndex: 1,
      },
      {
        name: '四分位 指数',
        type: 'boxplot',
        // ECharts boxplot item layout: [min, Q1, median, Q3, max].
        data: [[0.0, 20.0, 45.0, 95.0, 200.0]],
        xAxisIndex: 2,
        yAxisIndex: 2,
      },
    ],
  };
  chart.setOption(option);
}
/**
 * Chart 6: horizontal stacked bar chart of the number of courses published
 * per year, with one stacked segment per subject.
 *
 * Renders into the DOM element with id "echart-6". Reads the global
 * `SUBJECT_YEAR_COUNT`, using its `year` array for the category axis and one
 * array per subject name for the bars. Takes no arguments and returns nothing.
 */
function echart_6() {
  const chart = echarts.init(document.getElementById('echart-6'));

  // Stacking order of the segments; each name is also a key of SUBJECT_YEAR_COUNT.
  const subjects = [
    'Web Development',
    'Business Finance',
    'Graphic Design',
    'Musical Instruments',
  ];

  // Sharing one `stack` id makes the bars of all subjects pile up per year.
  const series = subjects.map((subject) => ({
    name: subject,
    type: 'bar',
    stack: '总量',
    data: SUBJECT_YEAR_COUNT[subject],
    label: {
      show: true,
      position: 'insideRight',
    },
  }));

  chart.setOption({
    tooltip: {
      trigger: 'axis',
      axisPointer: { type: 'shadow' },
    },
    legend: {},
    title: {
      left: '10%',
      text: '各主题课程最近几年的发布量',
    },
    xAxis: [{ type: 'value', name: '发布量' }],
    yAxis: [
      {
        type: 'category',
        data: SUBJECT_YEAR_COUNT.year,
        // The categories on this axis are years, so it is labelled "year"
        // (it was previously mislabelled as "subject").
        name: '年份',
      },
    ],
    series,
  });
}
/**
 * Renders a word cloud into the given container.
 *
 * Uses the `wordCloud` series type, so the echarts-wordcloud plugin must be
 * loaded alongside ECharts.
 *
 * @param {string} el - id of the DOM element that hosts the chart.
 * @param {string} title - text shown centred above the cloud.
 * @param {Array} data - passed through unchanged as the series data
 *   (presumably `{ name, value }` word entries; verify against data.js).
 */
function create_wordclound(el, title, data) {
  // One colour channel in 0..160, which keeps the words fairly dark.
  const randomChannel = () => Math.round(Math.random() * 160);

  // A fresh random colour is generated on every call.
  const randomColor = () => {
    const red = randomChannel();
    const green = randomChannel();
    const blue = randomChannel();
    return `rgb(${red},${green},${blue})`;
  };

  const cloudSeries = {
    type: 'wordCloud',
    shape: 'circle',
    left: 'center',
    top: 'center',
    width: '70%',
    height: '85%',
    // Font size range in px that word values are mapped onto.
    sizeRange: [12, 60],
    // Words are rotated between -90 and 90 degrees in 45 degree steps.
    rotationRange: [-90, 90],
    rotationStep: 45,
    gridSize: 8,
    drawOutOfBound: false,
    textStyle: {
      normal: {
        fontFamily: 'sans-serif',
        fontWeight: 'bold',
        color: randomColor,
      },
      emphasis: {
        shadowBlur: 10,
        shadowColor: '#333',
      },
    },
    data,
  };

  const container = document.getElementById(el);
  echarts.init(container).setOption({
    title: { left: 'center', text: title },
    series: [cloudSeries],
  });
}
/**
 * Chart 7: one word cloud per subject.
 *
 * Clouds are drawn into the containers "echart-7-1" to "echart-7-4", in the
 * order of the subject list below, each fed from the matching entry of the
 * global `SUBJECT_WORD_COUNT`. Takes no arguments and returns nothing.
 */
function echart_7() {
  const subjects = [
    'Web Development',
    'Business Finance',
    'Graphic Design',
    'Musical Instruments',
  ];

  subjects.forEach((subject, index) => {
    // Container ids are numbered from 1, array positions from 0.
    const containerId = `echart-7-${index + 1}`;
    const heading = `${subject} 主题的关键字分布`;
    create_wordclound(containerId, heading, SUBJECT_WORD_COUNT[subject]);
  });
}
/**
 * Renders every chart on the page, in page order (chart 1 through chart 7).
 *
 * An exception thrown by one renderer stops the remaining ones from running.
 */
function initAllCharts() {
  const renderers = [
    echart_1,
    echart_2,
    echart_3,
    echart_4,
    echart_5,
    echart_6,
    echart_7,
  ];
  for (const render of renderers) {
    render();
  }
}
// Draw all charts as soon as this script is evaluated. index.html includes
// main.js at the end of <body>, after the chart containers and data.js.
initAllCharts()
data.js
/** 数据 */
// Course counts split by a boolean flag, in a columns/values table layout:
// `columns` holds the flag values as strings and `values` holds one row of
// counts in the same order.
// NOTE(review): presumably the flag is "is paid" ('true' = paid). The same two
// numbers are hard-coded in echart_1 as 付费课程 3544 / 免费课程 600 rather than
// being read from this constant. Confirm and consider using it there.
const NUM_IS_PAID = {
columns: ['true', 'false'],
values: [
[3544, 600]
]
}
const SUBJECT_PRICE_COUNT = { "Business Finance": [96, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 299, 0, 0, 0, 0, 54, 0, 0, 0, 0, 49, 0, 0, 0, 0, 29, 0, 0, 0, 0, 50, 0, 0, 0, 0, 23, 0, 0, 0, 0, 163, 0, 0, 0, 0, 7, 0, 0, 0, 0, 33, 0, 0, 0, 0, 4, 0, 0, 0, 0, 4, 0, 0, 0, 0, 19, 0, 0, 0, 0, 6, 0, 0, 0, 0, 6, 0, 0, 0, 0, 8, 0, 0, 0, 0, 52, 0, 0, 0, 0, 27, 0, 0, 0, 0, 5, 0, 0, 0, 0, 1, 0, 0, 0, 0, 5, 0, 0, 0, 0, 9, 0, 0, 0, 0, 13, 0, 0, 0, 0, 2, 0, 0, 0, 0, 1, 0, 0, 0, 0, 4, 0, 0, 0, 0, 9, 0, 0, 0, 0, 32, 0, 0, 0, 0, 1, 0, 0, 0, 0, 3, 0, 0, 0, 0, 1, 0, 0, 0, 0, 2, 0, 0, 0, 0, 2, 0, 0, 0, 0, 10, 0, 0, 0, 0, 2, 0, 0, 0, 0, 3, 0, 0, 0, 0, 33, 0, 0, 0, 0, 128], "Graphic Design": [35, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 203, 0, 0, 0, 0, 36, 0, 0, 0, 0, 44, 0, 0, 0, 0, 20, 0, 0, 0, 0, 26, 0, 0, 0, 0, 14, 0, 0, 0, 0, 41, 0, 0, 0, 0, 5, 0, 0, 0, 0, 4, 0, 0, 0, 0, 6, 0, 0, 0, 0, 6, 0, 0, 0, 0, 8, 0, 0, 0, 0, 6, 0, 0, 0, 0, 4, 0, 0, 0, 0, 4, 0, 0, 0, 0, 23, 0, 0, 0, 0, 23, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 4, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 1, 0, 0, 0, 0, 2, 0, 0, 0, 0, 15, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 3, 0, 0, 0, 0, 2, 0, 0, 0, 0, 1, 0, 0, 0, 0, 17, 0, 0, 0, 0, 35], "Musical Instruments": [46, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,