Udemy 课程数据可视化 | Pandas & Echarts & Echarts-wordcloud

对 Udemy 在线网络课程进行数据分析与可视化展现,数据包含了2011年到2017年的所有课程数据,每个课程数据包含了课程名称、价格、学科、评论数量、订阅数量等信息。用到了 pandas 和 echarts。

数据来源: Udemy Courses - Kaggle


开发流程

利用 Jupyter Notebook 对数据进行预处理、EDA和导出数据

  1. 加载数据,查看数据基本信息,必要的数据预处理部分
  2. 探索性数据分析,设定问题,找出数据包含的信息
  3. 与可视化阶段同步进行,根据 Echarts 配置参数中 data 等属性的需要,用 Python 导出合适的 JavaScript 数组/对象 数据,方便 Echarts 使用

利用 Echarts库 编写可视化组件

  1. 新建 HTML 文件 index.html,初始化基本信息,引入必要的 JS 库
  2. 确定可视化组件的内容,确定布局位置,这些都在 HTML 文件中完成
  3. 新建 main.js 文件,使用 Echarts 配置各种所需图表,为了整洁性,封装成函数
  4. 需要的数据全部封装到 data.js 文件中,方便 main.js 引用
  5. 反复打开 index.html 进行测试,确保功能都正常显示

可视化组件

  • 课程 免费/付费 数量,饼图
  • 各类别课程 免费/付费 数量 柱状图
  • 各类别课程的价格分布
  • 各等级 免费/付费 数量
  • 价格的 cdf、柱状图、BOX图分布
  • 各年份的各类别发布量
  • 各课程类别的课程关键字云图

运行

直接浏览器打开 index.html 文件即可,支持 file 协议。

源码

源数据有 3684 条,因此 data.js 文件比较大

index.html

<!DOCTYPE html>
<html lang="zh-CN">

<head>
    <meta http-equiv="Content-Type" content="text/html;charset=UTF-8">
    <!-- ECharts core library. -->
    <script src="https://cdn.bootcss.com/echarts/4.7.0/echarts.min.js"></script>
    <!--
        Word-cloud series extension used by the echart-7-* containers.
        NOTE(review): the package/version part of this URL was destroyed by
        e-mail obfuscation in the published listing; 1.1.3 is assumed here
        because the 1.x line targets ECharts 4 - confirm the intended version.
    -->
    <script src="https://cdn.jsdelivr.net/npm/echarts-wordcloud@1.1.3/dist/echarts-wordcloud.min.js"></script>
    <title>Udemy 课程数据可视化</title>
</head>

<body>

    <div id='head'>
        <h2>Udemy 课程数据可视化</h2>
    </div>

    <div id='main'>

        <div id="chart-1">
            <!-- Chart 1: free vs. paid course counts -->
            <div id='echart-1' style="height: 350px"></div>
        </div>

        <div>
            <!-- Chart 2: free vs. paid course counts per subject -->
            <div id='echart-2' style="height: 350px"></div>
        </div>

        <div>
            <!-- Chart 3: price distribution per subject -->
            <div id='echart-3' style="height: 350px"></div>

        </div>

        <div>
            <!-- Chart 4: free vs. paid course counts per level -->
            <div id='echart-4' style="height: 350px"></div>
        </div>

        <div>
            <!-- Chart 5: price CDF, histogram and box plot -->
            <div id='echart-5' style="height: 350px"></div>

        </div>

        <div>
            <!-- Chart 6: courses published per year, per subject -->
            <div id='echart-6' style="height: 350px"></div>
        </div>

        <div>
            <!-- Chart 7: one title word cloud per subject -->
            <div id='echart-7-1' style="height: 400px"></div>
            <div id='echart-7-2' style="height: 400px"></div>
            <div id='echart-7-3' style="height: 400px"></div>
            <div id='echart-7-4' style="height: 400px"></div>


        </div>
    </div>

    <style>
        #head h2 {
            color: firebrick;
            text-align: center;
            font-weight: bold;
        }
    </style>
    <!-- data.js defines the globals that main.js reads, so it is loaded first. -->
    <script src='data.js'></script>
    <script src='main.js'></script>

</body>

</html>

main.js

/** udemy 课程数据可视化 公共 js 文件 */

/** Echarts Dom 操作 */

/**
 * Renders the free vs. paid course-count ring (pie) chart into the
 * element with id "echart-1".
 *
 * The counts are hard-coded in the series data. Slice labels are hidden
 * by default and shown in the centre of the ring only while a slice is
 * emphasised (hovered).
 */
function echart_1() {
    const pieChart = echarts.init(document.getElementById('echart-1'));
    const option = {
        title: {
            left: '20%',
            text: '免费/付费 课程数量对比'
        },
        // A pie series has no axis, so an 'axis' trigger would never fire;
        // 'item' shows the tooltip when a slice is hovered.
        tooltip: {
            trigger: 'item'
        },
        legend: {},
        series: [
            {
                name: '在线课程数量',
                type: 'pie',
                // Inner and outer radius: draws a ring instead of a full disc.
                radius: ['50%', '75%'],
                avoidLabelOverlap: false,
                label: {
                    show: false,
                    position: 'center'
                },
                emphasis: {
                    label: {
                        show: true,
                        fontSize: 30,
                        fontWeight: 'bold'
                    }
                },
                labelLine: {
                    show: false
                },
                data: [
                    { value: 3544, name: '付费课程' },
                    { value: 600, name: '免费课程' }
                ]
            }
        ],
    };
    pieChart.setOption(option);
}

/**
 * Renders the grouped bar chart of paid vs. free course counts per
 * subject into the element with id "echart-2".
 *
 * Both bar series carry mark points on their maximum and minimum values.
 * All counts are hard-coded.
 */
function echart_2() {
    const container = document.getElementById('echart-2');
    const barChart = echarts.init(container);

    // Each series gets its own mark-point object.
    const extremes = () => ({
        data: [
            { type: 'max', name: '最大值' },
            { type: 'min', name: '最小值' },
        ],
    });
    const toBarSeries = (name, data) => ({
        name,
        type: 'bar',
        data,
        markPoint: extremes(),
    });

    const subjectAxis = {
        type: 'category',
        name: '主题',
        data: ['Business Finance', 'Graphic Design', 'Musical Instruments', 'Web Development'],
    };
    const countAxis = { type: 'value', name: '数量' };

    barChart.setOption({
        title: { left: '20%', text: '各主题的 免费/付费 课程数量' },
        legend: {},
        // Axis-triggered tooltip with a shaded band over the hovered category.
        tooltip: { trigger: 'axis', axisPointer: { type: 'shadow' } },
        xAxis: [subjectAxis],
        yAxis: [countAxis],
        series: [
            toBarSeries('付费', [1099, 568, 634, 1067]),
            toBarSeries('免费', [96, 35, 46, 133]),
        ],
    });
}

/**
 * Renders the per-subject price distribution as a scatter chart on two
 * category axes, into the element with id "echart-3".
 *
 * Reads the globals SUBJECT_PRICE_COUNT_ARRAY, SUBJECT_PRICE_COUNT and
 * SUBJECTS. Each input row is reordered from [0, 1, 2] to [1, 0, 2], so
 * element 1 is plotted on the x axis (price categories), element 0 on
 * the y axis (SUBJECTS) and element 2 drives the symbol size.
 * NOTE(review): rows are presumably [subject, price, count] - confirm
 * against data.js.
 */
function echart_3() {
    const scatterChart = echarts.init(document.getElementById('echart-3'));

    const points = [];
    for (const row of SUBJECT_PRICE_COUNT_ARRAY) {
        points.push([row[1], row[0], row[2]]);
    }

    const priceAxis = {
        type: 'category',
        data: SUBJECT_PRICE_COUNT["price"],
        name: '价格',
        axisLine: { show: false },
    };

    const subjectAxis = {
        type: 'category',
        data: SUBJECTS,
        axisLine: { show: false },
        boundaryGap: false,
        // Dashed guide line for every subject row.
        splitLine: {
            show: true,
            lineStyle: { color: '#999', type: 'dashed' },
        },
        name: '主题',
    };

    const countSeries = {
        name: '课程数量',
        type: 'scatter',
        areaStyle: {},
        data: points,
        // Symbol size is one third of the third element of the point.
        symbolSize: (point) => point[2] / 3,
        // Stagger the entry animation by 5 ms per data index.
        animationDelay: (index) => index * 5,
        tooltip: {},
    };

    scatterChart.setOption({
        title: { left: 'center', text: '各主题课程的价格分布' },
        tooltip: { trigger: 'item', axisPointer: { type: 'shadow' } },
        xAxis: priceAxis,
        yAxis: [subjectAxis],
        series: [countSeries],
    });
}

/**
 * Renders the grouped bar chart of paid vs. free course counts per
 * difficulty level into the element with id "echart-4".
 *
 * Counts are hard-coded; every series marks its maximum and minimum bar.
 */
function echart_4() {
    const levelChart = echarts.init(document.getElementById('echart-4'));

    // [series name, counts in the order of the level categories below]
    const rows = [
        ['付费', [1807, 1112, 391, 58]],
        ['免费', [122, 158, 30, 0]],
    ];

    const series = rows.map(([label, counts]) => ({
        name: label,
        type: 'bar',
        data: counts,
        markPoint: {
            data: [
                { type: 'max', name: '最大值' },
                { type: 'min', name: '最小值' },
            ],
        },
    }));

    const option = {
        title: {
            left: '20%',
            text: '各课程难度的 免费/付费 课程数量',
        },
        // Axis-triggered tooltip with a shaded band over the hovered category.
        tooltip: {
            trigger: 'axis',
            axisPointer: { type: 'shadow' },
        },
        legend: {},
        xAxis: [
            {
                type: 'category',
                data: ['All Levels', 'Beginner Level', 'Intermediate Level', 'Expert Level'],
                name: '课程难度',
            },
        ],
        yAxis: [
            { type: 'value', name: '数量' },
        ],
        series,
    };

    levelChart.setOption(option);
}

/**
 * Renders three side-by-side price views into the element with id
 * "echart-5": a CDF line (grid 0), a bar chart of counts per price
 * bucket (grid 1) and a box plot (grid 2).
 *
 * Reads the globals PRICE_CDF ({x, y}), SUBJECT_PRICE_COUNT.price and
 * PRICE_COUNT ({x, y}). The box-plot five-number summary is hard-coded.
 */
function echart_5() {
    const priceChart = echarts.init(document.getElementById('echart-5'));

    // Zip the parallel x / y arrays into [x, y] points for the line series.
    const { x: cdfXs, y: cdfYs } = PRICE_CDF;
    const cdfPoints = cdfXs.map((xValue, position) => [xValue, cdfYs[position]]);

    // One grid per sub-chart, placed left to right.
    const grid = [
        { x: '5%', width: '25%' },
        { x: '30%', width: '40%' },
        { x: '75%', width: '20%' },
    ];

    const xAxis = [
        { type: 'category', data: SUBJECT_PRICE_COUNT.price, gridIndex: 0, name: '价格' },
        { type: 'category', data: PRICE_COUNT.x, gridIndex: 1, name: '价格区间' },
        {
            type: 'category',
            data: ['价格'],
            splitArea: { show: false },
            splitLine: { show: false },
            gridIndex: 2,
        },
    ];

    // The array position of each name is also the grid it belongs to.
    const yAxis = ['占总百分比', '数量', '价格'].map((name, gridIndex) => ({
        type: 'value',
        gridIndex,
        name,
    }));

    const series = [
        { name: 'CDF 指数', type: 'line', data: cdfPoints, xAxisIndex: 0, yAxisIndex: 0 },
        { name: '普通统计 指数', type: 'bar', data: PRICE_COUNT.y, xAxisIndex: 1, yAxisIndex: 1 },
        {
            name: '四分位 指数',
            type: 'boxplot',
            data: [[0.0, 20.0, 45.0, 95.0, 200.0]],
            xAxisIndex: 2,
            yAxisIndex: 2,
        },
    ];

    priceChart.setOption({
        title: { left: '20%', text: '在线课程的价格分布如何' },
        legend: {},
        tooltip: { trigger: 'axis', axisPointer: { type: 'shadow' } },
        grid,
        xAxis,
        yAxis,
        series,
    });
}

/**
 * Renders a horizontal stacked bar chart of courses published per year,
 * one stacked segment per subject, into the element with id "echart-6".
 *
 * Reads the global SUBJECT_YEAR_COUNT: its `year` array supplies the
 * y-axis categories and each subject key supplies that subject's series
 * data.
 */
function echart_6() {
    const chart = echarts.init(document.getElementById('echart-6'));

    // Stacking order of the segments; also the keys read from SUBJECT_YEAR_COUNT.
    const subjects = [
        'Web Development',
        'Business Finance',
        'Graphic Design',
        'Musical Instruments',
    ];

    const series = subjects.map((subject) => ({
        name: subject,
        type: 'bar',
        // A shared stack name makes the bars accumulate along the value axis.
        stack: '总量',
        data: SUBJECT_YEAR_COUNT[subject],
        label: {
            show: true,
            position: 'insideRight'
        },
    }));

    const option = {
        tooltip: {
            trigger: 'axis',
            axisPointer: {
                type: 'shadow'
            }
        },
        legend: {},
        title: {
            left: '10%',
            text: '各主题课程最近几年的发布量'
        },
        xAxis: [
            {
                type: 'value',
                name: '发布量'
            },
        ],
        yAxis: [
            {
                type: 'category',
                data: SUBJECT_YEAR_COUNT.year,
                // The categories are years, so the axis is labelled "year"
                // (it was previously mislabelled as "subject").
                name: '年份'
            },
        ],
        series,
    };
    chart.setOption(option);
}

/**
 * Renders a word cloud into the element with the given id.
 *
 * @param {string} el - id of the container element.
 * @param {string} title - chart title, centred above the cloud.
 * @param {*} data - passed through unchanged as the series data.
 */
function create_wordclound(el, title, data){
    const cloudChart = echarts.init(document.getElementById(el));

    // One colour channel, a random integer between 0 and 160.
    const randomChannel = () => Math.round(Math.random() * 160);

    const wordStyle = {
        normal: {
            fontFamily: 'sans-serif',
            fontWeight: 'bold',
            // Evaluated by the chart to pick a random colour.
            color: () => `rgb(${randomChannel()},${randomChannel()},${randomChannel()})`,
        },
        emphasis: {
            shadowBlur: 10,
            shadowColor: '#333',
        },
    };

    const cloudSeries = {
        type: 'wordCloud',
        shape: 'circle',
        left: 'center',
        top: 'center',
        width: '70%',
        height: '85%',
        sizeRange: [12, 60],
        rotationRange: [-90, 90],
        rotationStep: 45,
        gridSize: 8,
        drawOutOfBound: false,
        textStyle: wordStyle,
        data: data,
    };

    cloudChart.setOption({
        title: { left: 'center', text: title },
        series: [cloudSeries],
    });
}

/**
 * Renders one keyword word cloud per subject, in the containers
 * "echart-7-1" to "echart-7-4".
 *
 * Reads the global SUBJECT_WORD_COUNT, keyed by subject name, and
 * delegates the drawing to create_wordclound.
 */
function echart_7() {
    // [container id, chart title, key into SUBJECT_WORD_COUNT]
    const clouds = [
        ['echart-7-1', 'Web Development 主题的关键字分布', "Web Development"],
        ['echart-7-2', 'Business Finance 主题的关键字分布', "Business Finance"],
        ['echart-7-3', 'Graphic Design 主题的关键字分布', "Graphic Design"],
        ['echart-7-4', 'Musical Instruments 主题的关键字分布', "Musical Instruments"],
    ];

    for (const [containerId, heading, subject] of clouds) {
        create_wordclound(containerId, heading, SUBJECT_WORD_COUNT[subject]);
    }
}

/**
 * Draws every chart on the page by calling echart_1 to echart_7 in
 * numeric order.
 */
function initAllCharts() {
    const renderers = [
        echart_1,
        echart_2,
        echart_3,
        echart_4,
        echart_5,
        echart_6,
        echart_7,
    ];
    for (const render of renderers) {
        render();
    }
}

// Draw all charts as soon as this script is evaluated.
initAllCharts()

data.js 

/** 数据 */

// Course counts split by a boolean flag: `columns` names each flag value
// and `values` holds a single row of counts.
// NOTE(review): echart_1 hard-codes the same numbers as paid = 3544 and
// free = 600, so 'true' presumably means "is paid" - confirm.
const NUM_IS_PAID = {
    columns: ['true', 'false'],
    values: [
        [3544, 600]
    ]
}

const SUBJECT_PRICE_COUNT = { "Business Finance": [96, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 299, 0, 0, 0, 0, 54, 0, 0, 0, 0, 49, 0, 0, 0, 0, 29, 0, 0, 0, 0, 50, 0, 0, 0, 0, 23, 0, 0, 0, 0, 163, 0, 0, 0, 0, 7, 0, 0, 0, 0, 33, 0, 0, 0, 0, 4, 0, 0, 0, 0, 4, 0, 0, 0, 0, 19, 0, 0, 0, 0, 6, 0, 0, 0, 0, 6, 0, 0, 0, 0, 8, 0, 0, 0, 0, 52, 0, 0, 0, 0, 27, 0, 0, 0, 0, 5, 0, 0, 0, 0, 1, 0, 0, 0, 0, 5, 0, 0, 0, 0, 9, 0, 0, 0, 0, 13, 0, 0, 0, 0, 2, 0, 0, 0, 0, 1, 0, 0, 0, 0, 4, 0, 0, 0, 0, 9, 0, 0, 0, 0, 32, 0, 0, 0, 0, 1, 0, 0, 0, 0, 3, 0, 0, 0, 0, 1, 0, 0, 0, 0, 2, 0, 0, 0, 0, 2, 0, 0, 0, 0, 10, 0, 0, 0, 0, 2, 0, 0, 0, 0, 3, 0, 0, 0, 0, 33, 0, 0, 0, 0, 128], "Graphic Design": [35, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 203, 0, 0, 0, 0, 36, 0, 0, 0, 0, 44, 0, 0, 0, 0, 20, 0, 0, 0, 0, 26, 0, 0, 0, 0, 14, 0, 0, 0, 0, 41, 0, 0, 0, 0, 5, 0, 0, 0, 0, 4, 0, 0, 0, 0, 6, 0, 0, 0, 0, 6, 0, 0, 0, 0, 8, 0, 0, 0, 0, 6, 0, 0, 0, 0, 4, 0, 0, 0, 0, 4, 0, 0, 0, 0, 23, 0, 0, 0, 0, 23, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 4, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 1, 0, 0, 0, 0, 2, 0, 0, 0, 0, 15, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 3, 0, 0, 0, 0, 2, 0, 0, 0, 0, 1, 0, 0, 0, 0, 17, 0, 0, 0, 0, 35], "Musical Instruments": [46, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 141, 0, 0, 0
  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值