2020年美国新冠肺炎疫情数据分析

# 美国疫情——大数据可视化

本教程涉及到最后可视化阶段,利用python的pyecharts对MySQL中的数据做分析

后面可以根据自己需求改一下sql语句就行


import pandas as pd
import mysql.connector
from pyecharts import options as opts
from pyecharts.charts import Bar, WordCloud, Funnel, Pie, Map
from pyecharts.charts import Line
from pyecharts.components import Table
from pyecharts.options import ComponentTitleOpts





db = mysql.connector.connect(
    host="xxx.xxx.xxx.xxx",
    port="3306",
    user="root",
    password="xxx",
    database="xxx"
)

try:
    # 获取会话指针
    with db.cursor() as cursor:
        #统计美国截止每日的累计确诊人数和累计死亡人数。做法是以date作为分组字段,对cases和deaths字段进行汇总统
        sql1 = "SELECT `date`, SUM(`cases`) AS `cases`, SUM(`deaths`) AS `deaths` FROM `us_county` GROUP BY `date`"
        cursor.execute(sql1)
        result = cursor.fetchall()
        df = pd.DataFrame(result, columns=['date', 'cases', 'deaths'])
        dates = df['date'].tolist()
        cases =list(df['cases'])
        deaths = list(df['deaths'])





        #统计美国每日的新增确诊人数和新增死亡人数。
        sql2 = "SELECT date, SUM(cases) AS new_confirmed_cases,SUM(deaths) AS new_deaths\
                FROM us_county \
                WHERE date <= '2020-05-19'\
                GROUP BY date\
                ORDER BY date "

        cursor.execute(sql2)
        result2 = cursor.fetchall()
        df2 = pd.DataFrame(result2, columns=['date', 'new_confirmed_cases', 'new_deaths'])
        dates2 = df2['date'].tolist()
        day_new_cases = list(df2['new_confirmed_cases'])
        day_new_deaths = list(df2['new_deaths'])



        #统计截止5.19日,美国各州的累计确诊人数和死亡人数和病死率
        sql3 = "SELECT state, SUM(cases) AS totalCases, SUM(deaths) AS totalDeaths, ROUND(SUM(deaths) / SUM(cases), 4) AS deathRate\
              FROM us_county\
              WHERE date <= '2020-05-19'\
              GROUP BY state;"
        cursor.execute(sql3)
        result3 = cursor.fetchall()
        df3 = pd.DataFrame(result3, columns=['state', 'totalCases', 'totalDeaths', 'deathRate'])
        states = list(df3['state'])
        totalCases = list(df3['totalCases'])
        totalDeaths = list(df3['totalDeaths'])
        deathRate = list(df3['deathRate'])



        #截止5.19,美国累计确诊人数前10的州
        sql4 = "select state, sum(cases) as top10_confirmed from us_county  \
               where date<='2020-05-19' \
               group by state  \
               order by top10_confirmed desc  \
               limit 10;"
        cursor.execute(sql4)
        result4 = cursor.fetchall()
        df4 = pd.DataFrame(result4, columns=['state', 'top10_confirmed'])
        top_states4 = list(df4['state'])
        top10_confirmed = list(df4['top10_confirmed'])
        top10_data=[]
        for i in range(len(top_states4)):
            row=[]
            row.append(str(top_states4[i]))
            row.append(str(top10_confirmed[i]))

            top10_data.append(row)


        # 将 row 列表的数据添加到 data 列表中
        # data = [
        #     ("New York", 13231107),
        #     ("New Jersey", 4883171),
        #     ("Massachusetts", 2475673),
        #     ("Illinois", 2316059),
        #     ("California", 2211456),
        #     ("Pennsylvania", 1970295),
        #     ("Michigan", 1753801),
        #     ("Florida", 1480283),
        #     ("Texas", 1282385),
        #     ("Louisiana", 1258141)
        # ]




finally:
    # 关闭数据库连接
    db.close()


##1.画出每日的累计确诊病例数和死亡数——>双柱状图
def test1():
    bar = (
        Bar()
           .add_xaxis(dates)
           .add_yaxis("累计确诊", cases)
           .add_yaxis("累计死亡", deaths)
           .set_series_opts(label_opts=opts.LabelOpts(is_show=False))
           .set_global_opts(title_opts=opts.TitleOpts(title="美国每日累计确诊病例数和死亡数"))
    )

    bar.render('1美国每日累计确诊病例数和死亡数双柱状图.html')



##2.画出每日的新增确诊病例数和死亡数——>折线图
def test2():
    line = (
        Line()

           .add_xaxis(dates)
           .add_yaxis("新增确诊", day_new_cases)
           .add_yaxis("新增死亡", day_new_deaths)
           .set_series_opts(label_opts=opts.LabelOpts(is_show=False)))
           # .set_global_opts(title_opts=opts.TitleOpts(title="美国每日新增确诊病例数和死亡数"),)
    line.render('2美国每日新增确诊病例数和死亡数.html')


def test3():
    # 截止5.19,美国各州累计确诊、死亡人数和病死率--->表格
    all_state_data = []  # 初始化表格数据
    for i in range(len(states)):
        row = []  # 初始化行数据
        row.append(states[i])  # 添加州名
        row.append(totalCases[i])  # 添加累计确诊人数
        row.append(totalDeaths[i])  # 添加累计死亡人数
        row.append(deathRate[i])  # 添加病死率
        all_state_data.append(row)  # 添加到表格数据中

    table = Table()  # 创建表格

    headers = ["State name", "Total cases", "Total deaths", "Death rate"]  # 头部数据
    rows = all_state_data  # 行数据
    table.add(headers, rows)  # 添加数据
    table.set_global_opts(
        title_opts=ComponentTitleOpts(title="美国各州疫情一览", subtitle="")  # 标题选项
    )
    table.render('3美国各州疫情一览表.html')


    #截止5.19,美国累计确诊人数前10的州--->词云图
def test4():
    c = (
        WordCloud()
            .add("", top10_data, word_size_range=[20, 100])
            .set_global_opts(title_opts=opts.TitleOpts(title="美国各州确诊Top10"))

    )
    c.render('4美国各州确诊Top10词云图.html')





def test5():
    #漏斗图
    f=(
        Funnel()
        .add(
            "State",
            top10_data,
            sort_="ascending",
            label_opts=opts.LabelOpts(position="inside"),
        )
    # .set_global_opts(title_opts=opts.TitleOpts(title=""))
    )
    f.render('5美国各州确诊Top10漏斗图.html')

    p=(
        Pie()
       .add(
            "",
            top10_data,
            radius=["30%", "75%"],
            label_opts=opts.LabelOpts(formatter="{b}: {c}"),
        )
       .set_global_opts(title_opts=opts.TitleOpts(title=""))
    )
    p.render('6美国各州确诊Top10饼图.html')

    all_state_cases = []  # 每个州确诊人数
    all_state_deaths = []
    for i in range(len(states)):
        row = []  # 初始化行数据
        row1 = []
        row.append(states[i])  # 添加州名
        row1.append(states[i])
        row.append(totalCases[i])  # 添加累计确诊人数
        row1.append(totalDeaths[i])  # 添加累计死亡人数
        # row.append(deathRate[i])  # 添加病死率
        all_state_cases.append(row)
        all_state_deaths.append(row1)
    c1 = (
            Map(
                init_opts=opts.InitOpts(width="1400px", height="800px"),

                )
            .add("确诊人数", all_state_cases, "美国")
            .add("累计死亡人数", all_state_deaths, "美国")
            .set_global_opts(title_opts=opts.TitleOpts(title="美国各州确诊病例")

            )


        )
    c1.render('7美国各州确诊Top10地图.html')













def main():

    test1()
    test2()
    test3()
    test4()
    test5()


if __name__ == '__main__':
    main()  # 主程序执行

来源自2020年美国新冠肺炎疫情数据分析_厦大数据库实验室博客 (xmu.edu.cn)

  • 15
    点赞
  • 26
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值