文件noc_regions.csv中包含3个字段,具体信息如下:
4.数据来源
数据集源自于kaggle平台用户分享,
基于证书 CC0: Public Domain 发布,
具体信息内容源自
https://www.sports-reference.com/
二、数据集可探索、研究的方向
可以从以下几个方面来探索奥林匹克运动会的演变历程:
- 历年来 男女参赛运动员的表现如何?
- 那不同地区?
- 不同运动项目?
- 不同比赛项目?
三、可视化分析
1.🏆各国累计奖牌数
import pandas as pd
from pyecharts.charts import \*
from pyecharts import options as opts
from pyecharts.commons.utils import JsCode
athlete_data = pd.read_csv('./data/athlete\_events.csv')
noc_region = pd.read_csv('./data/noc\_regions.csv')
# 关联代表国家
data = pd.merge(athlete_data, noc_region, on='NOC', how='left')
print(data.head())
medal_data = data.groupby(['Year', 'Season', 'region', 'Medal'])['Event'].nunique().reset_index()
medal_data.columns = ['Year', 'Season', 'region', 'Medal', 'Nums']
medal_data = medal_data.sort_values(by="Year", ascending=True)
def medal\_stat(year, season='Summer'):
t_data = medal_data[(medal_data['Year'] <= year) & (medal_data['Season'] == season)]
t_data = t_data.groupby(['region', 'Medal'])['Nums'].sum().reset_index()
t_data = t_data.set_index(['region', 'Medal']).unstack().reset_index().fillna(0, inplace=False)
t_data = sorted(
[(row['region'][0], int(row['Nums']['Gold']), int(row['Nums']['Silver']), int(row['Nums']['Bronze']))
for _, row in t_data.iterrows()], key=lambda x: x[1] + x[2] + x[3], reverse=True)[:20]
return t_data
year_list = sorted(list(set(medal_data['Year'].to_list())), reverse=True)
tl = Timeline(init_opts=opts.InitOpts(theme='dark', width='1000px', height='1000px'))
tl.add_schema(is_timeline_show=True, is_rewind_play=True, is_inverse=False,
label_opts=opts.LabelOpts(is_show=False))
for year in year_list:
t_data = medal_stat(year)[::-1]
bar = (
Bar(init_opts=opts.InitOpts())
.add_xaxis([x[0] for x in t_data])
.add_yaxis("铜牌🥉", [x[3] for x in t_data],
stack='stack1',
itemstyle_opts=opts.ItemStyleOpts(border_color='rgb(220,220,220)', color='rgb(218,165,32)'))
.add_yaxis("银牌🥈", [x[2] for x in t_data],
stack='stack1',
itemstyle_opts=opts.ItemStyleOpts(border_color='rgb(220,220,220)', color='rgb(192,192,192)'))
.add_yaxis("金牌🏅️", [x[1] for x in t_data],
stack='stack1',
itemstyle_opts=opts.ItemStyleOpts(border_color='rgb(220,220,220)', color='rgb(255,215,0)'))
.set_series_opts(label_opts=opts.LabelOpts(is_show=True,
position='insideRight',
font_style='italic'), )
.set_global_opts(
title_opts=opts.TitleOpts(title="各国累计奖牌数(夏季奥运会)"),
xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=45)),
legend_opts=opts.LegendOpts(is_show=True),
graphic_opts=[opts.GraphicGroup(graphic_item=opts.GraphicItem(
rotation=JsCode("Math.PI / 4"),
bounding="raw",
right=110,
bottom=110,
z=100),
children=[
opts.GraphicRect(
graphic_item=opts.GraphicItem(
left="center", top="center", z=100
),
graphic_shape_opts=opts.GraphicShapeOpts(
width=400, height=50
),
graphic_basicstyle_opts=opts.GraphicBasicStyleOpts(
fill="rgba(0,0,0,0.3)"
),
),
opts.GraphicText(
graphic_item=opts.GraphicItem(
left="center", top="center", z=100
),
graphic_textstyle_opts=opts.GraphicTextStyleOpts(
text=year,
font="bold 26px Microsoft YaHei",
graphic_basicstyle_opts=opts.GraphicBasicStyleOpts(
fill="#fff"
),
),
),
],
)
], )
.reversal_axis())
tl.add(bar, year)
tl.render(r".\htmlRender\01\_各国累计奖牌数(夏季奥运会).html")
import pandas as pd
from pyecharts.charts import \*
from pyecharts import options as opts
from pyecharts.commons.utils import JsCode
athlete_data = pd.read_csv('./data/athlete\_events.csv')
noc_region = pd.read_csv('./data/noc\_regions.csv')
# 关联代表国家
data = pd.merge(athlete_data, noc_region, on='NOC', how='left')
print(data.head())
medal_data = data.groupby(['Year', 'Season', 'region', 'Medal'])['Event'].nunique().reset_index()
medal_data.columns = ['Year', 'Season', 'region', 'Medal', 'Nums']
medal_data = medal_data.sort_values(by="Year", ascending=True)
def medal\_stat(year, season='Summer'):
t_data = medal_data[(medal_data['Year'] <= year) & (medal_data['Season'] == season)]
t_data = t_data.groupby(['region', 'Medal'])['Nums'].sum().reset_index()
t_data = t_data.set_index(['region', 'Medal']).unstack().reset_index().fillna(0, inplace=False)
t_data = sorted(
[(row['region'][0], int(row['Nums']['Gold']), int(row['Nums']['Silver']), int(row['Nums']['Bronze']))
for _, row in t_data.iterrows()], key=lambda x: x[1] + x[2] + x[3], reverse=True)[:20]
return t_data
year_list = sorted(list(set(medal_data['Year'].to_list())), reverse=True)
tl = Timeline(init_opts=opts.InitOpts(theme='dark', width='1000px', height='1000px'))
tl.add_schema(is_timeline_show=True, is_rewind_play=True, is_inverse=False,
label_opts=opts.LabelOpts(is_show=False))
year_list = sorted(list(set(medal_data['Year'][medal_data.Season == 'Winter'].to_list())), reverse=True)
tl = Timeline(init_opts=opts.InitOpts(theme='dark', width='1000px', height='1000px'))
tl.add_schema(is_timeline_show=True, is_rewind_play=True, is_inverse=False,
label_opts=opts.LabelOpts(is_show=False))
for year in year_list:
t_data = medal_stat(year, 'Winter')[::-1]
bar = (
Bar(init_opts=opts.InitOpts(theme='dark'))
.add_xaxis([x[0] for x in t_data])
.add_yaxis("铜牌🥉", [x[3] for x in t_data],
stack='stack1',
itemstyle_opts=opts.ItemStyleOpts(border_color='rgb(220,220,220)', color='rgb(218,165,32)'))
.add_yaxis("银牌🥈", [x[2] for x in t_data],
stack='stack1',
itemstyle_opts=opts.ItemStyleOpts(border_color='rgb(220,220,220)', color='rgb(192,192,192)'))
.add_yaxis("金牌🏅️", [x[1] for x in t_data],
stack='stack1',
itemstyle_opts=opts.ItemStyleOpts(border_color='rgb(220,220,220)', color='rgb(255,215,0)'))
.set_series_opts(label_opts=opts.LabelOpts(is_show=True,
position='insideRight',
font_style='italic'), )
.set_global_opts(
title_opts=opts.TitleOpts(title="各国累计奖牌数(冬季奥运会)"),
xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=45)),
legend_opts=opts.LegendOpts(is_show=True),
graphic_opts=[opts.GraphicGroup(graphic_item=opts.GraphicItem(
rotation=JsCode("Math.PI / 4"),
bounding="raw",
right=110,
bottom=110,
z=100),
children=[
opts.GraphicRect(
graphic_item=opts.GraphicItem(
left="center", top="center", z=100
),
graphic_shape_opts=opts.GraphicShapeOpts(
width=400, height=50
),
graphic_basicstyle_opts=opts.GraphicBasicStyleOpts(
fill="rgba(0,0,0,0.3)"
),
),
opts.GraphicText(
### 最后
> **🍅 硬核资料**:关注即可领取PPT模板、简历模板、行业经典书籍PDF。
> **🍅 技术互助**:技术群大佬指点迷津,你的问题可能不是问题,求资源在群里喊一声。
> **🍅 面试题库**:由技术群里的小伙伴们共同投稿,热乎的大厂面试真题,持续更新中。
> **🍅 知识体系**:含编程语言、算法、大数据生态圈组件(Mysql、Hive、Spark、Flink)、数据仓库、Python、前端等等。
**网上学习资料一大堆,但如果学到的知识不成体系,遇到问题时只是浅尝辄止,不再深入研究,那么很难做到真正的技术提升。**
**[需要这份系统化学习资料的朋友,可以戳这里无偿获取](https://bbs.csdn.net/topics/618317507)**
**一个人可以走的很快,但一群人才能走的更远!不论你是正从事IT行业的老鸟或是对IT行业感兴趣的新人,都欢迎加入我们的的圈子(技术交流、学习资源、职场吐槽、大厂内推、面试辅导),让我们一起学习成长!**