大家好!
今天分享如何用 Python(pandas + pyecharts)制作柱状图和散点图,展示科比·布莱恩特职业生涯中在场上不同位置的投篮命中率和出手位置分布。
案例中数据表格结构如下:
代码及演示:
import numpy as np
import pandas as pd
from pyecharts import options as opts
from pyecharts.charts import *
from pyecharts.globals import ThemeType
# Load the shot log into a DataFrame.
# NOTE(review): the path has no separators after "D:" - they look like they
# were lost when the article was extracted; confirm the real location first.
df_Kobe = pd.read_csv(
    "D:01会Excel的隔壁老王Excel报表20200516-科比职业生涯数据Kobe-data.csv"
)
df_Kobe
# Keep only the rows whose "shot_made_flag" is not null.
df_Kobe_notnull = df_Kobe[df_Kobe["shot_made_flag"].notna()]
df_Kobe_notnull
# Pivot: total number of shot attempts per "shot_zone_range".
# aggfunc="count" counts the non-null "shot_zone_area" entries of each group.
# The previous np.count_nonzero was a truthiness count: it would skip 0/empty
# values and would count NaN, so it only matched the row count by coincidence.
df_Kobe_notnull_shot_zone_basic_pivot_table_shot_made_flag_total = pd.pivot_table(
    df_Kobe_notnull,
    index=["shot_zone_range"],
    values=["shot_zone_area"],
    aggfunc="count",
).reset_index()  # turn the group index back into an ordinary column
df_Kobe_notnull_shot_zone_basic_pivot_table_shot_made_flag_total
# Pivot: number of shots per ("shot_zone_range", "shot_made_flag") pair.
# "shot_type" is only used as the column to count; aggfunc="count" counts its
# non-null entries (np.count_nonzero would skip 0/empty values and count NaN).
df_Kobe_notnull_shot_zone_basic_pivot_table = pd.pivot_table(
    df_Kobe_notnull,
    index=["shot_zone_range", "shot_made_flag"],
    values=["shot_type"],
    aggfunc="count",
).reset_index()  # flatten the two-level index into ordinary columns
# Keep the rows where shot_made_flag == 1, i.e. the made shots per zone.
df_Kobe_notnull_shot_zone_basic_pivot_table_shot_made_flag_1 = (
    df_Kobe_notnull_shot_zone_basic_pivot_table[
        df_Kobe_notnull_shot_zone_basic_pivot_table["shot_made_flag"] == 1
    ]
)
df_Kobe_notnull_shot_zone_basic_pivot_table_shot_made_flag_1
# Shooting percentage per "shot_zone_range".
# The made-shot table (..._shot_made_flag_1) and the totals table (..._total)
# are built by the preceding steps, so they are reused here instead of
# recomputing the same pivot a second time.
df_Kobe_notnull_shot_zone_basic_pivot_table_shot_made_flag_1_merge = (
    # Join made shots with total attempts; the key is spelled out so the join
    # does not silently change if the two tables ever share another column.
    pd.merge(
        df_Kobe_notnull_shot_zone_basic_pivot_table_shot_made_flag_1,
        df_Kobe_notnull_shot_zone_basic_pivot_table_shot_made_flag_total,
        on="shot_zone_range",
    )
    # "shot_type" holds the made-shot count, "shot_zone_area" the attempt count.
    # Four decimals on the ratio equal two decimals once shown as a percentage.
    .assign(**{"投篮命中率": lambda t: (t["shot_type"] / t["shot_zone_area"]).round(4)})
    # Percentage as a number rounded to two decimals; formatting the raw
    # product as text leaked float artefacts such as 57.309999999999995.
    .assign(**{"投篮命中率(百分比)": lambda t: (t["投篮命中率"] * 100).round(2)})
    # Highest percentage first, which is also the bar order in the chart.
    .sort_values(by="投篮命中率", ascending=False)
)
df_Kobe_notnull_shot_zone_basic_pivot_table_shot_made_flag_1_merge
#画柱状图
from pyecharts import options as opts
from pyecharts.charts import Bar
from pyecharts.faker import Faker
from pyecharts.commons.utils import JsCode
# Bar chart: shooting percentage per shot zone range.
c = (
    Bar(init_opts=opts.InitOpts(width="900px", height="450px", theme=ThemeType.CHALK))
    .add_xaxis(
        df_Kobe_notnull_shot_zone_basic_pivot_table_shot_made_flag_1_merge["shot_zone_range"].tolist()
    )
    .add_yaxis(
        "投篮命中率",
        # Coerce to float and round to two decimals so the bars receive real
        # numbers and the "{c} %" labels never show float artefacts, whether
        # the column was stored as text or as numbers.
        df_Kobe_notnull_shot_zone_basic_pivot_table_shot_made_flag_1_merge["投篮命中率(百分比)"]
        .astype(float)
        .round(2)
        .tolist(),
        category_gap="60%",
        label_opts=opts.LabelOpts(formatter="{c} %", margin=0),
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(
            title="科比场上不同位置命中率",
            subtitle="(微信公众号:会Excel的隔壁老王)",
            pos_bottom="83%",
            pos_right="38%",
        ),
        # Tilt the zone labels so the longer names do not overlap.
        xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-15)),
        yaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(formatter="{value} %")),
    )
    # Alternative output: chain .render("bar_base.html") to write an HTML file.
)
c.render_notebook()
#画散点图
import pyecharts.options as opts
from pyecharts.charts import Scatter
from pyecharts.globals import ThemeType
# Scatter chart: one series per shot zone range, plotting every shot at its
# (loc_x, loc_y) position. The tuple order fixes the legend/series order.
shot_zone_ranges = (
    "Less Than 8 ft.",
    "8-16 ft.",
    "16-24 ft.",
    "24+ ft.",
    "Back Court Shot",
)
c1 = Scatter(init_opts=opts.InitOpts(width="700px", height="1300px", bg_color="#ffffff"))
for zone in shot_zone_ranges:
    # Filter once per zone and reuse the slice for both coordinates.
    zone_shots = df_Kobe_notnull[df_Kobe_notnull["shot_zone_range"] == zone]
    # add_xaxis is called before every add_yaxis on purpose: each series is
    # paired with the x data that was set most recently.
    c1.add_xaxis(xaxis_data=zone_shots["loc_x"].tolist())
    c1.add_yaxis(
        series_name=zone,
        y_axis=zone_shots["loc_y"].tolist(),
        symbol_size=2,
        label_opts=opts.LabelOpts(is_show=False),
    )
c1.set_global_opts(
    # Both axes are numeric court coordinates; grid lines are hidden.
    xaxis_opts=opts.AxisOpts(
        type_="value",
        splitline_opts=opts.SplitLineOpts(is_show=False),
    ),
    yaxis_opts=opts.AxisOpts(
        type_="value",
        axistick_opts=opts.AxisTickOpts(is_show=False),
        splitline_opts=opts.SplitLineOpts(is_show=False),
    ),
    tooltip_opts=opts.TooltipOpts(is_show=False),
    title_opts=opts.TitleOpts(
        # "\n" restores the intended line break; the backslash had been lost,
        # leaving a stray literal "n" in the title.
        title="Python散点图展示科比场上不同位置全部投球位置\n (微信公众号:会Excel的隔壁老王)",
        pos_bottom="15%",
        pos_right="22%",
    ),
    legend_opts=opts.LegendOpts(pos_top="85%"),
)
# Alternative output: c1.render("basic_scatter_chart-1.html") writes an HTML file.
c1.render_notebook()
完整python代码参考:
import pyecharts.options as opts
from pyecharts.charts import *
from pyecharts.globals import ThemeType
from pyecharts.faker import Faker
from pyecharts.commons.utils import JsCode
import numpy as np
import pandas as pd
# Load the shot log into a DataFrame.
# NOTE(review): the path has no separators after "D:" - they look like they
# were lost when the article was extracted; confirm the real location first.
df_Kobe = pd.read_csv(
    "D:01会Excel的隔壁老王Excel报表20200516-科比职业生涯数据Kobe-data.csv"
)
# Keep only the rows whose "shot_made_flag" is not null.
df_Kobe_notnull = df_Kobe[df_Kobe["shot_made_flag"].notna()]
# Pivot: total number of shot attempts per "shot_zone_range".
# aggfunc="count" counts the non-null "shot_zone_area" entries of each group.
# The previous np.count_nonzero was a truthiness count: it would skip 0/empty
# values and would count NaN, so it only matched the row count by coincidence.
df_Kobe_notnull_shot_zone_basic_pivot_table_shot_made_flag_total = pd.pivot_table(
    df_Kobe_notnull,
    index=["shot_zone_range"],
    values=["shot_zone_area"],
    aggfunc="count",
).reset_index()  # turn the group index back into an ordinary column
# Pivot: number of shots per ("shot_zone_range", "shot_made_flag") pair.
# "shot_type" is only used as the column to count; aggfunc="count" counts its
# non-null entries (np.count_nonzero would skip 0/empty values and count NaN).
df_Kobe_notnull_shot_zone_basic_pivot_table = pd.pivot_table(
    df_Kobe_notnull,
    index=["shot_zone_range", "shot_made_flag"],
    values=["shot_type"],
    aggfunc="count",
).reset_index()  # flatten the two-level index into ordinary columns
# Keep the rows where shot_made_flag == 1, i.e. the made shots per zone.
df_Kobe_notnull_shot_zone_basic_pivot_table_shot_made_flag_1 = (
    df_Kobe_notnull_shot_zone_basic_pivot_table[
        df_Kobe_notnull_shot_zone_basic_pivot_table["shot_made_flag"] == 1
    ]
)
# Shooting percentage per "shot_zone_range".
# The made-shot table (..._shot_made_flag_1) and the totals table (..._total)
# are built by the preceding steps, so they are reused here instead of
# recomputing the same pivot a second time.
df_Kobe_notnull_shot_zone_basic_pivot_table_shot_made_flag_1_merge = (
    # Join made shots with total attempts; the key is spelled out so the join
    # does not silently change if the two tables ever share another column.
    pd.merge(
        df_Kobe_notnull_shot_zone_basic_pivot_table_shot_made_flag_1,
        df_Kobe_notnull_shot_zone_basic_pivot_table_shot_made_flag_total,
        on="shot_zone_range",
    )
    # "shot_type" holds the made-shot count, "shot_zone_area" the attempt count.
    # Four decimals on the ratio equal two decimals once shown as a percentage.
    .assign(**{"投篮命中率": lambda t: (t["shot_type"] / t["shot_zone_area"]).round(4)})
    # Percentage as a number rounded to two decimals; formatting the raw
    # product as text leaked float artefacts such as 57.309999999999995.
    .assign(**{"投篮命中率(百分比)": lambda t: (t["投篮命中率"] * 100).round(2)})
    # Highest percentage first, which is also the bar order in the chart.
    .sort_values(by="投篮命中率", ascending=False)
)
# Bar chart: shooting percentage per shot zone range.
# Not rendered on its own here; it is placed on the combined Page further down.
c = (
    Bar(init_opts=opts.InitOpts(width="900px", height="450px", theme=ThemeType.CHALK))
    .add_xaxis(
        df_Kobe_notnull_shot_zone_basic_pivot_table_shot_made_flag_1_merge["shot_zone_range"].tolist()
    )
    .add_yaxis(
        "投篮命中率",
        # Coerce to float and round to two decimals so the bars receive real
        # numbers and the "{c} %" labels never show float artefacts, whether
        # the column was stored as text or as numbers.
        df_Kobe_notnull_shot_zone_basic_pivot_table_shot_made_flag_1_merge["投篮命中率(百分比)"]
        .astype(float)
        .round(2)
        .tolist(),
        category_gap="60%",
        label_opts=opts.LabelOpts(formatter="{c} %", margin=0),
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(
            title="科比场上不同位置命中率",
            subtitle="(微信公众号:会Excel的隔壁老王)",
            pos_bottom="83%",
            pos_right="38%",
        ),
        # Tilt the zone labels so the longer names do not overlap.
        xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-15)),
        yaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(formatter="{value} %")),
    )
    # Alternative output: chain .render("bar_base.html") to write an HTML file.
)
# To preview this chart alone in a notebook: c.render_notebook()
# Scatter chart: one series per shot zone range, plotting every shot at its
# (loc_x, loc_y) position. The tuple order fixes the legend/series order.
# Not rendered on its own here; it is placed on the combined Page further down.
shot_zone_ranges = (
    "Less Than 8 ft.",
    "8-16 ft.",
    "16-24 ft.",
    "24+ ft.",
    "Back Court Shot",
)
c1 = Scatter(init_opts=opts.InitOpts(width="700px", height="1300px", bg_color="#ffffff"))
for zone in shot_zone_ranges:
    # Filter once per zone and reuse the slice for both coordinates.
    zone_shots = df_Kobe_notnull[df_Kobe_notnull["shot_zone_range"] == zone]
    # add_xaxis is called before every add_yaxis on purpose: each series is
    # paired with the x data that was set most recently.
    c1.add_xaxis(xaxis_data=zone_shots["loc_x"].tolist())
    c1.add_yaxis(
        series_name=zone,
        y_axis=zone_shots["loc_y"].tolist(),
        symbol_size=2,
        label_opts=opts.LabelOpts(is_show=False),
    )
c1.set_global_opts(
    # Both axes are numeric court coordinates; grid lines are hidden.
    xaxis_opts=opts.AxisOpts(
        type_="value",
        splitline_opts=opts.SplitLineOpts(is_show=False),
    ),
    yaxis_opts=opts.AxisOpts(
        type_="value",
        axistick_opts=opts.AxisTickOpts(is_show=False),
        splitline_opts=opts.SplitLineOpts(is_show=False),
    ),
    tooltip_opts=opts.TooltipOpts(is_show=False),
    title_opts=opts.TitleOpts(
        # "\n" restores the intended line break; the backslash had been lost,
        # leaving a stray literal "n" in the title.
        title="Python散点图展示科比场上不同位置全部投球位置\n (微信公众号:会Excel的隔壁老王)",
        pos_bottom="15%",
        pos_right="22%",
    ),
    legend_opts=opts.LegendOpts(pos_top="85%"),
)
# Alternative output: c1.render("basic_scatter_chart-1.html") writes an HTML file.
# To preview this chart alone in a notebook: c1.render_notebook()
# Put the bar chart and the scatter chart on one page, in that order.
page = Page()
page.add(c, c1)
# Alternative output: page.render("page_draggable_layout.html")
# Render the combined page inline in the notebook.
page.render_notebook()