Python+Pyecharts实现1955~2020世界人口分析报告

欧欧la

已于 2022-03-21 16:02:17 修改

阅读量5k

点赞数 3

文章标签： python 数据分析

于 2022-03-21 15:23:27 首次发布

本文链接：https://blog.csdn.net/ljz7799/article/details/123635157

版权

一、项目说明

1 数据来源：阿里云天池数据集------1955年至2020年的国家人口
2 字段说明：
Data columns (total 14 columns):
Column Non-Null Count Dtype

0 Year 4195 non-null int64 -----年份
1 Country 4195 non-null object -----国家名称
2 Population 4195 non-null int64 -----人口总数
3 Yearly % Change 4195 non-null float64-----人口年变化率
4 Yearly Change 4195 non-null int64 -----人口年改变数量
5 Migrants (net) 3600 non-null float64-----移民人数
6 Median Age 3600 non-null float64-----年龄中位数
7 Fertility Rate 3600 non-null float64-----生育率
8 Density (P/Km²) 4195 non-null object -----人口密度（人/平方千米）
9 Urban Pop % 4082 non-null float64-----城镇人口比例
10 Urban Population 4082 non-null float64-----城镇人口数目
11 Country’s Share of World Pop % 4195 non-null float64-----国家人口数对世界人口总数的贡献率
12 World Population 4195 non-null int64 -----世界人口总数
13 Country Global Rank 4195 non-null int64 -----人口总数世界排名
dtypes: float64(7), int64(5), object(2)
memory usage: 459.0+ KB

二、数据清洗

# 1 导入模块
import pandas as pd
import pyecharts.options as opts
from pyecharts.charts import Bar,Line,Grid,Map
from pyecharts.globals import ThemeType

# 2 Data cleaning
# Raw string: the original literal mixed an escaped "\\" with a bare "\C",
# which is an invalid escape sequence (a warning on current Python versions).
# The resulting path is exactly the same.
df = pd.read_csv(r'D:\LearningMaterials\Countries Population from 1995 to 2020.csv')
# Check that the file was read correctly.
print(df.head())
# Show the index, memory usage and column dtypes. info() prints its report
# itself and returns None, so this print() also emits a trailing "None".
print(df.info())
# Some columns contain nulls; fill them with 0. inplace=True modifies df
# directly -- without it fillna() returns a new frame and df stays unchanged.
df.fillna(0,inplace=True)
# Verify that no nulls remain.
print(df.isnull().sum())

运行结果：
Year Country … World Population Country Global Rank
0 2020 China … 7794798739 1
1 2019 China … 7713468100 1
2 2018 China … 7631091040 1
3 2017 China … 7547858925 1
4 2016 China … 7464022049 1

[5 rows x 14 columns]
<class ‘pandas.core.frame.DataFrame’>
RangeIndex: 4195 entries, 0 to 4194
Data columns (total 14 columns):
Column Non-Null Count Dtype

0 Year 4195 non-null int64
1 Country 4195 non-null object
2 Population 4195 non-null int64
3 Yearly % Change 4195 non-null float64
4 Yearly Change 4195 non-null int64
5 Migrants (net) 3600 non-null float64
6 Median Age 3600 non-null float64
7 Fertility Rate 3600 non-null float64
8 Density (P/Km²) 4195 non-null object
9 Urban Pop % 4082 non-null float64
10 Urban Population 4082 non-null float64
11 Country’s Share of World Pop % 4195 non-null float64
12 World Population 4195 non-null int64
13 Country Global Rank 4195 non-null int64
dtypes: float64(7), int64(5), object(2)
memory usage: 459.0+ KB
None
Year 0
Country 0
Population 0
Yearly % Change 0
Yearly Change 0
Migrants (net) 0
Median Age 0
Fertility Rate 0
Density (P/Km²) 0
Urban Pop % 0
Urban Population 0
Country’s Share of World Pop % 0
World Population 0
Country Global Rank 0
dtype: int64

三、数据可视化

1 1955~2020年世界人口总数变化

# 3.1 World population total per year, 1955~2020
# Collapse the per-country rows into one summed population figure per year.
df1 = (
    df.groupby('Year')['Population']
    .sum()
    .to_frame('人口数量')
    .reset_index()
)
year = df1['Year'].tolist()
population = df1['人口数量'].tolist()

# Build the bar chart step by step; every pyecharts setter mutates the chart.
chart = Bar(
    init_opts=opts.InitOpts(
        width='1600px',
        height='800px',
        theme=ThemeType.LIGHT,
    )
)
chart.add_xaxis(year)
chart.add_yaxis("", population, label_opts=opts.LabelOpts(is_show=False))
chart.set_global_opts(
    title_opts=opts.TitleOpts(title='1955~2020年世界人口总数变化情况'),
    xaxis_opts=opts.AxisOpts(
        splitline_opts=opts.SplitLineOpts(is_show=True),
        axistick_opts=opts.AxisTickOpts(is_show=False),
    ),
    yaxis_opts=opts.AxisOpts(
        splitline_opts=opts.SplitLineOpts(is_show=True),
        axistick_opts=opts.AxisTickOpts(is_show=True),
    ),
    toolbox_opts=opts.ToolboxOpts(is_show=True),
)
chart.set_series_opts(label_opts=opts.LabelOpts(is_show=False))
# Swap the axes so the years run down the vertical axis.
chart.reversal_axis()
# render() writes the HTML file; p1 holds whatever render() returns.
p1 = chart.render('1.html')

运行结果：
在这里插入图片描述
可以看到，1955~2020年世界人口总数正在逐步增加；其中，1955到2015年人口增长速度较快；
而从2015年开始，世界人口增长速度开始变慢，但是总数还是不断增加。
2 每个国家每年人口变化情况

class DrawLine:
    """Build a line chart of one country's population over the years."""

    def line_charts(self,country):
        """Return a pyecharts ``Line`` of the yearly population of ``country``.

        Args:
            country: Name compared for exact equality with the ``Country``
                column of the module-level ``df``.

        Returns:
            An un-rendered ``Line``; call ``.render(path)`` on it. When no
            row matches ``country`` the chart simply has no data points.
        """
        line = Line(init_opts=opts.InitOpts(width='1600px',height='800px',theme=ThemeType.LIGHT))
        # Sort chronologically so the x axis always runs from the earliest
        # year to the latest, whatever the row order in the CSV is (the
        # sample output lists each country newest-first).
        df2 = df[df['Country'] == country].sort_values(by='Year')
        # pyecharts serialises its options to JSON, so hand it plain Python
        # lists rather than pandas objects. The years are labels on a
        # category axis and are therefore passed as strings: a number placed
        # on a category axis is read by ECharts as a category index, not as
        # a label.
        x_data = [str(y) for y in df2['Year'].tolist()]
        y_data = df2['Population'].tolist()
        print(df2)
        line.add_xaxis(x_data)
        line.add_yaxis(
            series_name=country,
            y_axis=y_data,
            label_opts=opts.LabelOpts(is_show=False),
        )
        line.set_global_opts(
            title_opts=opts.TitleOpts(title=country+'每年人口变化情况'),
            tooltip_opts=opts.TooltipOpts(trigger='axis',position='right'),
            xaxis_opts=opts.AxisOpts(
                type_='category',
                boundary_gap=False,
            ),
            yaxis_opts=opts.AxisOpts(
                type_='value',
                splitline_opts=opts.SplitLineOpts(is_show=True),
            )
        )
        return line


# The guard must start at column 0; the original line carried a stray leading
# space, which is an IndentationError.
if __name__ == '__main__':
    # Population trend of any single country, 1955~2020
    d = DrawLine()
    country = input('Please input the Country Name:\n')
    line = d.line_charts(country)
    line.render('YearlyTrendOfCountry.html')

运行结果：

Please input the Country Name:
India

在这里插入图片描述
从折线图可以看出，印度在1955到2016年间人口总数急剧上升；从2016年开始，人口增长速度开始放慢。
3 某年人口老龄化程度最严重的前10位和倒数10位国家–人口老龄化程度通常用年龄中位数来衡量

class populationAgingTop:
    """Bar chart of the ten countries with the highest median age in a year."""

    def drawBar(self,year):
        """Return a ``Bar`` of the ten highest ``Median Age`` values.

        Args:
            year: Year compared for equality with the ``Year`` column of the
                module-level ``df``; pass an ``int``, because the column is
                numeric and a string would match no row.

        Returns:
            An un-rendered pyecharts ``Bar`` with one bar per country.
        """
        df3 = df[df['Year']==year].sort_values(by='Median Age',ascending=False)
        df3_Top = df3[:10]  # first ten rows after the descending sort
        x_data1 = df3_Top['Country'].tolist()
        y_data1 = df3_Top['Median Age'].tolist()
        print(x_data1)
        print(y_data1)
        # Top 10
        barTop = Bar()
        barTop.add_xaxis(x_data1)
        barTop.add_yaxis(
            # The series name is also the legend entry; add_yaxis takes it as
            # a str, so the numeric year is converted instead of passed raw.
            str(year),
            y_data1,
            label_opts=opts.LabelOpts(is_show=False),
        )
        barTop.set_global_opts(
            title_opts=opts.TitleOpts(title=str(year)+'年人口老龄化程度最严重的前10位国家'),
            tooltip_opts=opts.TooltipOpts(trigger='axis'),
            legend_opts=opts.LegendOpts(is_show=True),
        )
        return barTop
class populationAgingBottom:
    """Bar chart of the ten countries with the lowest median age in a year."""

    def drawBar2(self,year):
        """Return a horizontal ``Bar`` of the ten lowest ``Median Age`` values.

        Args:
            year: Year compared for equality with the ``Year`` column of the
                module-level ``df``; pass an ``int``.

        Returns:
            An un-rendered pyecharts ``Bar`` with its axes swapped.
        """
        # Sort descending and keep only positive ages: nulls were filled with
        # 0 during cleaning and would otherwise occupy the bottom of the list.
        df3 = df[df['Year'] == year].sort_values(by='Median Age', ascending=False)
        df3_Bottom = df3[df3['Median Age'] > 0]
        # Bottom 10: the last ten rows of the descending sort.
        youngest = df3_Bottom.tail(10)
        print(youngest['Median Age'])
        x_data2 = youngest['Country'].tolist()
        y_data2 = youngest['Median Age'].tolist()
        print(x_data2)
        print(y_data2)
        barBottom = Bar()
        barBottom.add_xaxis(x_data2)
        barBottom.add_yaxis(
            "",
            y_data2,
            label_opts=opts.LabelOpts(is_show=False),
        )
        # Title and legend are pushed down to 48% so they sit above the lower
        # grid when this chart is stacked under another one.
        barBottom.set_global_opts(
            title_opts=opts.TitleOpts(title=str(year)+'年人口老龄化程度最轻的前10位国家',pos_top='48%'),
            tooltip_opts=opts.TooltipOpts(trigger='axis'),
            legend_opts=opts.LegendOpts(is_show=True,pos_top='48%'),
        )
        barBottom.reversal_axis()
        return barBottom


# The guard must start at column 0; the original line carried a stray leading
# space, which is an IndentationError.
if __name__ == '__main__':
    # Ten oldest and ten youngest populations of the chosen year
    year = input('Please input the year:\n')
    # input() returns a str, while the Year column is numeric: convert with
    # int() before filtering, otherwise no row matches.
    AgingTop = populationAgingTop()
    Top_10 = AgingTop.drawBar(int(year))
    AgingBottom = populationAgingBottom()
    Bottom_10 = AgingBottom.drawBar2(int(year))
    # Stack the two charts: top chart in the upper 40%, bottom chart in the
    # lower 40% of the canvas.
    grid = (
        Grid(init_opts=opts.InitOpts(width='1600px',height='800px',theme=ThemeType.LIGHT))
        .add(Top_10,grid_opts=opts.GridOpts(pos_bottom='60%'))
        .add(Bottom_10,grid_opts=opts.GridOpts(pos_top='60%'))
        .render('PopulationAging.html')
    )

运行结果：

Please input the year:
2020

在这里插入图片描述
4 任意年份世界人口分布情况—世界地图

class globalMap:
    """World map coloured by each country's population in a given year."""

    def drawMap(self,year):
        """Return a pyecharts ``Map`` of population per country for ``year``.

        Args:
            year: Year compared for equality with the ``Year`` column of the
                module-level ``df``; pass an ``int``.

        Returns:
            An un-rendered ``Map`` using the ``'world'`` map type.

        Raises:
            ValueError: If ``df`` has no row for ``year`` (previously this
                surfaced as the opaque "max() arg is an empty sequence").
        """
        # Named world_map so the builtin map() is not shadowed.
        world_map = Map(init_opts=opts.InitOpts(width='1600px',height='800px',theme=ThemeType.MACARONS))
        dfMap = df[df['Year']==year]
        x_data = dfMap['Country'].tolist()
        y_data = dfMap['Population'].tolist()
        if not y_data:
            raise ValueError('no population rows found for year ' + str(year))
        print(x_data)
        print(y_data)
        world_map.add(
            # The series name doubles as the legend entry and is a str.
            series_name=str(year),
            data_pair=[list(z) for z in zip(x_data,y_data)],
            maptype='world',
        )
        world_map.set_global_opts(
            title_opts=opts.TitleOpts(title=str(year)+'年世界人口分布地图'),
            # Scale the colour bar up to the largest population of the year.
            visualmap_opts=opts.VisualMapOpts(max_=max(y_data))
        )
        return world_map
if __name__ == '__main__':
    # World map
    # Ask for the year here instead of relying on a `year` variable left
    # behind by an earlier section; on its own this snippet would otherwise
    # fail with a NameError.
    year = input('Please input the year:\n')
    gMap = globalMap()
    # input() returns a str, while the Year column is numeric.
    # Named world_map so the builtin map() is not shadowed at module level.
    world_map = gMap.drawMap(int(year))
    world_map.render('GlobalMap.html')

运行结果：
在这里插入图片描述
发现世界人口数目较多的国家集中分布在亚欧大陆，尤其是中国和印度的人口总数排名极其靠前
5 任意年份人口总数最多和最少的三位国家

class populationRanking:
    """Bar charts of the three most and three least populous countries."""

    def drawBarTop(self,year):
        """Return a horizontal ``Bar`` of the three largest populations.

        Args:
            year: Year compared for equality with the ``Year`` column of the
                module-level ``df``; pass an ``int``.

        Returns:
            An un-rendered pyecharts ``Bar`` with its axes swapped.
        """
        dfTop = df[df['Year']==year].sort_values(by='Population',ascending=False)
        top3 = dfTop[:3]
        x_data = top3['Country'].tolist()
        y_data = top3['Population'].tolist()
        print(x_data)
        print(y_data)
        bar = Bar()
        bar.add_xaxis(x_data)
        bar.add_yaxis(
            # The series name doubles as the legend entry and is a str.
            str(year),
            y_data,
            label_opts=opts.LabelOpts(is_show=False),
        )
        bar.set_global_opts(
            title_opts=opts.TitleOpts(title=str(year)+'年人口数目最多的三位国家'),
            legend_opts=opts.LegendOpts(is_show=True,pos_left='center'),
        )
        bar.reversal_axis()
        return bar

    def drawBarBottom(self,year):
        """Return a horizontal ``Bar`` of the three smallest populations.

        Args:
            year: Year compared for equality with the ``Year`` column of the
                module-level ``df``; pass an ``int``.

        Returns:
            An un-rendered pyecharts ``Bar`` with its axes swapped.
        """
        dfBottom = df[df['Year']==year].sort_values(by='Population',ascending=False)
        # Drop non-positive populations (zeros / filled nulls) before taking
        # the tail of the descending sort.
        dfBottom = dfBottom[dfBottom['Population']>0]
        bottom3 = dfBottom.tail(3)
        x_data = bottom3['Country'].tolist()
        y_data = bottom3['Population'].tolist()
        print(x_data)
        print(y_data)
        bar = Bar()
        bar.add_xaxis(x_data)
        bar.add_yaxis(
            str(year),
            y_data,
            label_opts=opts.LabelOpts(is_show=False),
        )
        bar.set_global_opts(
            title_opts=opts.TitleOpts(title=str(year)+'年人口数目最少的三位国家',pos_top='60%'),
            legend_opts=opts.LegendOpts(is_show=True),
        )
        bar.reversal_axis()
        return bar

    def gridBar(self,year,path='PopulationRanking.html'):
        """Stack both ranking charts in one ``Grid`` and render it to ``path``.

        The script entry point calls this method, but the class did not
        define it. It is reconstructed here after the Grid layout used for
        the ageing charts; the output file name and the lower grid offset
        are assumptions -- adjust them if a different layout was intended.

        Args:
            year: Year forwarded to ``drawBarTop`` and ``drawBarBottom``.
            path: Output HTML file.

        Returns:
            Whatever ``Grid.render`` returns for ``path``.
        """
        grid = Grid(init_opts=opts.InitOpts(width='1600px',height='800px',theme=ThemeType.LIGHT))
        grid.add(self.drawBarTop(year),grid_opts=opts.GridOpts(pos_bottom='60%'))
        # The lower chart's title sits at 60%, so its plot area starts below.
        grid.add(self.drawBarBottom(year),grid_opts=opts.GridOpts(pos_top='70%'))
        return grid.render(path)


if __name__ == '__main__':
    # Three most and three least populous countries
    # (the original body mixed spaces and a tab, which Python rejects).
    year = input('Please input the year:\n')
    Rank = populationRanking()
    # input() returns a str, while the Year column is numeric.
    Rank.gridBar(int(year))