一 项目说明
1 数据来源:阿里云天池数据集------1955年至2020年的国家人口
2 字段说明:
Data columns (total 14 columns):
Column Non-Null Count Dtype
0 Year 4195 non-null int64 -----年份
1 Country 4195 non-null object -----国家名称
2 Population 4195 non-null int64 -----人口总数
3 Yearly % Change 4195 non-null float64-----人口年变化率
4 Yearly Change 4195 non-null int64 -----人口年改变数量
5 Migrants (net) 3600 non-null float64-----移民人数
6 Median Age 3600 non-null float64-----年龄中位数
7 Fertility Rate 3600 non-null float64-----生育率
8 Density (P/Km²) 4195 non-null object -----人口密度(人/平方千米)
9 Urban Pop % 4082 non-null float64-----城镇人口比例
10 Urban Population 4082 non-null float64-----城镇人口数目
11 Country’s Share of World Pop % 4195 non-null float64-----国家人口数对世界人口总数的贡献率
12 World Population 4195 non-null int64 -----世界人口总数
13 Country Global Rank 4195 non-null int64 -----人口总数世界排名
dtypes: float64(7), int64(5), object(2)
memory usage: 459.0+ KB
二 数据清洗
# 1 导入模块
import pandas as pd
import pyecharts.options as opts
from pyecharts.charts import Bar,Line,Grid,Map
from pyecharts.globals import ThemeType
# 2 Data cleaning
# Raw string so every backslash in the Windows path is taken literally; the
# previous literal mixed "\\" with a bare "\C", which is an invalid escape
# sequence. The resulting path is unchanged.
df = pd.read_csv(r'D:\LearningMaterials\Countries Population from 1995 to 2020.csv')
# Confirm the file was read by showing the first rows.
print(df.head())
# Show the index, memory usage and column dtypes. DataFrame.info() prints its
# report itself and returns None, so the wrapping print() adds a final "None".
print(df.info())
# Some columns contain nulls; fill them with 0. inplace=True modifies df
# itself, otherwise the filled copy would be discarded.
df.fillna(0, inplace=True)
# Verify that no nulls remain.
print(df.isnull().sum())
运行结果:
Year Country … World Population Country Global Rank
0 2020 China … 7794798739 1
1 2019 China … 7713468100 1
2 2018 China … 7631091040 1
3 2017 China … 7547858925 1
4 2016 China … 7464022049 1
[5 rows x 14 columns]
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4195 entries, 0 to 4194
Data columns (total 14 columns):
Column Non-Null Count Dtype
0 Year 4195 non-null int64
1 Country 4195 non-null object
2 Population 4195 non-null int64
3 Yearly % Change 4195 non-null float64
4 Yearly Change 4195 non-null int64
5 Migrants (net) 3600 non-null float64
6 Median Age 3600 non-null float64
7 Fertility Rate 3600 non-null float64
8 Density (P/Km²) 4195 non-null object
9 Urban Pop % 4082 non-null float64
10 Urban Population 4082 non-null float64
11 Country’s Share of World Pop % 4195 non-null float64
12 World Population 4195 non-null int64
13 Country Global Rank 4195 non-null int64
dtypes: float64(7), int64(5), object(2)
memory usage: 459.0+ KB
None
Year 0
Country 0
Population 0
Yearly % Change 0
Yearly Change 0
Migrants (net) 0
Median Age 0
Fertility Rate 0
Density (P/Km²) 0
Urban Pop % 0
Urban Population 0
Country’s Share of World Pop % 0
World Population 0
Country Global Rank 0
dtype: int64
三 数据可视化
1 1955~2020年世界人口总数变化
# 3.1 World population totals, 1955~2020
# Add up every country's population per year: one row per year, with the
# total stored in the '人口数量' column.
df1 = df.groupby('Year')['Population'].sum().to_frame('人口数量').reset_index()
year = df1['Year'].tolist()
population = df1['人口数量'].tolist()

# Build the bar chart step by step instead of as one chained expression.
world_bar = Bar(
    init_opts=opts.InitOpts(width='1600px', height='800px', theme=ThemeType.LIGHT)
)
world_bar.add_xaxis(year)
world_bar.add_yaxis(
    "",
    population,
    label_opts=opts.LabelOpts(is_show=False),
)
world_bar.set_global_opts(
    title_opts=opts.TitleOpts(title='1955~2020年世界人口总数变化情况'),
    xaxis_opts=opts.AxisOpts(
        splitline_opts=opts.SplitLineOpts(is_show=True),
        axistick_opts=opts.AxisTickOpts(is_show=False),
    ),
    yaxis_opts=opts.AxisOpts(
        splitline_opts=opts.SplitLineOpts(is_show=True),
        axistick_opts=opts.AxisTickOpts(is_show=True),
    ),
    toolbox_opts=opts.ToolboxOpts(is_show=True),
)
# Value labels stay hidden on every series.
world_bar.set_series_opts(
    label_opts=opts.LabelOpts(is_show=False),
)
# Swap the axes so the bars run horizontally.
world_bar.reversal_axis()
# p1 receives whatever render() returns, not the chart object.
p1 = world_bar.render('1.html')
运行结果:
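可以看到,1955~2020年世界人口总数在逐步增加;从图上看,1955到2015年人口增长速度较快;
而从2015年开始,世界人口增长速度看起来开始变慢,但是总数还是不断增加。
(注意:数据集共4195行,不是66的整数倍,说明并非每个国家都有1955~2020全部66个年份的记录;若图中年份按等距排列而实际年份间隔不一致,相邻柱子之间的差值就不能直接当作增长速度来比较,建议结合“Yearly % Change”字段核对。)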
2 每个国家每年人口变化情况
class DrawLine:
    """Line chart of one country's population across the years in ``df``."""

    def line_charts(self, country):
        """Build, without rendering, the population line chart for ``country``.

        Rows are selected from the module-level ``df`` where the ``Country``
        column equals ``country`` exactly. The selected rows and the types of
        the two axis containers are printed for inspection.

        Returns the configured ``Line`` chart.
        """
        rows = df[df['Country'] == country]
        # Both axes are handed to pyecharts as integer-typed pandas Series.
        # NOTE(review): the original author reported that passing the plain
        # lists from .tolist() did not work; the reason is not visible here.
        years = pd.Series(rows['Year'].values.tolist(), dtype=int)
        headcount = pd.Series(rows['Population'].values.tolist(), dtype=int)
        print(rows)
        print(type(years))
        print(type(headcount))

        chart = Line(
            init_opts=opts.InitOpts(width='1600px', height='800px', theme=ThemeType.LIGHT)
        )
        chart.add_xaxis(years)
        chart.add_yaxis(
            series_name=country,
            y_axis=headcount,
            label_opts=opts.LabelOpts(is_show=False),
        )

        # The x-axis is a category axis, so years appear in row order.
        year_axis = opts.AxisOpts(
            type_='category',
            boundary_gap=False,
        )
        population_axis = opts.AxisOpts(
            type_='value',
            splitline_opts=opts.SplitLineOpts(is_show=True),
        )
        chart.set_global_opts(
            title_opts=opts.TitleOpts(title=country + '每年人口变化情况'),
            tooltip_opts=opts.TooltipOpts(trigger='axis', position='right'),
            xaxis_opts=year_axis,
            yaxis_opts=population_axis,
        )
        return chart
if __name__ == '__main__':
    # Population trend of any single country, 1955~2020.
    d = DrawLine()
    # Strip stray whitespace so "India " still matches the Country column.
    country = input('Please input the Country Name:\n').strip()
    # An unknown name would otherwise render an empty chart without warning.
    if country not in set(df['Country']):
        raise SystemExit('No data for country: ' + repr(country))
    line = d.line_charts(country)
    line.render('YearlyTrendOfCountry.html')
运行结果:
Please input the Country Name:
India
从折线图可以看出,印度在1955到2016年间人口总数急剧上升;从2016年开始,人口增长速度看起来开始放慢(横轴为类目轴,各年份等距排列;若数据中的年份间隔不一致,这一变化可能只是间隔变小造成的,需结合“Yearly % Change”字段核对)。
3 某年人口老龄化程度最严重的前10位和倒数10位国家–人口老龄化程度通常用年龄中位数来衡量
class populationAgingTop:
    """Bar chart of the ten countries with the highest median age in a year."""

    def drawBar(self, year):
        """Build the top-10 median-age bar chart for ``year``.

        ``year`` must be an int, because it is compared against the int64
        ``Year`` column of the module-level ``df``. The selected country names
        and median ages are printed for inspection.

        Returns the configured ``Bar`` chart (not rendered).
        """
        df3 = df[df['Year'] == year].sort_values(by='Median Age', ascending=False)
        df3_Top = df3[:10]  # ten highest median ages
        x_data1 = df3_Top.Country.values.tolist()
        y_data1 = df3_Top['Median Age'].values.tolist()
        print(x_data1)
        print(y_data1)

        barTop = Bar()
        barTop.add_xaxis(x_data1)
        barTop.add_yaxis(
            # Series names are text; pass the year as a string, matching how
            # the title below already formats it.
            str(year),
            y_data1,
            label_opts=opts.LabelOpts(is_show=False),
        )
        barTop.set_global_opts(
            title_opts=opts.TitleOpts(title=str(year) + '年人口老龄化程度最严重的前10位国家'),
            tooltip_opts=opts.TooltipOpts(trigger='axis'),
            legend_opts=opts.LegendOpts(is_show=True),
        )
        return barTop
class populationAgingBottom:
    """Bar chart of the ten countries with the lowest non-zero median age."""

    def drawBar2(self, year):
        """Build the bottom-10 median-age bar chart for ``year``.

        Rows of the module-level ``df`` for ``year`` are sorted by median age
        in descending order. Rows whose median age is not above 0 are dropped,
        and the last ten remaining rows are charted as horizontal bars.

        Returns the configured ``Bar`` chart (not rendered).
        """
        ranked = df[df['Year'] == year].sort_values(by='Median Age', ascending=False)
        # Keep only rows with a real (above zero) median age.
        with_age = ranked[ranked['Median Age'] > 0]
        # The final ten rows of the descending ranking are the youngest ten.
        youngest = with_age.tail(10)
        print(youngest['Median Age'])

        countries = youngest['Country'].values.tolist()
        median_ages = youngest['Median Age'].values.tolist()
        print(countries)
        print(median_ages)

        chart = Bar()
        chart.add_xaxis(countries)
        chart.add_yaxis(
            "",
            median_ages,
            label_opts=opts.LabelOpts(is_show=False),
        )
        # Title and legend are placed at 48% from the top.
        chart.set_global_opts(
            title_opts=opts.TitleOpts(
                title=str(year) + '年人口老龄化程度最轻的前10位国家',
                pos_top='48%',
            ),
            tooltip_opts=opts.TooltipOpts(trigger='axis'),
            legend_opts=opts.LegendOpts(is_show=True, pos_top='48%'),
        )
        chart.reversal_axis()
        return chart
if __name__ == '__main__':
    # Ten oldest and ten youngest countries by median age, on one page.
    year = input('Please input the year:\n')
    AgingTop = populationAgingTop()
    AgingBottom = populationAgingBottom()
    # input() returns a string, while the Year column is int64, so the value
    # must be converted to int before it is used for filtering.
    Top_10 = AgingTop.drawBar(int(year))
    Bottom_10 = AgingBottom.drawBar2(int(year))

    # Top chart is added with pos_bottom='60%', bottom chart with pos_top='60%'.
    page = Grid(
        init_opts=opts.InitOpts(width='1600px', height='800px', theme=ThemeType.LIGHT)
    )
    page.add(Top_10, grid_opts=opts.GridOpts(pos_bottom='60%'))
    page.add(Bottom_10, grid_opts=opts.GridOpts(pos_top='60%'))
    # grid receives whatever render() returns, not the Grid object.
    grid = page.render('PopulationAging.html')
运行结果:
Please input the year:
2020
4 任意年份世界人口分布情况—世界地图
class globalMap:
    """World map coloured by each country's population in a given year."""

    def drawMap(self, year):
        """Build the world population map for ``year``.

        ``year`` must be an int, because it is compared against the int64
        ``Year`` column of the module-level ``df``. The country names and
        populations that feed the map are printed for inspection.

        Returns the configured ``Map`` chart (not rendered).

        Raises:
            ValueError: if ``df`` has no rows for ``year``.
        """
        dfMap = df[df['Year'] == year]
        # Without rows there is no maximum for the visual map scale. Fail with
        # a clear message instead of the opaque error max() raises on an
        # empty list.
        if dfMap.empty:
            raise ValueError('No population data for year ' + str(year))

        x_data = dfMap['Country'].values.tolist()
        y_data = dfMap['Population'].values.tolist()
        print(x_data)
        print(y_data)

        # Named world_map so the builtin map() is not shadowed.
        world_map = Map(
            init_opts=opts.InitOpts(width='1600px', height='800px', theme=ThemeType.MACARONS)
        )
        world_map.add(
            # Series names are text; pass the year as a string, matching how
            # the title below already formats it.
            str(year),
            [list(z) for z in zip(x_data, y_data)],
            'world',
        )
        world_map.set_global_opts(
            title_opts=opts.TitleOpts(title=str(year) + '年世界人口分布地图'),
            # Scale the colour range to the largest population of that year.
            visualmap_opts=opts.VisualMapOpts(max_=max(y_data)),
        )
        return world_map
if __name__ == '__main__':
    # World map of population for the chosen year.
    # NOTE: `year` is not read here; this reuses the string entered earlier in
    # the script, hence the int() conversion.
    gMap = globalMap()
    # Bound to world_map rather than `map` so the builtin map() stays usable.
    world_map = gMap.drawMap(int(year))
    world_map.render('GlobalMap.html')
运行结果:
发现世界人口数目较多的国家集中分布在亚欧大陆,尤其是中国和印度的人口总数排名极其靠前
5 任意年份人口总数最多和最少的三位国家
class populationRanking:
    """Bar charts of the three most and three least populous countries."""

    def drawBarTop(self, year):
        """Build the horizontal bar chart of the three largest populations.

        ``year`` must be an int, because it is compared against the int64
        ``Year`` column of the module-level ``df``. The selected names and
        values are printed for inspection.

        Returns the configured ``Bar`` chart (not rendered).
        """
        dfTop = df[df['Year'] == year].sort_values(by='Population', ascending=False)
        x_data = dfTop[:3].Country.values.tolist()
        y_data = dfTop[:3].Population.values.tolist()
        print(x_data)
        print(y_data)

        bar = Bar()
        bar.add_xaxis(x_data)
        bar.add_yaxis(
            # Series names are text; pass the year as a string, matching how
            # the title below already formats it.
            str(year),
            y_data,
            label_opts=opts.LabelOpts(is_show=False),
        )
        bar.set_global_opts(
            title_opts=opts.TitleOpts(title=str(year) + '年人口数目最多的三位国家'),
            legend_opts=opts.LegendOpts(is_show=True, pos_left='center'),
        )
        bar.reversal_axis()
        return bar

    def drawBarBottom(self, year):
        """Build the horizontal bar chart of the three smallest populations.

        Rows with a population that is not above 0 are dropped before the last
        three rows of the descending ranking are taken.

        Returns the configured ``Bar`` chart (not rendered).
        """
        dfBottom = df[df['Year'] == year].sort_values(by='Population', ascending=False)
        dfBottom = dfBottom[dfBottom['Population'] > 0]  # exclude zero / filled-in values
        x_data = dfBottom['Country'][-3:].values.tolist()
        y_data = dfBottom['Population'][-3:].values.tolist()
        print(x_data)
        print(y_data)

        bar = Bar()
        bar.add_xaxis(x_data)
        bar.add_yaxis(
            str(year),
            y_data,
            label_opts=opts.LabelOpts(is_show=False),
        )
        bar.set_global_opts(
            title_opts=opts.TitleOpts(title=str(year) + '年人口数目最少的三位国家', pos_top='60%'),
            legend_opts=opts.LegendOpts(is_show=True),
        )
        bar.reversal_axis()
        return bar

    def gridBar(self, year, path='PopulationRanking.html'):
        """Stack the top-3 and bottom-3 charts in one page and render it.

        The script calls ``gridBar`` but the class did not define it. This
        version mirrors the Grid layout used for the ageing charts.
        NOTE(review): the output file name and the exact grid offsets are
        assumptions; adjust them if a different layout was intended.

        Returns whatever ``Grid.render`` returns for ``path``.
        """
        grid = Grid(
            init_opts=opts.InitOpts(width='1600px', height='800px', theme=ThemeType.LIGHT)
        )
        grid.add(self.drawBarTop(year), grid_opts=opts.GridOpts(pos_bottom='60%'))
        # The lower chart's title sits at 60%, so start its grid below that.
        grid.add(self.drawBarBottom(year), grid_opts=opts.GridOpts(pos_top='70%'))
        return grid.render(path)
if __name__ == '__main__':
    # Three most and three least populous countries for a chosen year.
    Rank = populationRanking()
    year = input('Please input the year:\n')
    # input() yields a string; the chart methods filter on an int year.
    Rank.gridBar(int(year))
运行结果: