第六次全国人口普查数据分析

本文数据来源于各省市第六次全国人口普查主要数据公报。

Python 版本:2.7.x

pyecharts版本:0.5.11

 

# -*- coding:utf8 -*-
import os
import json
import requests
import numpy as np
import pandas as pd
from pyecharts import Geo

def saveProvienceData(dataFolderPath, resultOutputPath):
    '''
    Convert each province's Excel census sheets into one JSON file.

    Every sub-directory of dataFolderPath is treated as one province. Inside
    it, each file whose name contains "rkpc" is read with pandas. The first
    row of a sheet is skipped (it holds the province-wide total); every other
    row is geocoded through getLocation() using its 'area' column and stored
    together with its 'population' column. The records of one province are
    written to <resultOutputPath>/<province>.json.

    params:
    dataFolderPath: folder that contains one sub-folder per province
    resultOutputPath: folder that receives the JSON files (created if missing)
    return:
    None
    '''
    if not os.path.exists(resultOutputPath):
        os.makedirs(resultOutputPath)

    for province in os.listdir(dataFolderPath):
        province_dir = os.path.join(dataFolderPath, province)
        if not os.path.isdir(province_dir):
            # Stray files next to the province folders cannot be listed.
            continue

        province_data = {}

        for sheet_name in os.listdir(province_dir):
            if "rkpc" not in sheet_name:
                continue

            sheet_path = os.path.join(province_dir, sheet_name)
            print(sheet_path)
            df = pd.read_excel(sheet_path)

            # The first row is the province-wide total, so start at row 1.
            for _, row in df.iloc[1:].iterrows():
                city_name = row['area']

                loc = getLocation(city_name)
                try:
                    position = loc['result']['location']
                    lat = position['lat']
                    lng = position['lng']
                except (KeyError, TypeError):
                    # The response carried no coordinates; skip this city
                    # instead of losing the whole province.
                    print("skip %r: no location in geocoding response %r"
                          % (city_name, loc))
                    continue

                province_data[city_name] = {
                    'city': city_name,
                    'lat': lat,
                    'lng': lng,
                    'popSize': int(row['population']),
                }

        output_file = os.path.join(resultOutputPath, province + ".json")

        # Text mode: json.dump writes str, which a binary handle rejects on
        # Python 3.
        with open(output_file, "w") as fp:
            json.dump(province_data, fp=fp, indent=4)

def getLocation(city, page_num=0, timeout=10):
    '''
    Query the Baidu Map geocoding endpoint for a city.

    params:
    city: address / city name to look up
    page_num: result page number, default 0 (kept for backward
              compatibility; it is not sent with the request)
    timeout: seconds to wait for the server before giving up
    return:
    decodejson: the decoded JSON response; callers in this file read the
                coordinates from decodejson['result']['location']
    raises:
    requests.RequestException when the request fails, times out or the
    server answers with an HTTP error status
    '''
    user_agent = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.82 Safari/537.36'
    headers = {'User-Agent': user_agent}
    pa = {
        'address': city,
        'output': 'json',
        # Placeholder: replace with a real API key before running.
        'ak': '**********************'
    }
    # Without a timeout a stalled connection blocks the whole batch forever.
    response = requests.get(
        "http://api.map.baidu.com/geocoding/v3/",
        params=pa,
        headers=headers,
        timeout=timeout,
    )
    # Fail on HTTP errors here rather than while parsing an error page.
    response.raise_for_status()
    decodejson = response.json()
    return decodejson

def showData(province, data, coords, outputPath):
    '''
    Render population data as a scatter chart on the map of China.

    params:
    province: base name (without extension) of the generated HTML file
    data: (city name, population) pairs, passed to Geo.cast()
    coords: dict mapping city name to its coordinate pair; it is only
            printed for inspection and is not handed to the chart
    outputPath: folder that receives <province>.html
    return:
    None
    '''
    geo = Geo(
        "全国主要城市第六次人口普查数据",
        "data from www.stats.gov.cn",
        title_color="#fff",
        title_pos="center",
        width=1200,
        height=600,
        background_color="#404a59",
    )
    attr, value = geo.cast(data)
    for pos in attr:
        # print() with one argument behaves the same on Python 2 and 3.
        print(pos)
        # .get() so a city without stored coordinates cannot abort the
        # rendering just because of this debug output.
        print(coords.get(pos))
    geo.add(
        "",
        attr,
        value,
        visual_range=[1, 15000000],
        # geo_cities_coords=coords is left disabled, as in the original.
        maptype="china",
        type='scatter',
        # type='heatmap' is the alternative the original kept disabled.
        visual_text_color="#fff",
        is_piecewise=True,
        visual_split_number=20,
        symbol_size=15,
        is_visualmap=True,
    )
    # os.path.join replaces the former outputPath + ".\\" concatenation,
    # which produced a "dir.\file" path.
    geo.render(os.path.join(outputPath, province + ".html"))

def chinaDraw(dataPath, resultPath):
    '''
    Merge the per-province JSON files and draw the national chart.

    Each *.json file in dataPath is expected to hold a dict whose values
    have the keys "city", "lat", "lng" and "popSize" (the layout written by
    saveProvienceData). All records are collected and passed to showData(),
    which writes china.html into resultPath.

    params:
    dataPath: folder containing the per-province JSON files
    resultPath: folder that receives the chart (created if missing)
    return:
    None
    '''
    if not os.path.exists(resultPath):
        os.makedirs(resultPath)

    coords = {}
    population_data = []

    for file_name in os.listdir(dataPath):
        file_path = os.path.join(dataPath, file_name)

        # Sub-folders and non-JSON files would make open()/json.load() fail.
        if not (os.path.isfile(file_path) and file_name.endswith(".json")):
            continue

        print(file_path)

        with open(file_path, 'rb') as fp:
            pop_dict = json.load(fp)

        for pop_data in pop_dict.values():
            city_name = pop_data["city"]
            # pyecharts custom coordinates are [longitude, latitude].
            coords[city_name] = [pop_data["lng"], pop_data["lat"]]
            population_data.append((city_name, pop_data["popSize"]))

    showData("china", population_data, coords, resultPath)



if __name__ == '__main__':
    # Two-step workflow. Both steps are left disabled, exactly as in the
    # original; uncomment the one you want to run.
    #
    # Step 1: geocode the Excel sheets into per-province JSON files
    # (getLocation() ships with a placeholder API key that must be replaced).
    #saveProvienceData(".\\data", ".\\provinceData")
    #
    # Step 2: draw the national scatter map from those JSON files.
    #chinaDraw(".\\provinceData\\", ".\\chinaOutput")

    # An if-body made only of comments is a syntax error; keep it valid.
    pass

 

数据处理结果

评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值