本文数据来源于各省市第六次人口普查主要数据公报。
Python 版本:2.7.x
pyecharts 版本:0.5.11
# -*- coding:utf8 -*-
import os
import json
import requests
import numpy as np
import pandas as pd
from pyecharts import Geo
def saveProvienceData(dataFolderPath, resultOutputPath):
    '''
    Convert the per-province Excel census sheets into JSON files.

    Every entry of dataFolderPath is treated as one province folder. Each
    file inside it whose name contains "rkpc" is loaded with pandas. The
    first data row (province-wide total) is skipped; for every remaining row
    the city in the 'area' column is geocoded through getLocation() and
    stored together with its 'population' value. One "<province>.json" file
    is written into resultOutputPath per province.

    Cities whose geocoding response carries no result/location are reported
    on stdout and left out of the JSON instead of aborting the whole run.

    params:
        dataFolderPath: folder holding one sub-folder per province
        resultOutputPath: folder receiving the JSON files (created if missing)
    return:
        None
    '''
    provinces = os.listdir(dataFolderPath)

    # Create the output folder once instead of re-checking for every province.
    if not os.path.exists(resultOutputPath):
        os.makedirs(resultOutputPath)

    for province in provinces:
        # os.path.join keeps the paths valid on non-Windows systems too.
        province_dir = os.path.join(dataFolderPath, province)
        province_data = {}

        for entry in os.listdir(province_dir):
            if "rkpc" not in entry:
                continue
            file_name = os.path.join(province_dir, entry)
            print(file_name)
            df = pd.read_excel(file_name)

            # The first row is the province-wide total, so drop it.
            cities = df.iloc[1:]
            for city_name, population in zip(cities['area'],
                                             cities['population']):
                loc = getLocation(city_name)
                location = (loc.get('result') or {}).get('location')
                if not location:
                    # %r keeps the message printable whatever the console
                    # encoding is.
                    print("geocoding failed for %r: %r" % (city_name, loc))
                    continue
                province_data[city_name] = {
                    'city': city_name,
                    'lat': location['lat'],
                    'lng': location['lng'],
                    'popSize': int(population),
                }

        out_file = os.path.join(resultOutputPath, province + ".json")
        # Text mode: json.dump emits str, which a binary-mode file rejects
        # on Python 3.
        with open(out_file, "w") as fp:
            json.dump(province_data, fp, indent=4)
def getLocation(city, page_num=0, timeout=10):
    '''
    Look up a city through the Baidu Maps geocoding web API.

    params:
        city: address / city name to geocode
        page_num: result page number, default 0. It is not sent with the
            request; the parameter is kept so existing callers keep working.
        timeout: seconds to wait for the server before giving up
    return:
        decodejson: the decoded JSON response. Callers in this file read the
            coordinates from ['result']['location']['lat'] and ['lng'].
    raises:
        requests.exceptions.RequestException: on connection errors or when
            the server does not answer within `timeout` seconds
        ValueError: when the response body is not valid JSON
    '''
    user_agent = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.82 Safari/537.36'
    headers = {'User-Agent': user_agent}
    pa = {
        'address': city,
        'output': 'json',
        # Placeholder: replace with a real Baidu Maps API key before running.
        'ak': '**********************'
    }
    # Without a timeout requests waits indefinitely on an unresponsive server,
    # which would stall the whole batch run.
    response = requests.get(
        "http://api.map.baidu.com/geocoding/v3/",
        params=pa,
        headers=headers,
        timeout=timeout,
    )
    decodejson = response.json()
    return decodejson
def showData(province, data, coords, outputPath):
    '''
    Render the population data as a scatter map and save it as HTML.

    params:
        province: base name of the HTML file to write
        data: (city_name, population) pairs, as built by chinaDraw()
        coords: dict mapping city name -> coordinates. Only used for the
            debug print below; it is not handed to the chart because the
            geo_cities_coords option is disabled.
        outputPath: folder in which "<province>.html" is written
    return:
        None
    '''
    geo = Geo(
        "全国主要城市第六次人口普查数据",
        "data from www.stats.gov.cn",
        title_color="#fff",
        title_pos="center",
        width=1200,
        height=600,
        background_color="#404a59",
    )
    attr, value = geo.cast(data)

    # Debug output: every city together with its stored coordinates.
    # print(x) with a single argument behaves the same on Python 2 and 3.
    for pos in attr:
        print(pos)
        print(coords[pos])

    geo.add(
        "",
        attr,
        value,
        visual_range=[1, 15000000],
        # geo_cities_coords=coords,   # disabled in the original
        maptype="china",
        type='scatter',
        # type='heatmap',             # alternative rendering, disabled
        visual_text_color="#fff",
        is_piecewise=True,
        visual_split_number=20,
        symbol_size=15,
        is_visualmap=True,
    )
    # The original concatenated outputPath + ".\\" + name, which yields
    # "dir.\name" and only works through Windows path normalisation.
    geo.render(os.path.join(outputPath, province + ".html"))
def chinaDraw(dataPath, resultPath):
    '''
    Load every province JSON file and draw one nationwide map.

    Each file in dataPath is parsed as JSON; every entry contributes a
    (city, popSize) pair and a coordinate record. The collected data is
    passed to showData(), which writes "china.html" into resultPath.

    params:
        dataPath: folder containing the per-province JSON files
        resultPath: output folder for the rendered map (created if missing)
    return:
        None
    '''
    if not os.path.exists(resultPath):
        os.makedirs(resultPath)

    coords = {}
    population_data = []
    for entry in os.listdir(dataPath):
        # os.path.join avoids a doubled separator when dataPath already ends
        # with one and keeps the path valid outside Windows.
        filePath = os.path.join(dataPath, entry)
        print(filePath)
        with open(filePath, 'rb') as fp:
            pop_dict = json.load(fp)

        for pop_data in pop_dict.values():
            city_name = pop_data["city"]
            # NOTE(review): stored as [lat, lng]; confirm the order expected
            # by the chart before enabling geo_cities_coords in showData().
            coords[city_name] = [pop_data["lat"], pop_data["lng"]]
            population_data.append((city_name, pop_data["popSize"]))

    showData("china", population_data, coords, resultPath)
if __name__ == '__main__':
    # The two stages are run one at a time; uncomment the one you need.
    # Stage 1: build the per-province JSON files (calls getLocation for
    # every city, so a real API key must be set there first).
    # saveProvienceData(".\\data", ".\\provinceData")
    # Stage 2: render the nationwide map from those JSON files.
    # chinaDraw(".\\provinceData\\", ".\\chinaOutput")
    pass  # an `if` body made only of comments is a syntax error
数据处理结果