python画地图模拟迁徙_python爬取百度地图迁徙-迁入地来源和迁出目的地

百度地图迁徙链接为：http://qianxi.baidu.com/

建议尽早爬取数据,以后可能会关闭

代码为:

import random

import time

from urllib import request

import re

import xlwt

from utils.read_write import readTXT

def set_style(name, height, bold=False):
    """Build an xlwt cell style that carries only a font definition.

    Args:
        name: Font family name, e.g. 'Times New Roman'.
        height: Font height, assigned unchanged to ``xlwt.Font.height``.
        bold: Whether the font is bold.

    Returns:
        An ``xlwt.XFStyle`` whose font has the given name, height and
        boldness and uses palette colour index 4.
    """
    font = xlwt.Font()
    font.name = name
    font.bold = bold
    # xlwt spells this attribute ``colour_index``; assigning ``color_index``
    # only creates an unused attribute and the colour is never applied.
    font.colour_index = 4
    font.height = height

    style = xlwt.XFStyle()
    style.font = font
    return style

# Workbook and sheet that receive the scraped rankings.
f = xlwt.Workbook()
sheet2 = f.add_sheet(u'sheet2', cell_overwrite_ok=True)

# One title per column written by the crawl loop: columns 0-5 hold the city
# rankings (move-in, then move-out) and columns 6-11 the province rankings.
# The original list stopped after the first six titles, leaving the province
# columns without a header.
row0 = [
    u'迁入城市', u'所在城市', u'占比',
    u'迁出城市', u'所在城市', u'占比',
    u'迁入省份', u'所在城市', u'占比',
    u'迁出省份', u'所在城市', u'占比',
]

# Fill row 0 with the titles; the bold style is built once and shared.
header_style = set_style('Times New Roman', 200, True)
for col, title in enumerate(row0):
    sheet2.write(0, col, title, header_style)

# Browser-like User-Agent sent with every request made through urlopen().
headers = {"User-agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 "
                         "(KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0"}

opener = request.build_opener()
# OpenerDirector takes its default headers from ``addheaders``, a list of
# (name, value) tuples. The original assigned ``add_headers = [headers]``,
# an attribute urllib never reads, so the default Python-urllib agent was
# sent instead of the one above.
opener.addheaders = list(headers.items())
# Make this opener the one used by request.urlopen().
request.install_opener(opener)

# Style applied to every data cell.
default = set_style('Times New Roman', 220)

# Parallel lists: ID[k] is the Baidu city code and name[k] its city name.
ID = []
name = []

# Raw string so the Windows backslashes are never read as escape sequences
# (the original relied on \p, \j, \d and \B being invalid escapes).
lines = readTXT(r'D:\project\jianguiyuan\data\BaiduMap_cityCode_1102.txt')

# Line 0 is skipped (presumably a header row - confirm against the file) and
# at most 388 records are read, as before. The upper bound is clamped to the
# real length so a shorter file no longer raises IndexError.
# Assumes readTXT returns an indexable sequence of text lines.
for i in range(1, min(389, len(lines))):
    obj = lines[i].split(',')
    if len(obj) < 2:
        # Blank or malformed record: no "code,name" pair to read.
        continue
    ID.append(str(obj[0]))
    name.append(str(obj[1]))

# Dates to crawl as YYYYMMDD strings: 2020-01-01..2020-01-31 followed by
# 2020-02-01..2020-02-25 (range() end bounds are exclusive).
date_list = [str(riqi) for riqi in range(20200101, 20200132)]
date_list += [str(riqi) for riqi in range(20200201, 20200226)]

# Each pattern captures (place name, value) from one entry of the response.
# The province pattern keeps the quotes outside the capture group; the
# original captured them, so province names were written with literal quotes.
_CITY_ENTRY = re.compile(
    r'{"city_name":"(.*?)","province_name":".*?","value":(.*?)}')
_PROVINCE_ENTRY = re.compile(r'{"province_name":"(.*?)","value":(.*?)}')

# (endpoint, direction, entry pattern, first sheet column) for the four
# rankings fetched per city, in the order they are requested.
_RANKINGS = (
    ("cityrank", "move_in", _CITY_ENTRY, 0),
    ("cityrank", "move_out", _CITY_ENTRY, 3),
    ("provincerank", "move_in", _PROVINCE_ENTRY, 6),
    ("provincerank", "move_out", _PROVINCE_ENTRY, 9),
)


def _fetch_ranking(endpoint, city_id, direction, day, entry_pattern):
    """Download one ranking and return its entries.

    Args:
        endpoint: 'cityrank' or 'provincerank'.
        city_id: Baidu city code placed in the ``id`` query parameter.
        direction: 'move_in' or 'move_out'.
        day: Date string placed in the ``date`` query parameter.
        entry_pattern: Compiled regex with two groups (name, value).

    Returns:
        A list of ``(name, value)`` string tuples, empty when the request
        fails or nothing matches.
    """
    url = ("http://huiyan.baidu.com/migration/" + endpoint
           + ".jsonp?dt=country&id=" + str(city_id)
           + "&type=" + direction + "&date=" + str(day) + "&callback=jsonp")
    print(url)

    # Random 0-6 second pause before every request, as in the original.
    time.sleep(random.randint(0, 6))

    try:
        # The with-block closes the response; the timeout keeps one stalled
        # connection from hanging the whole crawl.
        with request.urlopen(url, timeout=30) as response:
            raw = response.read().decode("utf-8")
    except OSError as exc:
        # URLError, HTTPError and socket timeouts are all OSError subclasses.
        # Report and skip this ranking instead of aborting the whole run.
        print("request failed, skipping:", url, exc)
        return []

    # Turn the \uXXXX escapes in the payload into real characters.
    text = raw.encode("utf-8").decode("unicode_escape")
    return entry_pattern.findall(text)


for riqi in date_list:
    # Start every date with an empty workbook. The original reused a single
    # workbook, so rows left over from an earlier date could survive in the
    # files saved for later dates.
    f = xlwt.Workbook()
    sheet2 = f.add_sheet(u'sheet2', cell_overwrite_ok=True)
    header_style = set_style('Times New Roman', 200, True)
    for col, title in enumerate(row0):
        sheet2.write(0, col, title, header_style)

    # Next free row for each of the four column groups. The original derived
    # the row from len(current result) * city index, which overwrote or
    # skipped rows whenever two cities returned different entry counts.
    next_row = [1] * len(_RANKINGS)

    for city_id, city_name in zip(ID, name):
        for slot, (endpoint, direction, pattern, first_col) in enumerate(_RANKINGS):
            entries = _fetch_ranking(endpoint, city_id, direction, riqi, pattern)
            row = next_row[slot]
            for place, share in entries:
                sheet2.write(row, first_col, place, default)
                sheet2.write(row, first_col + 1, city_name, default)
                sheet2.write(row, first_col + 2, share, default)
                row += 1
            next_row[slot] = row

    print("大吉大利,今晚吃鸡啊!")

    # NOTE(review): the source lost its indentation; saving once per date is
    # assumed because the file name embeds the date.
    # Backslashes are escaped explicitly; the resulting path is unchanged.
    filename = 'D:\\data\\人口数据\\百度迁徙大数据\\全国城市省份市内流入流出\\' + str(riqi) + '.xls'
    f.save(filename)

其中涉及到的文件下载请点击

https://download.csdn.net/user/qq_30803353/uploads

read_write.py文件链接:

https://download.csdn.net/download/qq_30803353/12192963

百度城市编码文件:

https://download.csdn.net/download/qq_30803353/12192965

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值