小白代码成长养成记[1]--路网数据爬取与使用和建模研究

最新推荐文章于 2024-04-17 21:52:47 发布

萌萌可爱小叶子

最新推荐文章于 2024-04-17 21:52:47 发布

阅读量1k

点赞数

本文链接：https://blog.csdn.net/weixin_43809280/article/details/102555725

版权

路网数据爬取和使用主要用的地图软件为：百度地图
百度地图开发者平台的“轻量级路线规划”服务文档中，具体介绍了如何构造请求URL以及如何发起请求。
第一步：批量获取百度地图中一个区域的交叉路口的名称和经纬度。
这个也是在博客园上参考了别人的教程，并在其基础上做了改进，下面我就放上参考的博客链接：
https://www.cnblogs.com/lilinpging/p/8193324.html
下面是我写的关于交叉路口的爬取，爬取出来后进行去重，然后再进行相邻交叉路口的匹配：

#-*- coding: utf-8 -*-
"""Spyder Editor
 This is a temporary script file.
"""
import requests
import time
import socket
import urllib.request
import bs4
import math
import os
import json
import sys
import importlib
# NOTE(review): reloading ``sys`` looks like a leftover of the Python 2
# ``reload(sys)`` idiom; nothing visible in this script depends on it -- confirm
# before removing.
importlib.reload(sys)
# Two intersections are treated as adjacent when the riding route between them
# is shorter than 1 km (see the distance check in get_result).
# Endpoint of the Baidu Maps lightweight riding-route service used by get_result.
url = 'http://api.map.baidu.com/directionlite/v1/riding?'
def get_result(origin, destination):
    """Return the riding distance between two intersections if it is short.

    Sends one request to the route endpoint stored in the module-level
    ``url`` and reads ``result.routes[0].distance`` from the JSON reply.

    Args:
        origin: ``(name, "lat,lon")`` tuple of the starting intersection.
        destination: ``(name, "lat,lon")`` tuple of the ending intersection.

    Returns:
        The route distance when it is below 1000, otherwise ``None``.
        A distance of exactly 0 is returned as ``0`` (falsy), so callers
        should compare against ``None`` rather than rely on truthiness.

    Raises:
        requests.exceptions.RequestException: on connection errors, timeouts
            or a non-2xx HTTP status.
        RuntimeError: when the reply carries no ``result`` object (for
            example an API-level error). Failing loudly is deliberate: the
            caller records a pair as "done" only after this function
            returns, so a failed query is retried on the next run.
    """
    params = {
        'mode': 'riding',
        'origin': origin[1],
        'destination': destination[1],
        'origin_region': origin[0],
        'destination_region': destination[0],
        'output': 'json',
        # Fill in your own key: apply on the Baidu developer platform and
        # choose the "server side" application type.
        'ak': 'q6S3S45K1cZ8KpTvX9704TLTZnKBEwhQ',
    }
    # Without a timeout a stalled connection would block the crawl forever.
    response = requests.get(url, params, timeout=30)
    response.raise_for_status()
    payload = response.json()
    if 'result' not in payload:
        raise RuntimeError(
            'route query failed: status=%r message=%r'
            % (payload.get('status'), payload.get('message'))
        )
    distance = payload['result']['routes'][0]['distance']
    if distance < 1000:
        return distance
    return None
#爬取交叉路口
def crossroad(lat_1, lon_1, lat_2, lon_2, ak, las):
    """Crawl intersections inside a bounding box into ``./crossroad.txt``.

    The box is tiled with square cells of side ``las`` (in degrees). For
    every cell the Baidu place-search API is queried for "交叉口"
    (intersection), up to 20 pages of 20 results each, and every hit is
    written as one ``name,lat,lon`` line. The output file is overwritten.

    Args:
        lat_1: Latitude of the lower-left corner of the box.
        lon_1: Longitude of the lower-left corner of the box.
        lat_2: Latitude of the upper-right corner of the box.
        lon_2: Longitude of the upper-right corner of the box.
        ak: Baidu API key appended to every request.
        las: Side length of one search cell, in degrees.

    Raises:
        urllib.error.URLError: when a request fails or times out.
        KeyError: when a reply carries no ``results`` list (for example an
            API-level error payload).
    """
    push = './crossroad.txt'
    # Number of cells needed along each axis to cover the whole box.
    lat_cells = int((lat_2 - lat_1) / las + 1)
    lon_cells = int((lon_2 - lon_1) / las + 1)
    with open(push, 'w') as f:
        # The loop indices use their own names: reusing the cell counts as
        # loop variables shrank the longitude range by one on every new
        # latitude row, so only a triangle of the grid was crawled.
        for lat_idx in range(lat_cells):
            lat_b1 = round(lat_1 + las * lat_idx, 6)
            for lon_idx in range(lon_cells):
                lon_b1 = round(lon_1 + las * lon_idx, 6)
                for page_num in range(20):
                    request_url = (
                        'http://api.map.baidu.com/place/v2/search'
                        '?query=%E4%BA%A4%E5%8F%89%E5%8F%A3&%20&bounds='
                        f'{lat_b1},{lon_b1},{lat_b1 + las},{lon_b1 + las}'
                        f'&page_size=20&page_num={page_num}'
                        f'&output=json&ak={ak}'
                    )
                    with urllib.request.urlopen(request_url, timeout=30) as response:
                        data = json.load(response)
                    print(data)
                    results = data['results']
                    if not results:
                        # An empty page means this cell is exhausted; asking
                        # for further pages would only burn API quota.
                        break
                    for item in results:
                        location = item['location']
                        f.write(f"{item['name']},{location['lat']},{location['lng']}\n")
#这个函数的功能就是对crossroad.txt里面的数据进行去重
def text():
    """Copy ``./crossroad.txt`` to ``./new.txt`` without repeated coordinates.

    Each input line is expected to look like ``name,lat,lon``. Two lines
    are duplicates when their second and third comma-separated fields are
    equal; only the first occurrence is kept, in input order. A running
    count of kept lines is printed, followed by ``success`` at the end.

    Raises:
        IndexError: when a line has fewer than three comma-separated fields.
    """
    read = "./crossroad.txt"  # raw crawl output
    write = "./new.txt"  # de-duplicated output
    seen_positions = set()  # set membership keeps the pass linear
    kept = 0
    # Context managers close both files even if a malformed line raises.
    with open(read, "r") as infile, open(write, "w") as outfile:
        for line_ in infile:
            fields = line_.split(',')
            position = (fields[1], fields[2])
            if position in seen_positions:
                continue
            seen_positions.add(position)
            kept += 1
            outfile.write(line_)
            print(kept)
            print('\n')
    print("success")
#处理经纬度转换成米勒坐标（自行百度搜索此坐标类型）的函数，并且筛掉不是横平竖直的相邻交叉口对
def millerToXY(lon_x1, lat_y1, lon_x2, lat_y2):
    """Return the slope between two points if they are axis-aligned.

    Both points are projected with the Miller cylindrical formula used in
    this file, then the slope of the segment joining them is computed in
    the projected plane. The pair is accepted when the segment lies within
    5 degrees of horizontal or within 5 degrees of vertical; the
    previous version only accepted near-horizontal pairs and turned an
    exactly vertical pair into slope 0.

    Args:
        lon_x1: Longitude of the first point (number or numeric string).
        lat_y1: Latitude of the first point (number or numeric string).
        lon_x2: Longitude of the second point (number or numeric string).
        lat_y2: Latitude of the second point (number or numeric string).

    Returns:
        The slope ``k`` for an accepted pair, otherwise ``None``. ``k`` is
        ``+/-inf`` for an exactly vertical pair and ``0`` for an exactly
        horizontal pair (or identical points); since ``0`` is falsy,
        callers should test ``is not None``.
    """
    L = 6381372 * math.pi * 2
    W = L
    H = L / 2
    mill = 2.3

    def project(lon, lat):
        # Degrees -> radians, then Miller projection onto a W x H plane.
        lon_rad = float(lon) * math.pi / 180
        lat_rad = float(lat) * math.pi / 180
        miller_y = 1.25 * math.log(math.tan(0.25 * math.pi + 0.4 * lat_rad))
        x = (W / 2) + (W / (2 * math.pi)) * lon_rad
        y = (H / 2) - (H / (2 * mill)) * miller_y
        return x, y

    x1, y1 = project(lon_x1, lat_y1)
    x2, y2 = project(lon_x2, lat_y2)
    dx = x2 - x1
    dy = y2 - y1
    if dx != 0:
        k = dy / dx
    elif dy != 0:
        # Same longitude, different latitude: a truly vertical segment.
        k = math.copysign(math.inf, dy)
    else:
        # Identical points: keep the historical slope of 0.
        k = 0

    tolerance = math.pi / 180 * 5
    angle = abs(math.atan(k))
    if angle < tolerance or angle > math.pi / 2 - tolerance:
        return k
    return None
def _split_record(line):
    """Split one ``name,lat,lon`` line into ``(name, lat, lon)`` strings."""
    fields = line.split(',')
    return fields[0], fields[1], fields[2]


def main():
    """Crawl intersections, then find and record adjacent pairs.

    Steps:
      1. ``crossroad`` crawls the hard-coded bounding box into
         ``./crossroad.txt`` and ``text`` de-duplicates it into ``./new.txt``.
      2. Every unordered pair of intersections is pre-filtered with
         ``millerToXY``; surviving pairs are sent to ``get_result``.
      3. Each queried pair is appended to ``./contrast.txt``; pairs for
         which ``get_result`` returns a distance are also appended to
         ``./road_net.txt`` as ``origin_line|destination_line``.

    ``contrast.txt`` is the resume log: requests to the map service often
    fail with connection errors and the script has to be restarted, so pairs
    already listed there are skipped. This saves time and daily API quota.
    """
    lat_1 = 33.937268
    lon_1 = 113.572164
    lat_2 = 34.232903
    lon_2 = 114.049344
    ak = 'q6S3S45K1cZ8KpTvX9704TLTZnKBEwhQ'
    las = round((lon_2 - lon_1) / 10, 6)
    crossroad(lat_1, lon_1, lat_2, lon_2, ak, las)

    # Pairs queried by earlier runs. The log does not exist on the very
    # first run, which must not abort the crawl that has just finished.
    try:
        with open('./contrast.txt', 'r') as finished:
            contrast_seen = set(finished)  # lines keep their trailing "\n"
    except FileNotFoundError:
        contrast_seen = set()

    text()
    with open('./new.txt', 'r') as readfile:
        read_list = readfile.read().splitlines()

    get = 1
    # Both logs are closed (and therefore flushed) even when a request
    # raises, so the resume log survives the failures it exists for.
    with open('./road_net.txt', 'a') as road_net, \
            open('./contrast.txt', 'a') as contrast:
        for index, origin_line in enumerate(read_list):
            count1 = index + 1
            name1, lat_y1, lon_x1 = _split_record(origin_line)
            # (name, "lat,lon") is the shape get_result expects.
            origin = (name1, lat_y1 + ',' + lon_x1)
            print('正在对第%d个交叉口进行检索..........................................' % count1)
            # Only later lines are candidates, so each pair is visited once.
            for count2, destination_line in enumerate(read_list[index + 1:], start=index + 2):
                name2, lat_y2, lon_x2 = _split_record(destination_line)
                destination = (name2, lat_y2 + ',' + lon_x2)
                # A slope of exactly 0 is a valid match, so compare with None
                # instead of relying on truthiness.
                if millerToXY(lon_x1, lat_y1, lon_x2, lat_y2) is None:
                    continue
                param = origin_line + '|' + destination_line + "\n"
                if param in contrast_seen:
                    continue
                result = get_result(origin, destination)
                print('第%d个交叉口正在与第%d个交叉口进行匹配' % (count1, count2))
                contrast.write(param)
                if result is not None:
                    road_net.write(param)
                    print('找到第',get,'对交叉口啦！！！！！！！！！！！！！！！！！！！！！！！！！！！！！！！！！！！！！')
                    get += 1


if __name__=="__main__":
    main()

下面的代码对匹配好的交叉口对进行处理，即判断后一个交叉口位于前一个交叉口的哪个方向：

import math
# Label, for every matched pair in ./ceshi.txt, the compass direction of the
# second intersection relative to the first, and write the labelled lines to
# ./handle.txt. Input lines look like "name1,lat1,lon1|name2,lat2,lon2".

# Projection constants, computed once instead of once per input line.
L = 6381372 * math.pi * 2
W = L
H = L / 2
mill = 2.3


def _project(lon, lat):
    """Project degrees to the plane used by the direction thresholds.

    ``y`` grows with latitude here, so a positive ``y2 - y1`` means the
    second point lies further north.
    """
    lon_rad = float(lon) * math.pi / 180
    lat_rad = float(lat) * math.pi / 180
    x = (W / 2) + (W / (2 * math.pi)) * lon_rad
    y = 1.25 * math.log(math.tan(0.25 * math.pi + 0.4 * lat_rad)) * (H / 2) - (H / (2 * mill))
    return x, y


def _direction(dx, dy):
    """Return the direction label for an offset, or None if none applies.

    NOTE(review): offsets with ``dy == 0`` or ``180 <= abs(dy) <= 200`` match
    no branch and are dropped; the thresholds are kept exactly as authored.
    """
    if dy > 200 and dy > dx:
        return "第二个交叉口在正北方向"
    if dy < -200 and dy < dx:
        return "第二个交叉口在正南方向"
    if dx > 0 and dx > dy and 0 < abs(dy) < 180:
        return "第二个交叉口在正东方向"
    if dx < 0 and dy > dx and 0 < abs(dy) < 180:
        return "第二个交叉口在正西方向"
    return None


with open("./ceshi.txt", "r", encoding="utf-8") as Handle:
    road_net_list = Handle.read().splitlines()

# The output is written as UTF-8 to match the input, and the context manager
# guarantees it is flushed and closed even if a malformed line raises.
with open('./handle.txt', 'w', encoding="utf-8") as handle:
    for crossroad in road_net_list:
        halves = crossroad.split('|')
        first = halves[0].split(',')   # name, lat, lon of the first intersection
        second = halves[1].split(',')  # name, lat, lon of the second intersection
        x1, y1 = _project(first[2], first[1])
        x2, y2 = _project(second[2], second[1])
        label = _direction(x2 - x1, y2 - y1)
        if label is not None:
            handle.write(crossroad + "|" + label + "\n")

得出的数据是如下图：
在这里插入图片描述
上面得出的方向准确率可能只有80%左右，所以后期还需要人工筛选。
到这里，路网爬取这一部分就全部结束了，欢迎小伙伴们参考并提出问题！

萌萌可爱小叶子

关注

0
点赞
踩
9

收藏

觉得还不错? 一键收藏
0
评论
小白代码成长养成记[1]--路网数据爬取与使用和建模研究

路网数据爬取和使用主要用的地图软件为：百度地图百度地图的开发者平台，上面的轻量级路线规划上面有服务文档，具体介绍了怎么去写URL，并且怎么去请求。第一步：批量获取百度地图中一个区域的交叉路口的名称和经纬度。这个也是在博客园上参考了别人的教程，并在其基础上做了改进，下面我就放上参考的博客链接：https://www.cnblogs.com/lilinpging/p/8193324.html...
复制链接

扫一扫