多进程跑Rij代码

import json
import sys
import time
from multiprocessing import Process, Manager
import requests


def nlists_ndatas(process_num, data_num, origin_long_dataset, server_num):
    """
    Cut this server's share of the dataset into one chunk per process.

    A server handles ``process_num * data_num`` consecutive items of
    ``origin_long_dataset``; ``server_num`` (1-based) selects which such
    window of the item list belongs to the current server.

    :param process_num: number of chunks (one per worker process)
    :param data_num: maximum number of items in each chunk
    :param origin_long_dataset: mapping whose ``(key, value)`` items are split
    :param server_num: 1-based index of the server whose window is taken
    :return: list of ``process_num`` lists of ``(key, value)`` tuples; chunks
        are shorter (possibly empty) when the dataset runs out
    """
    per_server = process_num * data_num
    window_start = per_server * (server_num - 1)
    window_end = per_server * server_num

    all_pairs = list(origin_long_dataset.items())
    server_window = all_pairs[window_start:window_end]

    return [
        server_window[idx * data_num:(idx + 1) * data_num]
        for idx in range(process_num)
    ]


def get_road_distance(cbg_lonlat_dict_section, cbg_lonlat_dict):
    """
    Query the local OTP server for driving distances and store them per origin.

    For every origin key in ``cbg_lonlat_dict_section`` a list is built with
    one distance for each entry of ``cbg_lonlat_dict`` (in that dict's
    iteration order), and the list is stored in the module-level ``dict0``
    under the origin key.

    Relies on the module-level names ``lock`` and ``dict0`` created in the
    ``__main__`` block.
    NOTE(review): those names are only visible in the child process when it
    inherits the parent's globals (fork start method) - confirm on the
    target platform.

    :param cbg_lonlat_dict_section: mapping ``key -> [lon, lat]`` of the
        origins this process is responsible for
    :param cbg_lonlat_dict: mapping ``key -> [lon, lat]`` of all destinations
    :return: None; results are written to ``dict0``
    """
    for a in cbg_lonlat_dict_section:
        point1_lon = cbg_lonlat_dict_section[a][0]
        point1_lat = cbg_lonlat_dict_section[a][1]
        distances = []
        for b in cbg_lonlat_dict:
            point2_lon = cbg_lonlat_dict[b][0]
            point2_lat = cbg_lonlat_dict[b][1]
            if (point1_lon == point2_lon) and (point1_lat == point2_lat):
                # Same coordinates: no request needed, distance is zero.
                distances.append(0)
                continue

            url = f'http://localhost:7070/otp/routers/default/plan?fromPlace={point1_lat},{point1_lon}&' \
                  f'toPlace={point2_lat},{point2_lon}&time=1:02pm&date=11-14-2017&mode=CAR&arriveBy=false&numItineraries=1'
            response = requests.request("GET", url, verify=False)
            drive_data = response.json()
            try:
                distance = round(drive_data['plan']['itineraries'][0]['legs'][0]['distance'])
            except (KeyError, IndexError, TypeError) as exc:
                # The response may lack a plan, or contain an empty
                # itineraries/legs list; keep the row aligned by recording 0.
                distance = 0
                print(f"No road distance for {a} -> {b}: {exc!r}")
            distances.append(distance)

        # Hold the lock only for the shared write. Wrapping the whole loop
        # (as before) made every process wait for the previous one to finish
        # all of its HTTP requests, i.e. the work ran serially.
        with lock:
            dict0[a] = distances


def nprocess(process_num):
    """
    Create one worker process per data chunk, without starting any of them.

    Reads the module-level ``list_of_list`` (chunk ``i`` is converted to a
    dict and handed to process ``i``) and the module-level
    ``cbg_lonlat_dict`` (passed to every process as the second argument of
    ``get_road_distance``).

    :param process_num: number of processes to create
    :return: list of ``Process`` objects that have not been started yet
    """
    return [
        Process(
            target=get_road_distance,
            args=(dict(list_of_list[idx]), cbg_lonlat_dict),
        )
        for idx in range(process_num)
    ]


if __name__ == '__main__':
    # Run configuration, kept in one place so the number of chunks and the
    # number of processes cannot drift apart.
    PROCESS_NUM = 45   # worker processes on this server
    DATA_NUM = 10      # origins handled by each process
    SERVER_NUM = 1     # 1-based index of this server's slice of the dataset

    with open('cbg_lonlat_dict.json', 'r', encoding='utf-8') as f:
        cbg_lonlat_dict = json.load(f)

    # Module-level names below (list_of_list, cbg_lonlat_dict, dict0, lock)
    # are read by nprocess() and get_road_distance().
    list_of_list = nlists_ndatas(PROCESS_NUM, DATA_NUM, cbg_lonlat_dict, SERVER_NUM)

    with Manager() as manager:
        dict0 = manager.dict()
        lock = manager.Lock()
        process_list = nprocess(PROCESS_NUM)

        start = time.perf_counter()
        for process in process_list:
            process.start()
        for process in process_list:
            process.join()

        end = time.perf_counter()
        print('程序运行时间:' + format(end - start))

        # Copy the proxy into a plain dict before the manager shuts down;
        # the proxy is unusable once the ``with`` block exits.
        dict0 = dict(dict0)

    print(dict0)
    print(len(dict0))

    # The output name follows the server index (SERVER_NUM == 1 gives
    # 'server1_cbg_road_distance.json').
    with open(f'server{SERVER_NUM}_cbg_road_distance.json', 'w', encoding='utf-8') as f:
        json.dump(dict0, f, indent=4, ensure_ascii=False)

代码将全部1726个cbg的经纬度依次提交给本地OTP服务器,逐对请求并获取两个经纬度之间的道路距离数据。

代码中用到了lock锁,以保证共享数据写入时不会发生错乱;但加锁之后是否一定能按预期写入还不确定,仍需进一步验证这种写入方式是否会出现数据错乱或对不齐的情况。

函数get_road_distance中的list1=[]是在每次外层循环里新建的局部列表,每个进程先把某个起点的整行结果算完,再按键a一次性写入dict0,这很可能就是写入结果保持有序、不会错位的原因。

从这段代码来看,写多进程并不算太难;重点在于多个进程同时运行时,如何保证数据按我们期望的方式写入。

  • 2
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值