futures/asyncio/multiprocess

多进程/多线程

import concurrent.futures
import multiprocessing
import asyncio

# ########################## Multithreading, part 1 (thread pool) ####################################
# async def insertAndUpdate(x):
def insertAndUpdate(x):
    """Upsert one address row into ``db_test.dw_org_addr_latest``.

    Builds an ``INSERT ... ON DUPLICATE KEY UPDATE`` statement, binds each
    placeholder to the same-named attribute of ``x`` and executes it through
    the module-level ``engine``. On a duplicate key every column except
    ``org_id`` and ``addr_type`` is overwritten with the new value.

    Args:
        x: Object exposing the fourteen column values as attributes
            (``org_id``, ``addr_type``, ..., ``district_code``).

    Returns:
        None.
    """
    # Placeholder names in the statement; each is read from ``x`` by name.
    bound_columns = (
        "org_id",
        "addr_type",
        "address",
        "addr_md5",
        "lng_lat_GCJ02",
        "lng_lat_GCJ02_md5",
        "UA0003",
        "UA0004",
        "UA0005",
        "UA0006",
        "UA0007",
        "UA0008",
        "park_code",
        "district_code",
    )
    statement = text('''
    INSERT INTO db_test.dw_org_addr_latest(
        `org_id`             , -- '企业ID',
        `addr_type`          , -- '地址用途(1-注册地 2-经营地-总部)',
        `address`            , -- '企业地址',
        `addr_md5`           , -- '地址MD5(逻辑外键)',
        `lng_lat_GCJ02`      , -- '经纬度',
        `lng_lat_GCJ02_md5`  , -- '经纬度(GCJ02)MD5',
        `UA0003`             , -- '地址所在国家/地区',
        `UA0004`             , -- '地址所在[省(自治区/直辖市)]',
        `UA0005`             , -- '地址所在[地(区/市/州/盟)]',
        `UA0006`             , -- '地址所在[县(区/市/旗)]',
        `UA0007`             , -- '地址所在[(街道办事处)]',
        `UA0008`             , -- '地址所在社区/社区居委会',
        `park_code`          , -- '地址所在园区',
        `district_code`       -- '地址所在片区',
        ) 
        VALUES(
        :org_id             ,
        :addr_type          ,
        :address            ,
        :addr_md5           ,
        :lng_lat_GCJ02      ,
        :lng_lat_GCJ02_md5  ,
        :UA0003             ,
        :UA0004             ,
        :UA0005             ,
        :UA0006             ,
        :UA0007             ,
        :UA0008             ,
        :park_code          ,
        :district_code      
        )
        ON DUPLICATE KEY
        UPDATE 
        address              =  :address           ,
        addr_md5             =  :addr_md5          ,
        lng_lat_GCJ02        =  :lng_lat_GCJ02     ,
        lng_lat_GCJ02_md5    =  :lng_lat_GCJ02_md5 ,
        UA0003               =  :UA0003            ,
        UA0004               =  :UA0004            ,
        UA0005               =  :UA0005            ,
        UA0006               =  :UA0006            ,
        UA0007               =  :UA0007            ,
        UA0008               =  :UA0008            ,
        park_code            =  :park_code         ,
        district_code        =  :district_code     
        ;
    ''').bindparams(**{name: getattr(x, name) for name in bound_columns})
    # engine.begin() commits when the block exits cleanly and rolls back on
    # an exception. A bare engine.connect() leaves the write uncommitted on
    # SQLAlchemy versions that have no implicit autocommit (2.0 style).
    with engine.begin() as conn:
        conn.execute(statement)

# Timings recorded by the author for the same workload:
#   plain, row by row: 0:20:35.624958
#       data.apply(lambda x: insertAndUpdate(x), axis=1)
#   asyncio:           0:19:41.543852
#       loop = asyncio.get_event_loop()
#       tasks = [insertAndUpdate(row) for index, row in data.iterrows()]
#       loop.run_until_complete(asyncio.wait(tasks))
#   thread pool:       0:02:01.558449  (the variant kept below)
with concurrent.futures.ThreadPoolExecutor(workers) as executor:
    insert_results = executor.map(
        insertAndUpdate, [row for _, row in data.iterrows()]
    )
# Leaving the ``with`` block waits for every submitted insert to finish, so
# all rows have been attempted by now. Draining the iterator re-raises the
# first exception raised in a worker; without this the results are never
# read and a failed insert goes unnoticed.
for _ in insert_results:
    pass

# ########################多进程2######################################

def dw_addr_admin_area_code(data, workers, return_dict):
    """Compute the ``UA0003``..``UA0008`` columns for every row of ``data``.

    ``getDwAddrAdminAreaCode`` is mapped over ``data["lng_lat_GCJ02_md5"]``
    on a pool of ``workers`` threads. The results, in input order, become a
    six-column DataFrame stored under
    ``return_dict["dw_addr_admin_area_code"]``. Nothing is returned.
    """
    area_columns = ["UA0003", "UA0004", "UA0005", "UA0006", "UA0007", "UA0008"]
    with concurrent.futures.ThreadPoolExecutor(workers) as pool:
        lookups = pool.map(getDwAddrAdminAreaCode, data["lng_lat_GCJ02_md5"])
    # All lookups have completed once the pool is closed; the DataFrame
    # constructor consumes the map iterator.
    frame = pd.DataFrame(lookups, columns=area_columns)
    return_dict["dw_addr_admin_area_code"] = frame

def dw_addr_park_code(data, workers, return_dict):
    """Compute the ``park_code`` column for every row of ``data``.

    ``getDwAddrParkCode`` is mapped over ``data["lng_lat_GCJ02_md5"]`` on a
    pool of ``workers`` threads. The results, in input order, become a
    one-column DataFrame stored under ``return_dict["dw_addr_park_code"]``.
    Nothing is returned.
    """
    with concurrent.futures.ThreadPoolExecutor(workers) as pool:
        lookups = pool.map(getDwAddrParkCode, data["lng_lat_GCJ02_md5"])
    # All lookups have completed once the pool is closed; the DataFrame
    # constructor consumes the map iterator.
    frame = pd.DataFrame(lookups, columns=["park_code"])
    return_dict["dw_addr_park_code"] = frame

def dw_addr_district_code(data, workers, return_dict):
    """Compute the ``district_code`` column for every row of ``data``.

    ``getDwAddrDistrictCode`` is mapped over ``data["lng_lat_GCJ02_md5"]``
    on a pool of ``workers`` threads. The results, in input order, become a
    one-column DataFrame stored under
    ``return_dict["dw_addr_district_code"]``. Nothing is returned.
    """
    with concurrent.futures.ThreadPoolExecutor(workers) as pool:
        lookups = pool.map(getDwAddrDistrictCode, data["lng_lat_GCJ02_md5"])
    # All lookups have completed once the pool is closed; the DataFrame
    # constructor consumes the map iterator.
    frame = pd.DataFrame(lookups, columns=["district_code"])
    return_dict["dw_addr_district_code"] = frame

# Manager-backed dict shared with the child processes; each job stores its
# resulting DataFrame in it under its own key.
manager = multiprocessing.Manager()
return_dict = manager.dict()

# NOTE(review): on platforms that use the "spawn" start method the child
# re-imports this module, so this code should then live under
# ``if __name__ == "__main__":`` -- confirm how the script is launched.
process_jobs = [dw_addr_admin_area_code, dw_addr_park_code, dw_addr_district_code]
process_jobs = [
    multiprocessing.Process(target=job, args=(data, workers, return_dict))
    for job in process_jobs
]
for process in process_jobs:
    process.start()
# join() blocks until the process has exited, so no is_alive() polling loop
# (which would spin a CPU core while waiting) is needed.
for process in process_jobs:
    process.join()
# ##############################################################

异步

# https://docs.python.org/zh-cn/3/library/asyncio-task.html
import asyncio
import time
from datetime import datetime

async def say_after(delay, what):
    """Wait ``delay`` seconds, print ``what`` and return it.

    Args:
        delay: Number of seconds passed to ``asyncio.sleep``.
        what: Value to print and hand back to the awaiting caller.

    Returns:
        The ``what`` argument, unchanged.
    """
    await asyncio.sleep(delay)
    print(what)
    return what

# Original version: each call is awaited before the next one starts, so
# nothing actually runs concurrently.
async def main():  # 0:00:03.002929
    """Run the two ``say_after`` calls one after the other.

    Prints the start time, awaits ``say_after(1, 'hello')`` and then
    ``say_after(2, 'world')``, and finally prints the finish time.
    """
    print(f"started at {time.strftime('%X')}")

    for delay, word in ((1, 'hello'), (2, 'world')):
        await say_after(delay, word)

    print(f"finished at {time.strftime('%X')}")

# Async variant 1: schedule both coroutines as tasks, then await them.
async def main():  # 0:00:02.002949
    """Run the two ``say_after`` calls concurrently via ``create_task``.

    Both tasks are created before the start time is printed. Their return
    values are collected in order into a list, which is printed after the
    finish time.
    """
    hello_task = asyncio.create_task(say_after(1, 'hello'))
    world_task = asyncio.create_task(say_after(2, 'world'))

    print(f"started at {time.strftime('%X')}")

    # Both tasks are already scheduled, so awaiting them in turn takes about
    # as long as the slower one (around 2 seconds).
    res = [await hello_task, await world_task]

    print(f"finished at {time.strftime('%X')}")
    print(res)

# Async variant 2: let asyncio.gather run both coroutines concurrently.
async def main():  # 0:00:02.002968
    """Run the two ``say_after`` calls concurrently via ``asyncio.gather``.

    Prints the start time, awaits both coroutines together, then prints the
    finish time followed by the list of their return values.
    """
    print(f"started at {time.strftime('%X')}")

    pending = (say_after(1, 'hello'), say_after(2, 'world'))
    res = await asyncio.gather(*pending)

    print(f"finished at {time.strftime('%X')}")
    print(res)

# Time one full run of whichever main() definition is currently in effect
# and print the elapsed timedelta.
started_at = datetime.now()
asyncio.run(main())
elapsed = datetime.now() - started_at
print(elapsed)

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
[/public/home/pengjy/anaconda3] >>> PREFIX=/public/home/pengjy/anaconda3 WARNING: md5sum mismatch of tar archive expected: 8a581514493c9e0a1cbd425bc1c7dd90 got: 614f6284c34f91affd38a1be2e4be076 - Unpacking payload ... Traceback (most recent call last): File "entry_point.py", line 76, in <module> File "tarfile.py", line 2024, in extractall File "tarfile.py", line 2065, in extract File "tarfile.py", line 2137, in _extract_member File "tarfile.py", line 2186, in makefile File "tarfile.py", line 249, in copyfileobj tarfile.ReadError: unexpected end of data [210095] Failed to execute script entry_point concurrent.futures.process._RemoteTraceback: ''' Traceback (most recent call last): File "concurrent/futures/process.py", line 368, in _queue_management_worker File "multiprocessing/connection.py", line 251, in recv TypeError: __init__() missing 1 required positional argument: 'msg' ''' The above exception was the direct cause of the following exception: Traceback (most recent call last): File "entry_point.py", line 69, in <module> File "concurrent/futures/process.py", line 484, in _chain_from_iterable_of_lists File "concurrent/futures/_base.py", line 611, in result_iterator File "concurrent/futures/_base.py", line 439, in result File "concurrent/futures/_base.py", line 388, in __get_result concurrent.futures.process.BrokenProcessPool: A process in the process pool was terminated abruptly while the future was running or pending. [210105] Failed to execute script entry_point 是什么问题,如何解决?
07-25

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值