python核心编程笔记(3)

最新推荐文章于 2022-10-01 23:42:43 发布

cokezkx

最新推荐文章于 2022-10-01 23:42:43 发布

阅读量147

点赞数

分类专栏：笔记

本文链接：https://blog.csdn.net/cokezkx/article/details/104096195

版权

笔记专栏收录该内容

4 篇文章 0 订阅

订阅专栏

1.多线程，多进程和线程池

1.1 GIL

gil:global interpreter lock （cpython）全局解释器；
python中一个线程对应于c语言中的一个线程；
gil使得同一个时刻只有一个线程在一个cpu上执行字节码, 无法将多个线程映射到多个cpu上执行；
gil会根据执行的字节码行数以及时间片释放gil，gil在遇到io的操作时候主动释放。

1.2 多线程

对于io操作来说，多线程和多进程性能差别不大

import time
import threading

def get_detail_html(url):
    print("get detail html started")
    time.sleep(2)
    print("get detail html end")


def get_detail_url(url):
    print("get detail url started")
    time.sleep(4)
    print("get detail url end")

class GetDetailHtml(threading.Thread):
    def __init__(self, name):
        super().__init__(name=name)

    def run(self):
        print("get detail html started")
        time.sleep(2)
        print("get detail html end")


class GetDetailUrl(threading.Thread):
    def __init__(self, name):
        super().__init__(name=name)

    def run(self):
        print("get detail url started")
        time.sleep(4)
        print("get detail url end")

if  __name__ == "__main__":
    thread1 = GetDetailHtml("get_detail_html")
    thread2 = GetDetailUrl("get_detail_url")
    start_time = time.time()
    thread1.start()
    thread2.start()

    thread1.join()
    thread2.join()

    #当主线程退出的时候， 子线程kill掉
    print ("last time: {}".format(time.time()-start_time))

1.3线程间通信

最好通过queue的方式进行线程间同步

1.4线程同步

from threading import Lock, RLock, Condition #可重入的锁

#在同一个线程里面，可以连续调用多次acquire， 一定要注意acquire的次数要和release的次数相等
total = 0
lock = RLock()
def add():
    #1. dosomething1
    #2. io操作
    # 1. dosomething3
    global lock
    global total
    for i in range(1000000):
        lock.acquire()
        lock.acquire()
        total += 1
        lock.release()
        lock.release()


def desc():
    global total
    global lock
    for i in range(1000000):
        lock.acquire()
        total -= 1
        lock.release()

import threading
thread1 = threading.Thread(target=add)
thread2 = threading.Thread(target=desc)
thread1.start()
thread2.start()

参考：
https://blog.csdn.net/ybdesire/article/details/80294638

条件变量condition：
https://www.cnblogs.com/yoyoketang/p/8337118.html
重点：wait(timeout): 线程挂起，直到收到一个notify通知或者超时（可选的，浮点数，单位是秒s）才会被唤醒继续运行。

Semaphore信号量：
用于控制进入数量的锁。文件，读、写，写一般只是用于一个线程写，读可以允许有多个。

import threading
import time

class HtmlSpider(threading.Thread):
    def __init__(self, url, sem):
        super().__init__()
        self.url = url
        self.sem = sem

    def run(self):
        time.sleep(2)
        print("got html text success")
        self.sem.release()

class UrlProducer(threading.Thread):
    def __init__(self, sem):
        super().__init__()
        self.sem = sem

    def run(self):
        for i in range(20):
            self.sem.acquire()
            html_thread = HtmlSpider("https://baidu.com/{}".format(i), self.sem)
            html_thread.start()

if __name__ == "__main__":
    sem = threading.Semaphore(3)
    url_producer = UrlProducer(sem)
    url_producer.start()

1.5 线程池

线程池，为什么要线程池
主线程中可以获取某一个线程的状态或者某一个任务的状态，以及返回值
当一个线程完成的时候我们主线程能立即知道
futures可以让多线程和多进程编码接口一致

from concurrent.futures import ThreadPoolExecutor, as_completed, wait, FIRST_COMPLETED
from concurrent.futures import Future
from multiprocessing import Pool

#未来对象，task的返回容器
import time

def get_html(times):
    time.sleep(times)
    print("get page {} success".format(times))
    return times



executor = ThreadPoolExecutor(max_workers=2)
#通过submit函数提交执行的函数到线程池中, submit 是立即返回
# task1 = executor.submit(get_html, (3))
# task2 = executor.submit(get_html, (2))


#要获取已经成功的task的返回
urls = [3,2,4]
all_task = [executor.submit(get_html, (url)) for url in urls]
wait(all_task, return_when=FIRST_COMPLETED)
print("main")

1.6 多进程

import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from concurrent.futures import ProcessPoolExecutor
#多进程编程
#耗cpu的操作，用多进程编程， 对于io操作来说， 使用多线程编程，进程切换代价要高于线程

#1. 对于耗费cpu的操作，多进程由于多线程
# def fib(n):
#     if n<=2:
#         return 1
#     return fib(n-1)+fib(n-2)
#
# if __name__ == "__main__":
#     with ThreadPoolExecutor(3) as executor:
#         all_task = [executor.submit(fib, (num)) for num in range(25,40)]
#         start_time = time.time()
#         for future in as_completed(all_task):
#             data = future.result()
#             print("exe result: {}".format(data))
#
#         print("last time is: {}".format(time.time()-start_time))

#2. 对于io操作来说，多线程优于多进程
def random_sleep(n):
    time.sleep(n)
    return n

if __name__ == "__main__":
    with ProcessPoolExecutor(3) as executor:
        all_task = [executor.submit(random_sleep, (num)) for num in [2]*30]
        start_time = time.time()
        for future in as_completed(all_task):
            data = future.result()
            print("exe result: {}".format(data))

        print("last time is: {}".format(time.time()-start_time))

import multiprocessing

#多进程编程
import time
def get_html(n):
    time.sleep(n)
    print("sub_progress success")
    return n


if __name__ == "__main__":

    #使用线程池
    pool = multiprocessing.Pool(multiprocessing.cpu_count())
    # result = pool.apply_async(get_html, args=(3,))
    #
    # #等待所有任务完成
    # pool.close()
    # pool.join()
    #
    # print(result.get())

    #imap
    # for result in pool.imap(get_html, [1,5,3]):
    #     print("{} sleep success".format(result))

    for result in pool.imap_unordered(get_html, [1,5,3]):
        print("{} sleep success".format(result))

1.6.1 进程间通信

import time
from multiprocessing import Process, Queue, Pool, Manager, Pipe


def producer(queue):
    queue.put("a")
    time.sleep(2)

def consumer(queue):
    time.sleep(2)
    data = queue.get()
    print(data)

if __name__ == "__main__":
    queue = Queue(10)
    my_producer = Process(target=producer, args=(queue,))
    my_consumer = Process(target=consumer, args=(queue,))
    my_producer.start()
    my_consumer.start()
    my_producer.join()
    my_consumer.join()

本章对应书的第四章，读过会有更清楚的理解。
书中线程：

import threading
from time import time, ctime

class MyThread(threading.Thread):
    def __init__(self, func, args, name='', verb=False):
        threading.Thread.__init__(self)
        self.name = name
        self.func = func
        self.args = args
        self.verb = verb

    def getResult(self):
        return self.res

    def run(self):
        if self.verb:
            print ('starting', self.name, 'at:', ctime())
        self.res = self.func(*self.args)
        if self.verb:
            print (self.name, 'finished at:', ctime())

2.

cokezkx

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
python核心编程笔记(3)

1.多线程，多进程和线程池1.1 GILgil:global interpreter lock （cpython）全局解释器；python中一个线程对应于c语言中的一个线程；gil使得同一个时刻只有一个线程在一个cpu上执行字节码, 无法将多个线程映射到多个cpu上执行；gil会根据执行的字节码行数以及时间片释放gil，gil在遇到io的操作时候主动释放。1.2 多线程对于io操作来...
复制链接

扫一扫