进程和线程

qq_187352634

已于 2023-11-21 16:54:31 修改

阅读量69

点赞数

分类专栏： spider 文章标签：线程与进程

于 2023-11-21 11:04:47 首次发布

本文链接：https://blog.csdn.net/qq_37755459/article/details/134513616

版权

spider 专栏收录该内容

13 篇文章 0 订阅

订阅专栏

进程和线程

创建
- 方法一
- 方法二
线程池
- 无返回值
- 有返回值
线程实例
多进程
线程和进程实例

进程：操作系统分配资源（内存、线程等）的基本单位，每个运行中的程序至少对应一个进程
线程：CPU 运算调度的最小单位，线程运行在进程之内

创建

方法一

from threading import Thread


def thread_func(name):
    """Print ``name`` followed by each integer from 0 through 9, one pair per line."""
    counter = 0
    while counter < 10:
        print(name, counter)
        counter += 1
# The main thread creates three child threads.
if __name__ == '__main__':
    # ``args`` has to be a tuple, hence the trailing comma after each name.
    workers = [
        Thread(target=thread_func, args=(person,))
        for person in ('张三', '李四', '王五')
    ]
    # All threads are built first and only then started, in creation order.
    for worker in workers:
        worker.start()
    print("主线程")

方法二

继承Thread类

from threading import Thread

class ThreadFunc(Thread):
    """Thread subclass that prints its own name next to a counter from 0 to 9."""

    def __init__(self, name):
        """Initialise the base ``Thread`` and then store ``name`` as the thread name."""
        super().__init__()
        self.name = name

    def run(self):
        """Override of ``Thread.run``: print ten ``name---index`` lines."""
        for step in range(10):
            print(self.name, step, sep="---")
# The main thread creates three child threads.
if __name__ == '__main__':
    workers = [ThreadFunc(person) for person in ('张三', '李四', '王五')]
    # All threads are built first and only then started, in creation order.
    for worker in workers:
        worker.start()
    print("主线程")

线程池

无返回值

from concurrent.futures import ThreadPoolExecutor

def work_thread(name):
    """Print ``name`` eight times, one occurrence per line."""
    remaining = 8
    while remaining:
        print(name)
        remaining -= 1
if __name__ == '__main__':
    # Five tasks are queued on a pool of eight worker threads; leaving the
    # ``with`` block waits until all of them have finished.
    with ThreadPoolExecutor(8) as executor:
        for index in range(5):
            label = f"岳王{index}"
            executor.submit(work_thread, label)

有返回值

返回值的顺序不确定，谁执行完，先返回谁

import time
from concurrent.futures import ThreadPoolExecutor

def work_thread(name, t):
    """Sleep for ``t`` seconds, then return ``name`` unchanged as the task result."""
    pause = t
    time.sleep(pause)
    return name
def func_return(outcome):
    """Done-callback: print the result held by the finished future ``outcome``."""
    value = outcome.result()
    print(value)
if __name__ == '__main__':
    # The three tasks sleep for 3, 1 and 2 seconds respectively.
    jobs = (("岳王", 3), ("张飞", 1), ("刘备", 2))
    with ThreadPoolExecutor(8) as thread_pool:
        for who, delay in jobs:
            future = thread_pool.submit(work_thread, who, delay)
            # The callback receives the future once its task has completed.
            future.add_done_callback(func_return)

使用 map 时返回值的顺序是确定的：map 的返回值是生成器，按任务分发的顺序（而不是任务完成的先后）依次返回结果

import time
from concurrent.futures import ThreadPoolExecutor

def work_thread(name, t):
    """Wait ``t`` seconds and hand ``name`` back to the caller."""
    seconds = t
    time.sleep(seconds)
    return name
def func_return(outcome):
    """Print the result stored in the future-like object ``outcome``."""
    finished_value = outcome.result()
    print(finished_value)
if __name__ == '__main__':
    names = ['岳王', '张飞', '刘备']
    # The three tasks sleep for 3, 1 and 2 seconds respectively.
    delays = [3, 1, 2]
    with ThreadPoolExecutor(8) as thread_pool:
        # ``map`` pairs the two lists element-wise into work_thread(name, t) calls.
        for finished_name in thread_pool.map(work_thread, names, delays):
            print(finished_name)

线程实例

from concurrent.futures import ThreadPoolExecutor
import requests
from lxml import etree
def get_data(page):
    """Fetch one page of price records and append them to the shared CSV file.

    Posts the query form, decodes the JSON reply and writes one
    comma-separated line per entry of ``res['list']`` to the module-level
    file object ``f``.

    Args:
        page: Page number, sent as the ``current`` form field.

    Raises:
        KeyError: If the reply has no ``list`` entry or a record lacks one of
            the exported fields.
        TypeError: If an exported field is not a string.
    """
    # Record fields written to the CSV, in column order.
    columns = (
        'prodCat', 'prodPcat', 'prodName', 'lowPrice', 'avgPrice',
        'highPrice', 'specInfo', 'place', 'unitInfo', 'pubDate',
    )
    data = {
        'limit': '20',
        'current': page,
        'pubDateStartTime': '',
        'pubDateEndTime': '',
        'prodPcatid': '1189',
        'prodCatid': '',
        'prodName': '',
    }
    # NOTE(review): the literal below carries the author's placeholder text
    # around the address ("format only ... untested"); it is kept as-is.
    url = '仅做格式http://www.xinfadi.com.cn/getPriceData.html未曾试用'
    res = requests.post(url, data=data).json()
    for record in res['list']:
        one_row = ','.join(record[column] for column in columns)
        # Row and line terminator go out in ONE write call: this function is
        # submitted to a thread pool and all workers share ``f``, so with two
        # separate calls another worker's write could land between a row and
        # its newline.
        f.write(one_row + '\n')
# Shared output file; get_data() writes to it through this module-level name.
f = open('price.csv', 'w', encoding='utf-8')
if __name__ == '__main__':
    # Context managers exit in reverse order: the pool first waits for every
    # submitted page to finish, and only then is the file flushed and closed
    # (previously the file was never closed).
    with f, ThreadPoolExecutor(8) as thread_pool:
        for page in range(1, 8):
            thread_pool.submit(get_data, page)

    # print(res['list'])

多进程

from multiprocessing import Process
def process_function(name):
    """Print ``name---k`` for every ``k`` from 0 through 79."""
    k = 0
    while k < 80:
        print(name, k, sep='---')
        k += 1
if __name__ == '__main__':
    # One child process per name; both are created before either is started.
    children = [
        Process(target=process_function, args=(hero,))
        for hero in ("张飞", "关羽")
    ]
    for child in children:
        child.start()

线程和进程实例

链接: url

'''
    多进程练习：
    进程一：在网页中找出能下载图片的地址，把地址保存到队列中
    进程二：从队列中拿取图片地址，下载保存图片
    队列：在进程之间进行通信
'''
from multiprocessing import Process, Queue
import requests
from lxml import etree
from urllib import parse
from concurrent.futures import ThreadPoolExecutor
def extract_pics_ip(que):
    """Producer: collect image addresses from the index page and queue them.

    Every ``data-src`` value matched by the XPath below is put on ``que``.
    The string ``'finish'`` is always put last, including when fetching or
    parsing raises, so a consumer waiting on ``que.get()`` is never left
    blocked forever. Any exception still propagates to the caller.

    Args:
        que: Queue shared with the downloading side.
    """
    try:
        # NOTE(review): the page address is blank in the source and has to
        # be filled in before this can fetch anything.
        url = ''
        response1 = requests.get(url)
        # Decode the body with the encoding detected from its content.
        response1.encoding = response1.apparent_encoding
        tree_html = etree.HTML(response1.text)
        pics = tree_html.xpath('//*[@id="flow"]/li/figure/a/img/@data-src')
        # NOTE(review): if these values turn out to be relative links or
        # detail pages rather than direct image addresses, resolve them with
        # parse.urljoin(url, pic) and scrape the detail page before queueing.
        for pic in pics:
            que.put(pic)
    finally:
        # End-of-stream marker for the consumer.
        que.put('finish')
def save_pic(url):
    """Download the image at ``url`` and store it under ``./pics/``.

    The file is named after the last ``/``-separated segment of ``url``.
    The download happens before the file is opened, so a failed request no
    longer leaves an empty file behind, and the target directory is created
    when missing.

    Args:
        url: Address of the image to download.

    Raises:
        requests.HTTPError: If the server answers with an error status;
            previously the error body was saved as if it were the image.
    """
    import os  # local import keeps this block self-contained

    # Name the file after the last path segment of the address.
    pic_name = url.split('/')[-1]
    resp3 = requests.get(url)
    resp3.raise_for_status()
    # exist_ok avoids an error when the directory already exists, e.g.
    # because another worker thread created it first.
    os.makedirs('./pics', exist_ok=True)
    with open('./pics/'+pic_name, 'wb') as f:
        f.write(resp3.content)
    print('完成下载', url)
def download_pics(que):
    """Consumer: hand every queued image address to a pool of five threads.

    Items are taken from ``que`` until the string ``'finish'`` arrives; each
    earlier item is submitted to ``save_pic``. Returning from the ``with``
    block waits for the submitted downloads to complete.
    """
    with ThreadPoolExecutor(5) as thread_works:
        # iter(callable, sentinel) keeps calling que.get() and stops as soon
        # as the returned item equals 'finish'.
        for pic_ip in iter(que.get, 'finish'):
            thread_works.submit(save_pic, pic_ip)
if __name__ == '__main__':
    # The queue is the communication channel between the two processes.
    que = Queue()
    # Producer first, consumer second; both are created before either starts.
    stages = [
        Process(target=stage, args=(que,))
        for stage in (extract_pics_ip, download_pics)
    ]
    for proc in stages:
        proc.start()
# extract_pics_ip()