Python threading demo - threads

Python multithreading and multiprocessing

The difference between threads and child processes

Processes and threads differ in granularity: processes cannot directly access each other's variables (that is, each other's memory), while threads within the same process can, and a thread always runs inside some process. For example, on Windows an IE browser is a process. If you open a PDF from the browser, IE launches Acrobat, which runs as an independent process; it is a child process of IE. Meanwhile, if IE opens two web pages inside its own single process and runs the scripts on both pages at the same time, it does so with two threads. Note that those threads still belong to IE, whereas the child process Acrobat strictly speaking is not part of IE; it is a separate program and is only called a child process of IE because IE started it.
The Linux implementation blurs the distinction between a pure process and a pure thread: under Linux the two are essentially the same kind of entity.
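
The shared-memory point can be checked directly in code. A minimal sketch (my own illustration, not from the quoted explanation): a thread sees and modifies the parent's global variable, while a child process only changes its own copy.

# -*- coding: utf-8 -*-
import threading
import multiprocessing

counter = 0

def bump():
    global counter
    counter += 1

if __name__ == '__main__':
    t = threading.Thread(target=bump)
    t.start()
    t.join()
    print("after thread:", counter)     # 1: the thread modified this process's memory

    p = multiprocessing.Process(target=bump)
    p.start()
    p.join()
    print("after process:", counter)    # still 1: the child process only changed its own copy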

Intensive workloads (CPU-bound vs I/O-bound)
https://blog.csdn.net/qq_33020901/article/details/80207594

CPU-bound (compute-intensive)
I/O-bound (I/O-intensive)
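
As a rough illustration (my own sketch, not from the linked article): a CPU-bound function keeps the processor busy for its whole duration, while an I/O-bound function mostly waits; time.sleep stands in for a network or disk call here.

# -*- coding: utf-8 -*-
import time

def cpu_bound(n):
    # keeps the CPU busy for the whole call
    return sum(i * i for i in range(n))

def io_bound(seconds):
    # mostly waits; time.sleep stands in for a network or disk call,
    # and other threads are free to run while we wait
    time.sleep(seconds)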

Concurrency and parallelism
https://blog.csdn.net/qq_27825451/article/details/78850336
Concurrency: tasks take turns within the same time interval
Parallelism: tasks literally run at the same time
The core conceptual difference between the two is whether things really happen "at the same time": only parallelism is truly simultaneous.

[Multithreading in Python 3]
https://www.cnblogs.com/smallmars/p/7149507.html
Conclusion:
Use multithreading for I/O-bound work, e.g. sockets, crawlers, web services
Use multiprocessing for CPU-bound work, e.g. financial analysis
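
A quick way to check this conclusion yourself (my own sketch, not from the linked posts): in CPython the GIL prevents threads from executing Python bytecode in parallel, so a thread pool barely helps a CPU-bound task, while a process pool does.

# -*- coding: utf-8 -*-
import time
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor

def cpu_task(n):
    # pure computation, no waiting
    return sum(i * i for i in range(n))

def timed(executor_cls, label):
    start = time.time()
    with executor_cls(max_workers=4) as ex:
        list(ex.map(cpu_task, [5 * 10 ** 6] * 4))
    print("{}: {:.2f}s".format(label, time.time() - start))

if __name__ == '__main__':
    timed(ThreadPoolExecutor, "thread pool (limited by the GIL)")
    timed(ProcessPoolExecutor, "process pool (true parallelism)")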



demo_1


# -*- coding: utf-8 -*-

import threading, time


def run(num):
    print("subThread({}) is start...".format(threading.current_thread().name))

    time.sleep(0.5)
    print(num)
    time.sleep(0.5)

    print("subThread({}) is stop.".format(threading.current_thread().name))


if __name__ == '__main__':
    print("mainThreading({}) is starting...".format(threading.current_thread().name))

    # create the sub-thread
    """
    target = the function the thread will run
    name   = the name of the sub-thread
    args   = positional arguments passed to target (a tuple)
    """
    subThreading = threading.Thread(target=run, name='runThreading', args=(1,))
    subThreading.start()

    # make the main thread wait until the sub-thread finishes
    subThreading.join()  # without this line the main thread's final print runs before the sub-thread is done

    print("mainThreading({}) is stop.".format(threading.current_thread().name))

demo_2 multithreading without a lock

  1. Define a global variable so the threads share the same resource
  2. No lock is defined, so the final number is unpredictable
  3. Define the thread function and declare the shared variable with global
# -*- coding: utf-8 -*-

import threading

num = 100

def run(n):
    global num

    for i in range(1000000):
        # each add/subtract is a non-atomic read-modify-write, so without a lock
        # two threads can interleave here and the final result drifts away from 100
        num = num + n
        num = num - n


if __name__ == '__main__':
    t1 = threading.Thread(target=run, args=(6, ))
    t2 = threading.Thread(target=run, args=(9, ))

    t1.start()
    t2.start()

    t1.join()
    t2.join()

    print("num = {}".format(num))
    print("mainThreading({}) is stop.".format(threading.current_thread().name))

demo_3 multithreading with a Lock

  1. Define a global variable so the threads share the same resource
  2. Define a lock; only one thread can hold it at a time, and it is released as soon as the with block exits, not when the thread ends
  3. Define the thread function, declare the shared variable with global, and wrap the update in a with lock: block
# -*- coding: utf-8 -*-

import threading

num = 100
lock = threading.Lock()

def run(n):
    global num

    for i in range(1000000):
        with lock:
            num = num + n
            num = num - n


if __name__ == '__main__':
    t1 = threading.Thread(target=run, args=(6, ))
    t2 = threading.Thread(target=run, args=(9, ))

    t1.start()
    t2.start()

    t1.join()
    t2.join()

    print("num = {}".format(num))
    print("mainThreading({}) is stop.".format(threading.current_thread().name))

demo_4 crossing the street together

  1. Three people must gather before the group can cross:
    bar = threading.Barrier(3)
# -*- coding: utf-8 -*-

import threading, time

# three threads must reach the barrier before any of them may continue
bar = threading.Barrier(3)


def run():
    print("{} is starting...".format(threading.current_thread().name))

    time.sleep(1)
    bar.wait()

    print("{} is end.".format(threading.current_thread().name))


if __name__ == '__main__':
    # start a multiple of 3 threads; with e.g. 5 threads the last 2 would wait at the barrier forever
    for i in range(6):
        threading.Thread(target=run).start()
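
threading.Barrier also accepts an action callback, which exactly one of the released threads runs each time the barrier trips; a small sketch along the lines of this demo:

# -*- coding: utf-8 -*-
import threading, time

def light_turns_green():
    # executed by exactly one of the waiting threads each time the barrier trips
    print("light is green, the group of 3 crosses together")

bar = threading.Barrier(3, action=light_turns_green)

def run():
    time.sleep(0.5)
    bar.wait()
    print("{} has crossed".format(threading.current_thread().name))

if __name__ == '__main__':
    for i in range(3):
        threading.Thread(target=run).start()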

demo_5 threading.Timer

Timer thread: run a function after a delay

# -*- coding: utf-8 -*-

import threading

def run():
    print('-*' * 10)
    print("runThreading is end.")


if __name__ == '__main__':
    print("mainThreading is start...")
    timeThreading = threading.Timer(3, run)
    timeThreading.start()
    timeThreading.join()
    print("mainThreading is end.")




Multithreaded crawler demo

Workflow

#!/usr/bin/python3
# -*- coding:utf-8 -*-
# @Software  : PyCharm
# @CreateTime: 2019-12-23 09:36
# @Author    : spider
# @File      : pyThread

import sys
import time
import pprint
import json
import re
from lxml import etree

import requests

import threading
from queue import Queue

"""
2类线程:3个下载,3个解析
内容队列:下载线程往队列中put数据,解析线程从队列中get数据
url队列: 下载线程从url队列get数据
写数据:上锁,保证文件不能同时被修改
"""

g_crawl_list = []
g_parse_list = []

class CrawThread(threading.Thread):
    def __init__(self, name, page_queue, data_queue):
        super(CrawThread, self).__init__()
        self.name = name
        self.page_queue = page_queue
        self.data_queue = data_queue
        self.url = r"{}"
        self.headers = {}

    def run(self):
        print("{} ---------- crawl_thread start".format(self.name))
        while True:
            if self.page_queue.empty():
                break
            # take a page number off the queue
            page = self.page_queue.get()

            # build the url and send the request
            url = self.url.format(page)
            res = requests.get(url, headers=self.headers)
            if res.ok:
                # put the page source into data_queue for the parser threads
                self.data_queue.put(res.text)
        print("{} ---------- crawl_thread stop".format(self.name))



class ParserThread(threading.Thread):
    def __init__(self, name, data_queue, fp, lock):
        super(ParserThread, self).__init__()
        self.name = name
        self.data_queue = data_queue
        self.lock = lock
        self.fp = fp

    def run(self):
        print("{} ---------- parse_thread start".format(self.name))
        while True:
            if self.data_queue.empty():
                break
            # take one page of data from data_queue
            pageSourceHtml = self.data_queue.get()
            # parse it
            self.parse_content(pageSourceHtml)
        print("{} ---------- parse_thread stop".format(self.name))

    def parse_content(self, pageSourceHtml):
        tree = etree.HTML(pageSourceHtml)
        li_list = tree.xpath("//li")

        items = []
        for oli in li_list:
            title = 'title'
            imgLink = 'imgLink'
            item = {
                'title':title,
                'imgLink':imgLink,
            }
            items.append(item)
        # write to jsonFile
        self.lock.acquire()
        self.fp.write(json.dumps(items, ensure_ascii=False))
        self.lock.release()

def function():
    print("in function...")
    print("-=" * 90)
    try:
        pass
    except Exception as e:
        print('\nLine_{:0>5d} in {} - {}'.format(
            sys._getframe().f_lineno, __file__, e))
    finally:
        pass

def create_queue():
    # create the page-number queue
    page_queue = Queue()
    for page in range(1, 11):
        page_queue.put(page)

    # create the content (data) queue
    data_queue = Queue()
    return page_queue, data_queue

def create_crawl_thread(page_queue, data_queue):
    crawl_name = ['crawlThread1', 'crawlThread2', 'crawlThread3']
    for name in crawl_name:
        tCrawl = CrawThread(name, page_queue, data_queue)
        g_crawl_list.append(tCrawl)

def create_parser_thread(data_queue, fp, lock):
    crawl_name = ['parseThread1', 'parseThread2', 'parseThread3']
    for name in crawl_name:
        tParse = ParserThread(name, data_queue, fp, lock)
        g_parse_list.append(tParse)

def main():
    # open the output file
    fp = open('jian.json', 'a', encoding='utf8')
    # create the lock shared by the parser threads
    lock = threading.Lock()
    # create the queues
    page_queue, data_queue = create_queue()

    # create the crawl threads
    create_crawl_thread(page_queue, data_queue)
    # create the parse threads (argument order matches the function: data_queue, fp, lock)
    create_parser_thread(data_queue, fp, lock)

    # start all crawl threads
    for tCrawl in g_crawl_list:
        tCrawl.start()
    # start all parse threads
    for tParse in g_parse_list:
        tParse.start()

    # the main thread waits for all sub-threads to finish
    for tCrawl in g_crawl_list:
        tCrawl.join()
    for tParse in g_parse_list:
        tParse.join()

    fp.close()



if __name__ == '__main__':
    print("in startMain...")
    print("-=" * 90)
    main()
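
One caveat with this design (my own note, not from the original post): the parser threads poll data_queue.empty() and may exit before the crawl threads have downloaded anything. A common alternative is to let parsers block on get() and stop only when they receive a sentinel value pushed after crawling is done. The sketch below uses a hypothetical SimpleParserThread as a simplified stand-in for the class above, just to show the shutdown pattern:

# -*- coding: utf-8 -*-
# Sketch only: SimpleParserThread and SENTINEL are illustrative, not part of the original code.
import threading
from queue import Queue

SENTINEL = None  # marker telling a parser thread to stop

class SimpleParserThread(threading.Thread):
    def __init__(self, data_queue):
        super(SimpleParserThread, self).__init__()
        self.data_queue = data_queue

    def run(self):
        while True:
            page = self.data_queue.get()    # blocks until data (or a sentinel) arrives
            if page is SENTINEL:
                break
            print("parsing one page of {} characters".format(len(page)))

if __name__ == '__main__':
    data_queue = Queue()
    parsers = [SimpleParserThread(data_queue) for _ in range(3)]
    for p in parsers:
        p.start()

    # pretend the crawl threads produced these pages
    for html in ["<html>page one</html>", "<html>page two</html>"]:
        data_queue.put(html)

    # push one sentinel per parser thread once crawling is finished
    for _ in parsers:
        data_queue.put(SENTINEL)
    for p in parsers:
        p.join()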