1.什么是线程?
线程(Thread)是操作系统能够进行运算调度的最小单位,它被包含在进程中,是进程中的实际运作单位。进程像一座房子,它是一个容器,有相应的属性,比如占地面积、卧室、厨房和卫生间等。房子本身不会主动做任何事情;线程则是这座房子的居住者,可以使用房子里的每一个房间,做饭、洗澡等。目的:提高效率,在同一时间完成多项任务。技术上,“同步”指前一件事完成后,后面的事才会执行,事情一件一件地处理。
函数形式:
1.单线程demo只能吃完龙虾后再喝啤酒;
2.多线程demo实现同时吃龙虾喝啤酒;
import threading
import time
"""单线程"""
"""多线程(提升资源利用率)"""
def drinking():
    """Print a "drinking beer" line three times, one second apart.

    Each line includes the thread object that is executing the call, which
    makes the interleaving visible when several threads run at once.
    """
    rounds_left = 3
    while rounds_left > 0:
        worker = threading.current_thread()
        print('正在喝啤酒:%s' % worker)
        time.sleep(1)
        rounds_left -= 1
def eating():
    """Print an "eating crayfish" line three times, one second apart.

    Each line includes the thread object that is executing the call.
    """
    for _ in (0, 1, 2):
        message = '正在吃小龙虾:%s' % threading.current_thread()
        print(message)
        time.sleep(1)
def main():
    """Run drinking() and eating() concurrently, each on its own thread."""
    # Build both thread objects first, then start them in the same order.
    workers = [threading.Thread(target=job) for job in (drinking, eating)]
    for worker in workers:
        worker.start()


if __name__ == '__main__':
    main()
类形式:
1.类继承threading.Thread,并定义(重写)run方法
import threading
import time
class DrinkingThread(threading.Thread):
    """Thread whose run() prints a "drinking beer" line three times."""

    def run(self):
        """Print the current thread plus the beer message, then sleep 1s; repeat 3 times."""
        remaining = 3
        while remaining:
            me = threading.current_thread()
            print('%s正在喝啤酒' % me)
            time.sleep(1)
            remaining -= 1
class EatingThread(threading.Thread):
    """Thread whose run() prints an "eating skewers" line three times."""

    def run(self):
        """Print the current thread plus the skewer message, then sleep 1s; repeat 3 times."""
        for _ in (0, 1, 2):
            line = '%s正在撸串' % threading.current_thread()
            print(line)
            time.sleep(1)
def main():
    """Start one DrinkingThread and one EatingThread so both run side by side."""
    # Instantiate both before starting either, as the threads are independent.
    workers = [DrinkingThread(), EatingThread()]
    for worker in workers:
        worker.start()


if __name__ == '__main__':
    main()
2.什么是互斥锁?
线程可以对全局变量随意修改,可能造成多线程之间对全局变量的混乱操作 “互斥锁”(Mutual exclusion 缩写 Mutex),防止多个线程同时读写某一块内存区域,互斥锁为资源引入一个状态:锁定和非锁定 互斥锁保证了每次只有一个线程进行写入操作,保障了多线程情况下数据正确性
threading模块中使用Lock类可以方便地处理锁定。Lock类有2个方法:acquire()锁定和release()释放锁
# Create the lock. threading.Lock must be *called*: without the parentheses
# gLock would be the factory itself and gLock.acquire() would fail.
gLock = threading.Lock()
gLock.acquire()  # take the lock
gLock.release()  # give the lock back
import threading
# Shared counter that every add_value() call increments.
VALUE = 0
# Lock that add_value() holds while it runs its increment loop on VALUE.
gLock = threading. Lock( )
def add_value():
    """Add one million to the shared VALUE counter, then print the result.

    The whole increment loop runs while holding gLock so that concurrent
    callers cannot interleave their read-modify-write steps on VALUE.
    """
    global VALUE
    # `with` releases the lock even if the loop is interrupted by an exception.
    with gLock:
        for _ in range(1000000):
            VALUE += 1
        # Snapshot under the lock: reading VALUE after the release could
        # observe another thread's in-progress increments.
        total = VALUE
    print('Value:%s' % total)
def main():
    """Launch two threads that each run add_value()."""
    launched = 0
    while launched < 2:
        threading.Thread(target=add_value).start()
        launched += 1


if __name__ == '__main__':
    main()
3.生产者模式和消费者模式
import threading
import random
import time
# Shared balance: Producer threads add to it, Consumer threads subtract from it.
gMoney = 666
# Lock that Producer and Consumer acquire before touching gMoney and gTime.
gLock = threading. Lock( )
# Upper bound on the number of deposits made by all Producer threads together.
gTotalTimes = 10
# Number of deposits made so far; compared against gTotalTimes.
gTime = 0
class Producer(threading.Thread):
    """Thread that deposits random amounts into the shared gMoney balance."""

    def run(self):
        """Deposit every 0.5s until gTotalTimes deposits have been made overall.

        gMoney and gTime are only read or changed while holding gLock.
        """
        global gMoney
        global gTime
        while True:
            money = random.randint(666, 6666)
            # `with` guarantees the lock is released on every exit path
            # (normal, break, or exception), unlike paired acquire/release.
            with gLock:
                if gTime >= gTotalTimes:
                    break
                gMoney += money
                gTime += 1
                print("%s挣了%d元钱,余额%d元" % (threading.current_thread(), money, gMoney))
            # Sleep outside the lock so other threads can run meanwhile.
            time.sleep(0.5)
class Consumer(threading.Thread):
    """Thread that withdraws random amounts from the shared gMoney balance."""

    def run(self):
        """Try to spend a random amount once per second.

        The loop ends when a withdrawal cannot be covered and the producers
        have already made all gTotalTimes deposits. gMoney and gTime are only
        accessed while holding gLock.
        """
        global gMoney
        while True:
            money = random.randint(666, 6666)
            # `with` releases the lock on every exit path, including break.
            with gLock:
                if gMoney >= money:
                    gMoney -= money
                    print("%s消费了%d元钱,余额%d元" % (threading.current_thread(), money, gMoney))
                else:
                    if gTime >= gTotalTimes:
                        # No more deposits will arrive; stop this consumer.
                        break
                    print("%s准备消费类%d元钱,余额%d元,余额不足" % (threading.current_thread(), money, gMoney))
            # Sleep outside the lock so other threads can run meanwhile.
            time.sleep(1)
def main():
    """Start five named Producer threads, then three named Consumer threads."""
    producer_total, consumer_total = 5, 3
    for idx in range(producer_total):
        Producer(name='生产者线程%s' % idx).start()
    for idx in range(consumer_total):
        Consumer(name='消费者线程%s' % idx).start()


if __name__ == '__main__':
    main()
1.消费者通过死循环反复上锁的方式查看钱够不够花,反复上锁是一种很耗CPU的行为,不是最好的方式
2.可以使用threading.Condition
3.没有数据的时候,处在阻塞等待的状态,直到有数据才进行操作
Condition 版
* acquire 上锁
* release 释放锁
* wait 等待状态
* notify 通知正在等待的线程 默认通知第一个
* notify_all 通知所有正在等待的线程
* notify、notify_all 不会释放锁 必须在 release之前调用
import threading
import random
import time
# Shared balance: Producer threads add to it, Consumer threads subtract from it.
gMoney = 666
# Condition that guards gMoney and gTime; Consumer waits on it and Producer
# notifies it after each deposit.
gCondition = threading. Condition( )
# Upper bound on the number of deposits made by all Producer threads together.
gTotalTimes = 10
# Number of deposits made so far; compared against gTotalTimes.
gTime = 0
class Producer(threading.Thread):
    """Thread that deposits random amounts and wakes consumers waiting for money."""

    def run(self):
        """Deposit every 0.5s until gTotalTimes deposits have been made overall.

        gMoney and gTime are only read or changed while holding gCondition.
        After each deposit every waiting consumer is notified so it can
        re-check the balance.
        """
        global gMoney
        global gTime
        while True:
            money = random.randint(666, 6666)
            # `with` releases the condition's lock on every exit path.
            with gCondition:
                if gTime >= gTotalTimes:
                    print("老公总共生产了%s次" % gTime)
                    break
                gMoney += money
                gTime += 1
                print("%s挣了%d元钱,余额%d元" % (threading.current_thread(), money, gMoney))
                # notify_all() is the supported spelling; notifyAll() is a
                # deprecated alias. It must be called while the lock is held.
                gCondition.notify_all()
            # Sleep outside the lock so woken consumers can proceed.
            time.sleep(0.5)
class Consumer(threading.Thread):
    """Thread that withdraws random amounts, waiting on gCondition when short of money."""

    def run(self):
        """Spend a random amount once per second, blocking while the balance is too low.

        While the balance cannot cover the amount, the thread waits on
        gCondition instead of polling. It returns once the balance is
        insufficient and all gTotalTimes deposits have already been made.
        """
        global gMoney
        while True:
            money = random.randint(666, 6666)
            # `with` releases the condition's lock on every exit path,
            # including the early return below.
            with gCondition:
                # Re-check in a loop: another consumer may have spent the
                # money between the notification and this thread waking up.
                while gMoney < money:
                    print("%s准备消费类%d元钱,余额%d元,余额不足" % (threading.current_thread(), money, gMoney))
                    if gTime >= gTotalTimes:
                        # No further deposits will come; waiting would never end.
                        return
                    gCondition.wait()
                gMoney -= money
                print("%s消费%d元钱,余额%d元" % (threading.current_thread(), money, gMoney))
            # Sleep outside the lock so other threads can run meanwhile.
            time.sleep(1)
def main():
    """Start five named Producer threads, then three named Consumer threads."""
    # (thread class, name template, how many) in launch order.
    launch_plan = (
        (Producer, '生产者线程%s', 5),
        (Consumer, '消费者线程%s', 3),
    )
    for thread_cls, name_template, count in launch_plan:
        for idx in range(count):
            thread_cls(name=name_template % idx).start()


if __name__ == '__main__':
    main()
4.Queue线程安全队列
1.把数据存到队列中 线程安全 python模块叫做queue
先进先出 Queue
后入先出 LifoQueue
2.队列内部已经实现了锁,操作要么不做要么做完(原子操作),多线程中可以直接拿来用
Queue() # 初始化队列
qsize() # 返回队列大小
empty() # 判断是否为空
full() # 判断是否满了
get() # 从队列中取出一个数据(Queue先进先出,取出的是最早放入的数据;LifoQueue取出的是最后放入的数据)
put() # 将数据放到队列中
from queue import Queue
"""
1. Queue() 初始化队列
2. qsize() 返回队列大小
3. empty() 判断是否为空
4. full() 判断是否满了
5. get() 从队列中取出一个数据(Queue先进先出,取出的是最早放入的数据)
6. put() 将数据放到队列中
"""
import threading
import time
def set_value(q):
    """Put 0, 1, 2, ... onto q forever, pausing three seconds after each put.

    Args:
        q: object with a put() method; main() passes a queue.Queue.
    """
    next_item = 0
    while 1:
        q.put(next_item)
        next_item = next_item + 1
        time.sleep(3)
def get_value(q):
    """Print every item taken from q, forever.

    Args:
        q: object with a get() method; main() passes a queue.Queue.
    """
    while 1:
        item = q.get()
        print(item)
def main():
    """Connect set_value and get_value through one Queue of capacity 5, each on its own thread."""
    shared = Queue(5)
    workers = [
        threading.Thread(target=job, args=[shared])
        for job in (set_value, get_value)
    ]
    for worker in workers:
        worker.start()


if __name__ == '__main__':
    main()
5.实战案例-斗图网多线程爬取表情包
import threading
from queue import Queue
import requests
from lxml import etree
import re
import os
from urllib import request
class Producer(threading.Thread):
    """Worker thread that turns listing-page URLs into (image URL, file name) jobs.

    URLs are taken from page_queue; one tuple per image found on the page is
    put onto img_queue.
    """

    # Request headers sent with every page fetch.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) "
                      "Chrome/83.0.4103.106 Safari/537.36 "
    }

    def __init__(self, page_queue, img_queue, *args, **kwargs):
        """Store both queues; remaining arguments go to threading.Thread."""
        super(Producer, self).__init__(*args, **kwargs)
        self.page_queue = page_queue
        self.img_queue = img_queue

    def run(self):
        """Parse pages until page_queue has nothing left."""
        from queue import Empty

        while True:
            # A separate empty() check followed by a blocking get() is racy:
            # another producer can take the last URL in between and this
            # thread would then block forever. get_nowait() does both at once.
            try:
                url = self.page_queue.get_nowait()
            except Empty:
                break
            self.parse_page(url)

    def parse_page(self, url):
        """Fetch url and enqueue an (image URL, file name) tuple for each non-gif image.

        The file name is the image's alt text with some punctuation removed,
        plus the extension of the image URL.
        """
        response = requests.get(url, headers=self.headers)
        text = response.text
        html = etree.HTML(text)
        imgs = html.xpath("//div[@class='page-content text-center']//img[@class!='gif']")
        for img in imgs:
            img_url = img.get('data-original')
            if not img_url:
                # Without a source URL there is nothing to download.
                continue
            # A missing alt attribute yields None, which re.sub would reject.
            alt = img.get('alt') or ''
            alt = re.sub(r'[?\?\.。,,!!\*]', '', alt)
            suffix = os.path.splitext(img_url)[1]
            filename = alt + suffix
            self.img_queue.put((img_url, filename))
class Consumer(threading.Thread):
    """Worker thread that downloads the images queued by the producers."""

    def __init__(self, page_queue, img_queue, *args, poll_timeout=3.0, **kwargs):
        """Store both queues; remaining arguments go to threading.Thread.

        Args:
            page_queue: queue of listing-page URLs, used only to tell whether
                producers may still have pages to take.
            img_queue: queue of (image URL, file name) tuples to download.
            poll_timeout: seconds to wait for an image job before checking
                whether the work is finished.
        """
        super(Consumer, self).__init__(*args, **kwargs)
        self.page_queue = page_queue
        self.img_queue = img_queue
        self.poll_timeout = poll_timeout

    def run(self):
        """Download queued images until no job arrives and no pages remain.

        NOTE(review): the exit test is a heuristic. A producer that is still
        parsing its last page for longer than poll_timeout can be missed; a
        sentinel or join-based shutdown would make this exact.
        """
        from queue import Empty

        while True:
            # A separate empty() check followed by a blocking get() can hang
            # forever when another consumer takes the last job in between,
            # so wait with a timeout instead.
            try:
                img_url, filename = self.img_queue.get(timeout=self.poll_timeout)
            except Empty:
                if self.page_queue.empty():
                    break
                continue
            try:
                request.urlretrieve(img_url, 'images/' + filename)
            except OSError as err:
                # urllib's download errors derive from OSError; one failed
                # image should not kill the whole worker thread.
                print(filename + '下载失败: %s' % err)
                continue
            print(filename + '下载完成')
def main():
    """Queue listing pages 1-100 and start five Producer and five Consumer threads."""
    page_queue = Queue(100)
    img_queue = Queue(1000)

    # Consumer saves files under 'images/'; urlretrieve does not create the
    # directory, so make sure it exists before any download starts.
    os.makedirs('images', exist_ok=True)

    for page in range(1, 101):
        page_queue.put('https://www.doutula.com/photo/list/?page=%d' % page)

    for _ in range(5):
        Producer(page_queue, img_queue).start()

    for _ in range(5):
        Consumer(page_queue, img_queue).start()


if __name__ == '__main__':
    main()