线程通信
Python 提供了一个非常简单的线程通信机制 threading.Event(事件),其作用类似一个通用的条件变量:多个线程可以等待同一个事件的发生,事件被触发(set)之后,所有等待的线程都会被唤醒。
import threading
import time
def goevent():
    """Start a worker thread that blocks on a fresh event, and return the event.

    The worker prints ``go`` once the returned event has been set.

    Returns:
        threading.Event: the event the worker is waiting on; call ``set()``
        on it to release the worker.
    """
    e = threading.Event()

    def go():
        e.wait()  # block here until some other thread calls e.set()
        print("go")

    # The waiter has to run in its own thread, otherwise the caller would block.
    threading.Thread(target=go).start()
    return e
# Start the waiting worker and keep a handle on its event.
t = goevent()
# Let the worker sit in wait() for a while before releasing it.
time.sleep(3)
t.set() # fire the event
线程通信强化
import threading
import time
def goevent(rounds=10):
    """Start a worker that waits for the returned event ``rounds`` times.

    Each time the event is set, the worker wakes up, clears the event again
    and prints ``go <round index>``. The worker thread only finishes after it
    has been woken ``rounds`` times, so the caller must call ``set()`` that
    many times or the worker stays blocked.

    Args:
        rounds: number of wake-ups the worker waits for (default 10).

    Returns:
        threading.Event: the event used to wake the worker.
    """
    e = threading.Event()

    def go():
        for i in range(rounds):
            e.wait()   # block until the event is set
            e.clear()  # reset the flag so the next wait() blocks again
            # An Event is a single flag: set() calls that arrive before the
            # clear() above collapse into one wake-up.
            print("go", i)

    threading.Thread(target=go).start()
    return e
t = goevent()
# The worker waits for ten wake-ups, so signal ten times. With fewer signals
# the worker thread stays blocked in wait() and the program never exits.
for i in range(10):
    time.sleep(i)  # growing pause (0s, 1s, 2s, ...) before each signal
    t.set()
condition线程通讯与事件
import threading
import time
def go1():
    """Print 0..9 once per second while holding ``cond``, pausing at 5.

    When ``i == 5`` the thread calls ``cond.wait()``. ``wait()`` releases the
    underlying lock and blocks until another thread calls ``notify()`` or
    ``notify_all()`` on the same condition variable (or an optional timeout
    expires). Once woken it re-acquires the lock and returns.
    """
    with cond:
        for i in range(10):
            time.sleep(1)
            print(threading.current_thread().name, i)
            if i == 5:
                # No timeout is given, so this blocks until a notify arrives.
                cond.wait()
                # NOTE(review): the original indentation was lost; this print
                # is assumed to belong right after the wake-up.
                print("hahahha")
def go2():
    """Print 0..9 once per second while holding ``cond``, then notify a waiter.

    ``notify()`` wakes up threads waiting on this condition, if any. It wakes
    at most n waiters (one by default) and is a no-op when no thread is
    waiting. It raises ``RuntimeError`` if the calling thread does not hold
    the lock. The woken thread can only continue after this function leaves
    the ``with`` block and so releases the lock.
    """
    with cond:  # hold the condition's lock for the whole loop
        for i in range(10):
            time.sleep(1)
            print(threading.current_thread().name, i)
        cond.notify()  # wake one thread blocked in cond.wait()
cond = threading.Condition() # condition variable shared by go1 and go2
# go1 is started first; it waits on cond at i == 5 and go2 notifies it.
threading.Thread(target=go1).start()
threading.Thread(target=go2).start()
'''
代码逻辑
cond只有一个。线程1先锁定cond,当线程1跑到i==5的时候
调用cond.wait()进入等待,并把锁释放出来;
这时候线程2获得锁,一口气跑完所有的i,跑到最后以cond.notify()发出通知,
并在退出with块时把锁再释放出来,此时线程1重新获得锁并继续执行
'''
线程调度
import threading
import time
def go1():
    """Print the even numbers 0..8, handing control to the other thread each time.

    After every print the thread waits on ``cond`` (which releases the lock)
    and, once woken, notifies the other waiter before its next iteration.
    """
    with cond:
        for i in range(0, 10, 2):
            time.sleep(1)
            print(threading.current_thread().name, i)
            cond.wait()    # release the lock and sleep until notified
            # print("hahah")
            cond.notify()  # wake the other thread; it runs once we wait() again or exit
def go2():
    """Print the odd numbers 1..9, handing control to the other thread each time.

    After every print the thread notifies the waiter on ``cond`` and then
    waits itself, which releases the lock so the notified thread can run.
    """
    with cond:
        for i in range(1, 10, 2):
            time.sleep(1)
            print(threading.current_thread().name, i)
            cond.notify()  # wake the thread blocked in cond.wait()
            cond.wait()    # release the lock and sleep until notified back
cond = threading.Condition() # condition variable shared by go1 and go2
# go1 is started first so that it is the first to take the lock and wait.
threading.Thread(target=go1).start()
threading.Thread(target=go2).start()
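'''
逻辑:
首先明确:wait()调用后当前线程会阻塞,在被唤醒之前,它下面的代码是不会运行的。
首先线程1先锁定cond,打印出0后,线程1调用wait()进入等待并释放锁(注意此时线程2还没有拿到锁),线程2锁定cond,打印出1后
调用notify()唤醒线程1的wait()(如果取消go1中print("hahah")的注释,此时才会打印出"hahah"),随后线程2自己调用wait()进入等待并释放锁;线程1被唤醒后先notify()通知线程2,再打印出2并重新wait(),线程2才得以继续,如此循环
'''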
生产者消费者模式
import threading
import time
import queue
# Shared bounded queue between producer and consumers; put() blocks while it
# already holds 10 items.
q = queue.Queue(maxsize=10)
def producer(name):
    """Produce numbered items into the shared queue ``q`` forever.

    Puts one item every 0.5 seconds; ``q.put`` blocks while the queue is full.

    Args:
        name: label of the producer (accepted but not used by the body).
    """
    count = 0
    while True:
        count += 1
        q.put("骨头%s" % count)
        print("生产了骨头", count)
        time.sleep(0.5)
def consumer(name):
    """Take items from the shared queue ``q`` forever, one per second.

    ``q.get()`` blocks while the queue is empty.

    Args:
        name: label printed together with every item taken.
    """
    while True:
        item = q.get()
        print("[%s]取到[%s]并且吃了它..." % (name, item))
        time.sleep(1)
# One producer thread and five consumer threads, all sharing the queue.
p = threading.Thread(target=producer, args=("Tim",))
c1, c2, c3, c4, c5 = (
    threading.Thread(target=consumer, args=(label,))
    for label in ("King", "Wang2", "Wang3", "Wang4", "Wang5")
)
# Start the producer first, then the consumers in order.
for worker in (p, c1, c2, c3, c4, c5):
    worker.start()
线程池
主线程中可以获取某一个线程的状态或者某一个的任务的状态以及返回值
当一个线程完成的时候主线程能立即知道
import threadpool # 需要安装
import time
def show(name):
    """Print a greeting for ``name``; used as the task run by the thread pool."""
    print('hello', name)
namelist = ["A", "B", "C", "D"]
start = time.time()
pool = threadpool.ThreadPool(7)  # number of worker threads in the pool
requests = threadpool.makeRequests(show, namelist)  # one work request per name
print(requests)
print()
for req in requests:
    pool.putRequest(req)  # queue the request for a worker thread to pick up
# Block until every queued request has been processed. Without this the
# elapsed time only covers queuing the requests.
# NOTE(review): threadpool workers are presumably daemon threads, in which case
# the script could also exit before the work has run; confirm against its docs.
pool.wait()
end = time.time()
print(end - start)
with
import threading
num = 0 # module-level counter; visible to and shared by all threads
mutex = threading.Lock() # lock guarding updates to num (threading.Lock() returns a lock object)
class Myhtread(threading.Thread):
    """Thread that adds 1,000,000 to the global ``num`` under ``mutex``."""

    def run(self):
        """Increment ``num`` one million times while holding the lock, then print it."""
        global num
        # ``with`` acquires the lock on entry and releases it on exit.
        with mutex:
            # With small iteration counts the unsynchronised version rarely
            # shows a conflict; one million iterations makes it visible.
            for _ in range(1000000):
                num += 1
            # NOTE(review): the original indentation was lost; the print is
            # kept under the lock so the value shown is this thread's total.
            print(num)
mythread = []
# Start all five workers first so they actually run concurrently.
for i in range(5):
    t = Myhtread()
    t.start()
    mythread.append(t)
# Then wait for each of them. Calling join() inside the loop above would also
# avoid conflicts, but only because the threads would run one after another.
for thread in mythread:
    thread.join()
print("game over")
'''
with 的作用:进入代码块时自动获取锁,离开代码块时(即使发生异常)自动释放锁;
with 语句自 Python 2.5 起引入(2.6 起默认可用),并不是 Python 3 才有的新功能
'''
例子
# 爬取清华新闻的标题存入数据库
import requests
import threading
import pymysql
class tsinghua(threading.Thread):
    """Worker thread that scrapes news titles page by page and stores them in MySQL.

    Args:
        target_url: URL template with one ``{}`` placeholder. It is filled
            with ``'index'`` for page 1 and ``'index_<page>'`` for later pages.
        total_page: number of pages to fetch (pages 1..total_page).
        lock: lock held while the id counter is advanced and a row is written.
    """

    def __init__(self, target_url, total_page, lock):
        super().__init__()
        self.target_url = target_url
        self.total_page = total_page
        self.db = pymysql.connect(host='127.0.0.1',
                                  port=3306,
                                  user='root',
                                  password='root',
                                  db='tsinghua')
        self.id = 0  # running primary key for inserted rows
        self.lock = lock

    @staticmethod
    def _extract_titles(html):
        """Return the titles found after each line's ``class="jiequ">`` marker."""
        marker = 'class="jiequ">'
        titles = []
        for line in html.split('\n'):
            parts = line.split(marker)
            # Require the full marker including '>', so a line that merely
            # contains class="jiequ" followed by other attributes is skipped
            # instead of raising IndexError.
            if len(parts) > 1:
                titles.append(parts[1].split('</a>')[0])
        return titles

    def run(self):
        """Fetch every page, insert each title, and close the connection when done."""
        try:
            for page in range(1, self.total_page + 1):
                page_name = 'index' if page == 1 else 'index_' + str(page)
                url = self.target_url.format(page_name)
                response = requests.get(url)
                response.encoding = 'utf8'
                for title in self._extract_titles(response.text):
                    with self.lock:
                        self.id += 1
                        # Parameterized query: scraped titles may contain
                        # quotes, which broke (and could inject into) the
                        # previously string-formatted SQL.
                        self.write_to_mysql(
                            'INSERT INTO news (id,title) values (%s,%s);',
                            (self.id, title))
                print('[+] writ mysql over ~')
                print('[+] current page: {} ok'.format(page))
        finally:
            # The connection is opened in __init__ and was never released.
            self.db.close()

    def write_to_mysql(self, sql, params=None):
        """Execute one statement and commit it; roll back and print the error on failure.

        Args:
            sql: SQL text, optionally containing ``%s`` placeholders.
            params: values for the placeholders, or None to run ``sql`` as is.
        """
        cursor = self.db.cursor()
        try:
            cursor.execute(sql, params)
            self.db.commit()
        except Exception as e:
            # Best effort: report the failure and keep the scraper running.
            print(e)
            self.db.rollback()
        finally:
            cursor.close()
def Running():
    """Scrape 30 pages of Tsinghua news on one worker thread and wait for it to finish."""
    lock = threading.Lock()
    thread_1 = tsinghua(
        'http://news.tsinghua.edu.cn/publish/thunews/9648/{}.html', 30, lock)
    thread_1.start()
    thread_1.join()
    print('Over !')


if __name__ == "__main__":
    Running()
正则表达式
例子
# 爬取89IP的信息存入本地
import re
import requests
import threading
class ip(threading.Thread):
    """Worker thread that scrapes table cells from listing pages into ``ip.txt``.

    Args:
        target_url: URL template with one ``{}`` placeholder; it is filled
            with ``'index_<page>'`` for every page.
        total_page: number of pages to fetch (pages 1..total_page).
        lock: lock held while a page's result is appended to the file.
    """

    # Compiled once instead of once per page; the pattern text is unchanged.
    # It captures the text of a <td> cell laid out as
    # "<td>\n\t\t" + one character + value + "\t\t</td>".
    _CELL_PATTERN = re.compile('.*<td>\n\t\t.(.*?)\t\t</td>.*')

    def __init__(self, target_url, total_page, lock):
        super().__init__()
        self.target_url = target_url
        self.total_page = total_page
        self.lock = lock

    def run(self):
        """Fetch each page, extract the cell values and append them to the file."""
        for page in range(1, self.total_page + 1):
            url = self.target_url.format('index_' + str(page))
            response = requests.get(url)
            response.encoding = 'utf8'
            # Named ``cells`` rather than ``ip`` so the class name is not shadowed.
            cells = self._CELL_PATTERN.findall(response.text)
            with self.lock:
                self.write(cells)
            print('[+] current page: {} ok'.format(page))

    def write(self, ip):
        """Append ``str(ip)`` plus a newline to ``ip.txt`` in the working directory.

        Args:
            ip: the values extracted from one page; written using their
                ``str()`` representation.
        """
        str_ = str(ip) + '\n'
        with open('ip.txt', mode='a', encoding='utf8') as file:
            file.write(str_)
def Running():
    """Scrape 10 listing pages of 89ip.cn on one worker thread and wait for it to finish."""
    lock = threading.Lock()
    # ip.run() fills the placeholder with 'index_<page>', so the template must
    # not contain 'index_' itself; otherwise the URL becomes
    # '.../index_index_<page>.html'.
    thread_1 = ip(
        'http://www.89ip.cn/{}.html', 10, lock)
    thread_1.start()
    thread_1.join()
    print('Over !')


if __name__ == "__main__":
    Running()