import threading
import time
def coding():
    """Print a "writing code" progress line three times, pausing 1s after each."""
    step = 0
    while step < 3:
        print('正在写代码%s' % step)
        time.sleep(1)
        step += 1
def drawing():
    """Print a "drawing" progress line three times, pausing 1s after each."""
    step = 0
    while step < 3:
        print('正在画画%s' % step)
        time.sleep(1)
        step += 1
if __name__ == '__main__':
    # Run both demo functions concurrently, each on its own thread.
    t1, t2 = threading.Thread(target=coding), threading.Thread(target=drawing)
    for worker in (t1, t2):
        worker.start()
类实现
import threading
import time
class CodingThread(threading.Thread):
    """Thread subclass for the "writing code" demo task."""

    def run(self):
        """Print the current thread object three times, sleeping 1s after each."""
        remaining = 3
        while remaining > 0:
            print('正在写代码%s' % threading.current_thread())
            time.sleep(1)
            remaining -= 1
class DrawingThread(threading.Thread):
    """Thread subclass for the "drawing" demo task."""

    def run(self):
        """Print the current thread object three times, sleeping 1s after each."""
        remaining = 3
        while remaining > 0:
            print('正在画画%s' % threading.current_thread())
            time.sleep(1)
            remaining -= 1
if __name__ == '__main__':
    # Instantiate one thread of each kind, then start them in order.
    t1, t2 = CodingThread(), DrawingThread()
    for worker in (t1, t2):
        worker.start()
Lock版本的生产者和消费者模式
import random
import threading
import time
# Shared state for the Lock-based producer/consumer demo.
gLock = threading.Lock()  # taken by producers and consumers around balance updates
gTotalTimes = 10          # upper bound on the number of deposits
gTimes = 0                # deposits made so far
gMoney = 1000             # current balance
class Producer(threading.Thread):
    """Worker thread that deposits random amounts into the shared balance."""

    def run(self):
        """Deposit until ``gTotalTimes`` deposits have been made in total.

        Each round picks a random amount in [100, 1000], adds it to
        ``gMoney`` while holding ``gLock``, bumps ``gTimes``, then sleeps
        0.5s outside the lock.
        """
        global gMoney
        global gTimes
        while True:
            money = random.randint(100, 1000)
            # The context manager releases the lock on every exit path
            # (normal, ``break``, or an exception), so it cannot be leaked.
            with gLock:
                if gTimes >= gTotalTimes:
                    break
                gMoney += money
                print('%s生产了%d元钱, 剩余%d元钱' % (threading.current_thread(), money, gMoney))
                gTimes += 1
            time.sleep(0.5)
class Consumer(threading.Thread):
    """Worker thread that withdraws random amounts from the shared balance."""

    def run(self):
        """Withdraw until funds run short after production has finished.

        Each round picks a random amount in [100, 1000]. While holding
        ``gLock`` it either withdraws the amount, stops (balance too low and
        ``gTimes`` has reached ``gTotalTimes``), or reports the shortfall.
        It sleeps 0.5s outside the lock between rounds.
        """
        global gMoney
        while True:
            money = random.randint(100, 1000)
            # ``with`` guarantees the lock is released on break/exception.
            with gLock:
                if gMoney >= money:
                    gMoney -= money
                    print('%s消费者消费了%d元钱, 剩余%d元钱' % (threading.current_thread(), money, gMoney))
                elif gTimes >= gTotalTimes:
                    # Not enough money and no more deposits will arrive.
                    break
                else:
                    print('%s消费者准备消费%d元钱,剩余%d元钱,不足!' % (threading.current_thread(), money, gMoney))
            time.sleep(0.5)
if __name__ == '__main__':
    # Five producers first, then three consumers; none are joined explicitly.
    for _ in range(5):
        Producer().start()
    for _ in range(3):
        Consumer().start()
Condition版本的生产者与消费者模式
import random
import threading
import time
# Shared state for the Condition-based producer/consumer demo.
gCondition = threading.Condition()  # lock + wait/notify used around balance updates
gTotalTimes = 10                    # upper bound on the number of deposits
gTimes = 0                          # deposits made so far
gMoney = 1000                       # current balance
class Producer(threading.Thread):
    """Worker thread that deposits money and wakes waiting consumers."""

    def run(self):
        """Deposit until ``gTotalTimes`` deposits have been made in total.

        Each round picks a random amount in [100, 1000], adds it to
        ``gMoney`` while holding ``gCondition``, bumps ``gTimes`` and calls
        ``notify_all()``. It then sleeps 0.5s outside the lock.
        """
        global gMoney
        global gTimes
        while True:
            money = random.randint(100, 1000)
            # ``with`` releases the condition's lock on every exit path,
            # including ``break`` and exceptions.
            with gCondition:
                if gTimes >= gTotalTimes:
                    break
                gMoney += money
                print('%s生产了%d元钱, 剩余%d元钱' % (threading.current_thread(), money, gMoney))
                gTimes += 1
                # Notify consumers as soon as a deposit has been made.
                gCondition.notify_all()
            time.sleep(0.5)
class Consumer(threading.Thread):
    """Worker thread that withdraws money, waiting on the condition when short."""

    def run(self):
        """Withdraw until funds run short after production has finished.

        Each round picks a random amount in [100, 1000]. While the balance
        is too low it waits on ``gCondition``, or returns once ``gTimes``
        has reached ``gTotalTimes``. After a withdrawal it sleeps 0.5s
        outside the lock.
        """
        global gMoney
        while True:
            money = random.randint(100, 1000)
            # ``with`` releases the condition's lock on return/exception too.
            with gCondition:
                # Must be ``while`` rather than ``if``: after wait() returns,
                # the balance has to be re-checked before withdrawing.
                while gMoney < money:
                    if gTimes >= gTotalTimes:
                        return
                    print('%s准备消费%d元钱, 剩余%d元钱, 不足!' % (threading.current_thread(), money, gMoney))
                    gCondition.wait()
                gMoney -= money
                print('%s消费了%d元钱, 剩余%d元钱' % (threading.current_thread(), money, gMoney))
            time.sleep(0.5)
if __name__ == '__main__':
    # Two producers first, then three consumers; none are joined explicitly.
    for _ in range(2):
        Producer().start()
    for _ in range(3):
        Consumer().start()
2. 斗图爬虫
单线程实现
import requests
from lxml import etree
import re
import os
from urllib import request
# Single-threaded crawler: fetch one listing page and download every
# non-GIF image it references into the local "images/" directory.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36",
}
url = "https://www.doutula.com/photo/list/?page=1"
# urlretrieve does not create missing directories, so make sure the
# download target exists before the first image is saved.
os.makedirs("images", exist_ok=True)
response = requests.get(url, headers=headers)
htmlE = etree.HTML(response.text)
imgs = htmlE.xpath("//div[@class='page-content text-center']//img[@class!='gif']")
for img in imgs:
    img_url = img.get("data-original")
    alt = img.get("alt")
    if img_url is None or alt is None:
        # Without both attributes there is no URL to fetch or no name to
        # save under; skip instead of crashing on None.
        continue
    # Drop punctuation from the caption before using it as a file name.
    alt = re.sub(r"[\??,,\.!!]","", alt)
    suffix = os.path.splitext(img_url)[1]
    filename = alt + suffix
    request.urlretrieve(img_url, "images/" + filename)
多线程(生产者消费者模式)实现
import os
import re
import threading
from queue import Queue
from urllib import request
import requests
from lxml import etree
import time
# Request headers sent with every page fetch (desktop Chrome User-Agent).
HEADERS = {}
HEADERS["User-Agent"] = "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36"
class Producer(threading.Thread):
    """Worker thread that turns listing-page URLs into image download jobs.

    Args:
        page_queue: queue of listing-page URLs to fetch.
        image_queue: queue receiving ``(image_url, filename)`` tuples.
        *args, **kwargs: forwarded to ``threading.Thread``.
    """

    def __init__(self, page_queue, image_queue, *args, **kwargs):
        super(Producer, self).__init__(*args, **kwargs)
        self.page_queue = page_queue
        self.image_queue = image_queue

    def run(self):
        """Parse pages from ``page_queue`` until it is empty."""
        from queue import Empty

        while True:
            # Checking empty() and then calling a blocking get() is racy:
            # several producers can pass the check for the last page and
            # the losers would block forever. A non-blocking get makes
            # "take a page or stop" a single atomic step.
            try:
                url = self.page_queue.get_nowait()
            except Empty:
                break
            print("---")
            self.parse_page(url)

    def parse_page(self, url):
        """Fetch ``url`` and enqueue one job per non-GIF image found on it."""
        response = requests.get(url, headers=HEADERS)
        htmlE = etree.HTML(response.text)
        imgs = htmlE.xpath("//div[@class='page-content text-center']//img[@class!='gif']")
        for img in imgs:
            img_url = img.get("data-original")
            alt = img.get("alt")
            if img_url is None or alt is None:
                # Without both attributes there is no URL to fetch or no
                # name to save under; skip instead of killing the thread.
                continue
            # Drop punctuation (and '*') from the caption before using it
            # as a file name.
            alt = re.sub(r"[\??,,\.!!*]", "", alt)
            suffix = os.path.splitext(img_url)[1]
            filename = alt + suffix
            self.image_queue.put((img_url, filename))
            # NOTE(review): the source lost its indentation; the size print
            # and the 0.5s throttle are assumed to run once per image.
            print(self.image_queue.qsize())
            time.sleep(0.5)
class Consumer(threading.Thread):
    """Worker thread that downloads queued images into ``images/``.

    Args:
        page_queue: queue of listing-page URLs still to be parsed; only
            inspected to decide when to stop.
        image_queue: queue of ``(image_url, filename)`` tuples to download.
        *args, **kwargs: forwarded to ``threading.Thread``.
    """

    # Seconds to wait for a new image before re-checking whether to stop.
    POLL_TIMEOUT = 3

    def __init__(self, page_queue, image_queue, *args, **kwargs):
        super(Consumer, self).__init__(*args, **kwargs)
        self.page_queue = page_queue
        self.image_queue = image_queue

    def run(self):
        """Download images until both queues have drained.

        The thread stops when no image arrives within ``POLL_TIMEOUT``
        seconds and ``page_queue`` is empty.
        NOTE(review): a producer still parsing its last page for longer
        than the timeout can make the consumer stop early; a sentinel or
        join-based shutdown would be needed to rule that out.
        """
        from queue import Empty

        while True:
            # A timed get replaces the empty()-then-get() pattern, which
            # could block forever when another consumer took the last item
            # between the check and the get.
            try:
                image_url, filename = self.image_queue.get(timeout=self.POLL_TIMEOUT)
            except Empty:
                if self.page_queue.empty():
                    break
                continue
            # urlretrieve does not create missing directories.
            os.makedirs("images", exist_ok=True)
            request.urlretrieve(image_url, "images/" + filename)
            print(filename + "下载完成。。。")
            time.sleep(0.5)
if __name__ == '__main__':
    url = "https://www.doutula.com/photo/list/?page={}"
    page_queue = Queue(10)
    image_queue = Queue(1000)
    # Seed the work list with listing pages 1 and 2.
    for page_no in (1, 2):
        page_queue.put(url.format(page_no))
    # Five page parsers.
    for _ in range(5):
        Producer(page_queue, image_queue).start()
    # NOTE(review): the source lost its indentation; the 1s pause is assumed
    # to sit between starting the producers and starting the consumers.
    time.sleep(1)
    # Two downloaders.
    for _ in range(2):
        Consumer(page_queue, image_queue).start()
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
# Explicit-wait demo: open the page and wait for an element to appear.
driver_path = r"D:\chromedriver\chromedriver.exe"
# NOTE(review): newer Selenium 4 releases dropped the ``executable_path``
# keyword in favour of a ``Service`` object — confirm the installed version
# before upgrading.
driver = webdriver.Chrome(executable_path=driver_path)
try:
    driver.get("http://www.douban.com")
    # Wait up to 10 seconds for an element with class "app-title" to be
    # present in the DOM.
    element = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.CLASS_NAME, 'app-title'))
    )
    print(element)
finally:
    # Always close the browser session, even when the wait above fails, so
    # the browser and driver processes are not left running.
    driver.quit()