import requests
import bs4
import time
from threading import Thread
from queue import Queue
# Hand-off channel shared by the worker threads: the producer puts
# (image_url, movie_name) tuples on it and the consumer takes them off.
# (A `global` statement is meaningless at module scope, so none is used.)
my_queue = Queue()

# Wall-clock timestamp taken at import time; used at the end of the script
# to report the total elapsed time.
start_time = time.time()
print(start_time)
class MyThread1(Thread):
    """Producer thread: scrape the movie ranking page at dianying.2345.com.

    For every ``<li>`` entry in the ranking list the poster URL and the movie
    title are put on the module-level ``my_queue`` as an ``(img_url, name)``
    tuple, and a short summary of the entry is printed.

    When the thread finishes -- normally or because of an error -- it puts
    ``None`` on the queue so that the consumer knows no more work is coming.

    Args:
        timeout: Seconds to wait for the ranking page before giving up.
    """

    def __init__(self, timeout=10):
        super().__init__()
        self.timeout = timeout
        # Browser-like User-Agent sent with the ranking-page request.
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) "
                          "Chrome/70.0.3538.110 Safari/537.36"
        }

    def run(self):
        """Scrape the page, then always signal end-of-work to the consumer."""
        try:
            self._scrape()
        finally:
            # Sentinel: without it the consumer would block on the queue
            # forever once the last real item has been taken (or if the
            # scrape failed before producing anything).
            my_queue.put(None)

    def _scrape(self):
        """Fetch the ranking page, enqueue every poster and print each entry."""
        response = requests.get(
            "http://dianying.2345.com/top/",
            headers=self.headers,
            timeout=self.timeout,
        )
        # Fail loudly on an HTTP error instead of parsing an error page.
        response.raise_for_status()

        soup = bs4.BeautifulSoup(response.text, "lxml")
        movies_list = soup.find('ul', class_="picList clearfix")
        for top in movies_list.find_all('li'):
            # The page gives the poster as a scheme-less URL; add the scheme.
            img_url = "http:" + top.find('img')["src"]
            print(img_url)
            name = top.find('span', class_="sTit").a.text
            my_queue.put((img_url, name))

            # Not every entry carries a release-date span; fall back to the
            # placeholder text only in that case instead of swallowing every
            # possible exception.
            release_tag = top.find('span', class_='sIntro')
            if release_tag is not None:
                release = release_tag.text
            else:
                release = "暂无上映时间"

            # NOTE(review): the original looked up <p class="pActor"> but never
            # used the result, so the actor line has always been printed empty.
            # The output is kept unchanged here; confirm whether the actor
            # names were meant to be shown.
            actor = ''
            intro = top.find('p', class_="pTxt pIntroShow").text
            print("片名:{}\t{}\n{}\n{} \n \n".format(name, release, actor, intro))


class MyThread2(Thread):
    """Consumer thread: download every queued poster image to disk.

    Takes ``(img_url, name)`` tuples from the module-level ``my_queue`` and
    saves each image as ``<name>.png`` in ``F:\\spider\\spider\\image``.  The
    loop ends when ``None`` is taken from the queue.

    Args:
        timeout: Seconds to wait for each image download before giving up.
    """

    def __init__(self, timeout=10):
        super().__init__()
        self.timeout = timeout

    def run(self):
        """Drain the queue until the ``None`` sentinel is received."""
        while True:
            item = my_queue.get()
            if item is None:
                break
            get_data, name = item
            try:
                # Download before opening the file so a failed request does
                # not leave an empty image behind.
                response = requests.get(get_data, timeout=self.timeout)
                response.raise_for_status()
                # Backslashes are doubled because "\s" and "\i" are not valid
                # escape sequences; the resulting path is unchanged.
                with open('F:\\spider\\spider\\image\\' + name + '.png', 'wb') as f:
                    f.write(response.content)
            except (requests.RequestException, OSError) as exc:
                # One bad poster (network error, unusable file name, ...)
                # must not stop the remaining downloads.
                print("failed to save {}: {}".format(get_data, exc))
if __name__ == '__main__':
    # Create both workers first, launch them in order, then wait for each
    # one to finish before measuring the elapsed time.
    workers = (MyThread1(), MyThread2())
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()

    # Report the finish timestamp and the total wall-clock duration since
    # start_time was recorded.
    end_time = time.time()
    print(end_time)
    print(end_time - start_time)
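# 爬虫实战23:多线程爬取2345电影排行榜 (Crawler in Practice 23: multithreaded scraping of the 2345 movie ranking)
# 最新推荐文章于 2022-10-31 15:27:06 发布 (latest recommended article published 2022-10-31 15:27:06)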