在做爬虫项目的时候，想要提高相应的爬取效率，多线程爬虫是避免不了的。
下面，跟着小编直接进入主题，代码如下：
import requests
from threading import Thread
import random
from queue import Queue
import time
# Target search keywords to crawl.
keyword_list = [
    'ring',
    'rings for women',
    'box',
    'book',
    'toy',
]
# Pool of browser User-Agent strings; one entry is chosen at random when the
# request headers are built.
ua = [
    # Firefox 4 on Windows 7. The Gecko revision token is "rv:<version>"
    # (colon, not comma) -- the original "rv,2.0.1" was a malformed UA string.
    'Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1',
    # Chrome 17 on macOS 10.7.
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11',
    # Chrome 93 on 64-bit Windows 10.
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36',
]
headers = {'User-Agent': random.choice(ua),
'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
'accept-encoding