前言
近期有许多项目需要这个功能,由于Python实现起来比较简单就这么做了,代码贴下来觉得好点个赞吧~
代码
# coding: utf-8
import os
import time
import requests
import urllib.parse
from bs4 import BeautifulSoup
from urllib.parse import urlparse
from fake_useragent import UserAgent
from multiprocessing.pool import ThreadPool
LOCATIONS = {}
GLOBAL_THREAD = 500
GLOBAL_TIMEOUT = 50
def get_links(keyword, generator, pages):
links = []
for page in range(int(pages.split("-")[0]), int(pages.split("-")[1]) + 1):
for genera in range(int(generator.split("-")[0]), int(generator.split("-")[1]) + 1):
links.append(
"http://www.baidu.com.cn/s?wd=" + urllib.parse.quote(keyword + str(genera)) + "&pn=" + str(page * 10))
re