"""Download every .jpg image from the jandan.net "ooxx" comment pages.

Starting at page 1, each page is fetched, its ``.jpg`` images are saved into
the current working directory, and the crawl moves on to the first link on
the page that points to another "ooxx" page. The crawl stops when a page
offers no such link or the link leads back to a page already crawled.
"""
import re
import time
from typing import Optional
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

START_URL = 'http://jandan.net/ooxx/page-1#comments'
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'}

# Raw strings keep the backslashes for the regex engine instead of relying on
# Python passing unknown string escapes through (which emits a warning).
NEXT_PAGE_RE = re.compile(r"^http://jandan.net/ooxx/page-\d*#comments")
JPG_SRC_RE = re.compile(r".*\.jpg")

PAGE_TIMEOUT = 10   # seconds; a listing page must not be able to hang the crawl
IMAGE_TIMEOUT = 2   # seconds; same per-image budget the script always used


def _fetch_page(page_url: str) -> BeautifulSoup:
    """Fetch *page_url* and return its parsed HTML.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            non-success HTTP status.
    """
    response = requests.get(page_url, headers=HEADERS, timeout=PAGE_TIMEOUT)
    response.raise_for_status()
    return BeautifulSoup(response.text, 'html.parser')


def _next_page_url(soup: BeautifulSoup) -> Optional[str]:
    """Return the href of the first link to an "ooxx" page, or None."""
    link = soup.find("a", {'href': NEXT_PAGE_RE})
    return None if link is None else link['href']


def _save_image(image_url: str, number: int) -> None:
    """Download *image_url* into the current directory.

    The file is named after the last path segment of the URL. *number* is
    only used for the progress message.

    Raises:
        requests.RequestException: if the download fails or the server
            answers with an error status.
        OSError: if the file cannot be written.
    """
    response = requests.get(image_url, headers=HEADERS, timeout=IMAGE_TIMEOUT)
    # Do not store an HTML error page under a .jpg name.
    response.raise_for_status()
    print("正在下载第 %d 张" % number)
    filename = image_url.rsplit('/', 1)[-1]
    with open(filename, 'wb') as f:
        f.write(response.content)


def main() -> None:
    """Crawl from START_URL, saving images page by page until no page is left."""
    visited = set()
    page_url = START_URL
    count = 1

    while page_url is not None and page_url not in visited:
        visited.add(page_url)
        soup = _fetch_page(page_url)

        sources = [img['src'] for img in soup.find_all("img", {'src': JPG_SRC_RE})]

        next_url = _next_page_url(soup)
        if next_url is not None:
            print(next_url)

        # Images are downloaded even when there is no next page, so the final
        # page's images are not lost.
        for src in sources:
            try:
                # urljoin resolves protocol-relative ("//host/x.jpg"),
                # absolute, and relative src values against the page URL.
                _save_image(urljoin(page_url, src), count)
            except (requests.RequestException, OSError):
                # Best effort: one bad image must not stop the crawl.
                print('error')
                continue
            count += 1

        page_url = next_url


if __name__ == "__main__":
    main()
Python 爬取妹子图
最新推荐文章于 2024-07-12 16:16:27 发布