import os
import ssl
import time
import urllib
import urllib.parse
import urllib.request

import lxml
import requests
from bs4 import BeautifulSoup
# Running index used to name downloaded image files. crawl() declares it
# `global`, uses it in the output file name and increments it after every
# download, so numbering starts at 190.
x=190
# Replace the ssl module's default HTTPS context factory with the unverified
# one.
# NOTE(review): this appears to turn off TLS certificate verification for
# urllib HTTPS requests process-wide -- confirm this is really intended.
ssl._create_default_https_context = ssl._create_unverified_context
def crawl(url):
    """Download every image referenced by an ``<img>`` tag on one page.

    The page at *url* is fetched once, with a browser-like User-Agent, and
    parsed with BeautifulSoup's lxml parser.  Each image found is saved as
    ``image/<x>.jpg``, where ``x`` is the module-level counter; the counter
    is incremented after every download so successive files get distinct
    names, also across calls.

    Args:
        url: Address of the HTML page to scan for images.

    Raises:
        requests.RequestException: If the page cannot be fetched or the
            server answers with an HTTP error status.
        urllib.error.URLError: If an image download fails.
    """
    global x

    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 UBrowser/6.2.4098.3 Safari/537.36'}
    # Fetch the page a single time and parse that very response, so the
    # User-Agent header is actually applied to the HTML being scraped.
    response = requests.get(url, headers=headers, timeout=15)
    print(response)
    # Fail loudly on 4xx/5xx instead of scraping an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'lxml')

    # urlretrieve does not create missing directories.
    os.makedirs('image', exist_ok=True)

    for img in soup.find_all('img'):
        src = img.get('src')
        if not src:
            # <img> without a usable src (e.g. lazy-loaded placeholders).
            continue
        # Resolve relative and protocol-relative sources against the page.
        link = urllib.parse.urljoin(url, src)
        print(link)
        # Pause between downloads to stay polite to the server.
        time.sleep(1.2)
        urllib.request.urlretrieve(link, os.path.join('image', '%s.jpg' % x))
        x += 1
# Walk status pages 2, 3 and 4 of the profile and download the images
# found on each of them.
for page_number in range(2, 5):
    crawl('https://www.douban.com/people/CINO.X/statuses?p=%s' % page_number)
# Python bs4爬取图片
# 最新推荐文章于 2024-07-23 09:30:00 发布