import urllib.request
from urllib import request
from bs4 import BeautifulSoup
# Running index for saved image files; only the optional download step
# inside crawl() (disabled by default) reads and increments it.
n = 1

# Step 1: fetch the page source.
# The ``kw`` query parameter is the URL-encoded search keyword; it decodes to
# the same text that crawl() matches against each image's ``alt`` attribute.
url = 'https://www.duitang.com/search/?kw=%E6%98%93%E7%83%8A%E5%8D%83%E7%8E%BA&type=feed'


def crawl(url):
    """Fetch *url* and collect the image links found in its HTML.

    The page is requested with a desktop-browser ``User-Agent`` header and
    parsed with BeautifulSoup's built-in ``html.parser``. Only ``<img>`` tags
    whose ``alt`` attribute equals the search keyword are considered.

    Args:
        url: Address of the page to download.

    Returns:
        A list with the ``src`` value of every matching ``<img>`` tag, in
        document order. Tags without a ``src`` attribute are skipped.

    Raises:
        urllib.error.URLError: If the page cannot be fetched (this includes
            ``urllib.error.HTTPError`` for HTTP error responses).
    """
    # Send browser-like headers so that simple anti-crawler checks do not
    # reject the request.
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36'}
    req = request.Request(url, headers=headers)

    # The timeout keeps an unreachable URL from blocking forever; the ``with``
    # block guarantees the HTTP response is closed once it has been read.
    with urllib.request.urlopen(req, timeout=20) as page:
        contents = page.read()

    # Step 2: extract the image links. (Other tools that could do this:
    # re, pyquery, selenium.) 'html.parser' ships with Python; 'lxml' is the
    # alternative parser.
    soup = BeautifulSoup(contents, 'html.parser')
    # To collect every image on the page instead, drop the ``attrs`` filter:
    #     soup.find_all('img')
    images = soup.find_all('img', attrs={"alt": "易烊千玺"})

    links = []
    for image in images:
        link = image.get('src')
        if not link:
            # No usable source on this tag; nothing to record or download.
            continue
        links.append(link)

        # Step 3 (optional, disabled by default): save the image to disk.
        # To enable it, add ``global n`` as the first statement of this
        # function and uncomment the lines below. The ``images`` directory
        # must already exist.
        #     print("downloading image %s" % n)
        #     urllib.request.urlretrieve(link, 'images/%s.jpg' % n)
        #     n += 1

    return links