给大佬们观赏观赏,爬取美女图片,各位大佬们小心身体哈。
# Goal: crawl every listing page under a tag on umei.cc and save all
# gallery thumbnails to a local folder.
# Entry listing URL: http://www.umei.cc/tags/meishaonv_1.htm
import os
import time

import requests                 # HTTP client: fetches pages and image bytes
from bs4 import BeautifulSoup   # HTML parser: extracts links and <img> srcs

BASE_URL = 'http://www.umei.cc'
START_URL = BASE_URL + '/tags/meishaonv_1.htm'
SAVE_DIR = 'D:/IT/图片/'        # download target; created automatically if missing
# One shared header dict instead of three identical copies.
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) '
                  'AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/69.0.3497.100 Safari/537.36',
}


def fetch_soup(session, url):
    """Download *url* and return its parsed BeautifulSoup document."""
    resp = session.get(url, headers=HEADERS)
    # The site declares the wrong charset; trust the detected one instead.
    resp.encoding = resp.apparent_encoding
    return BeautifulSoup(resp.text, 'html.parser')


def collect_page_urls(soup):
    """Return the absolute URL of every pager entry (<li class="hide">)."""
    pages = []
    for li in soup.find_all('li', class_='hide'):
        href = li.find('a').get('href')
        if href:
            # BUG FIX: the original also appended START_URL on every
            # iteration, crawling the first page once per pager entry.
            pages.append(BASE_URL + href)
    return pages


def collect_image_urls(soup):
    """Return the src of every <img> inside the page's TypeList section."""
    type_list = soup.find('div', class_='TypeList')
    if type_list is None:
        # Some pages have no gallery section; skip instead of crashing.
        return []
    return [img.get('src') for img in type_list.select('ul li a img')]


def download_images(session, image_urls):
    """Fetch each image URL and write it into SAVE_DIR."""
    os.makedirs(SAVE_DIR, exist_ok=True)  # original crashed if the dir was absent
    for src in image_urls:
        resp = session.get(src, headers=HEADERS)
        # Use the real filename from the URL, not a fixed 7-char slice
        # (the slice could collide or even contain a '/').
        name = src.rsplit('/', 1)[-1]
        with open(os.path.join(SAVE_DIR, name), 'wb') as f:
            f.write(resp.content)  # binary payload -> .content, never .text
        print('成功!', name)


def main():
    """Crawl the pager, gather all image URLs, then download them."""
    with requests.Session() as session:  # reuse one TCP connection pool
        first = fetch_soup(session, START_URL)
        # Include the entry page itself exactly once.
        page_urls = [START_URL] + collect_page_urls(first)
        image_urls = []
        for page in page_urls:
            time.sleep(2)  # be polite to the server between page fetches
            image_urls.extend(collect_image_urls(fetch_soup(session, page)))
        download_images(session, image_urls)


if __name__ == '__main__':
    main()
运行结果:(截图略)