13.使用Python抓取网页内容并解析出图片地址，将图片转为JPEG格式后保存本地

本文链接：https://blog.csdn.net/weixin_43292784/article/details/124122166

import os.path
import re
from io import BytesIO
from PIL import Image
import requests

def jpeg_file_name(img_url):
    """Return the local file name, with a ``.jpg`` extension, for *img_url*.

    The name is the last ``/``-separated segment of the URL with whatever
    extension it carried replaced by ``.jpg``, because every image is
    re-encoded as JPEG before it is written to disk.
    """
    base_name = img_url.split('/')[-1]
    # Swap only the real extension; a plain substring replace of "webp"
    # would also mangle names that merely contain that text.
    stem = os.path.splitext(base_name)[0]
    return stem + '.jpg'


def save_as_jpeg(img_data, path):
    """Decode the raw image bytes *img_data* and write them to *path* as JPEG.

    Raises:
        OSError: if the bytes cannot be decoded as an image or the file
            cannot be written.
    """
    with Image.open(BytesIO(img_data)) as im:
        # The JPEG writer rejects images with an alpha channel or a palette
        # (e.g. RGBA / P, common for WebP and PNG), so normalise to RGB first.
        rgb = im if im.mode == 'RGB' else im.convert('RGB')
        rgb.save(path, 'JPEG')


def main():
    """Download the pictures of every thread on the board's first page.

    Fetches the thread list page, follows each thread link matched by the
    regular expression, extracts the image URLs from each thread and stores
    the images as JPEG files in the ``girls`` directory. A thread or image
    that cannot be fetched or decoded is reported and skipped so that one
    bad item does not abort the whole run.
    """
    # Browser-like User-Agent sent with every request.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36",
    }
    # Seconds to wait for the server; without a timeout a stalled connection
    # would block the script indefinitely.
    timeout = 30

    # Fetch the HTML of the forum's street-fashion board (first page).
    html = requests.get(
        url='http://www.kankan2008.com/forum-126-1.html',
        headers=headers,
        timeout=timeout,
    ).text

    # Relative links of the threads listed on the board page.
    thread_pattern = '<th class="new">.*? <a href="(thread.*?html).*?</th>'
    thread_paths = re.findall(thread_pattern, html, re.S)

    # Image addresses inside a thread page.
    img_pattern = 'class="zoom" src="(.*?)" onmouseover'

    # Create the output directory; it is fine if it already exists.
    os.makedirs('girls', exist_ok=True)

    for thread_path in thread_paths:
        thread_url = 'http://www.kankan2008.com/' + thread_path
        try:
            thread_html = requests.get(
                url=thread_url, headers=headers, timeout=timeout
            ).text
        except requests.RequestException as exc:
            print(thread_url, "获取失败:", exc)
            continue

        for img_url in re.findall(img_pattern, thread_html):
            file_name = jpeg_file_name(img_url)
            try:
                response = requests.get(
                    url=img_url, headers=headers, timeout=timeout
                )
                # Fail early on 4xx/5xx instead of trying to decode an
                # error page as an image.
                response.raise_for_status()
                save_as_jpeg(response.content, os.path.join('girls', file_name))
            except (requests.RequestException, OSError) as exc:
                print(file_name, "下载失败:", exc)
                continue
            print(file_name, "下载完成")


if __name__ == "__main__":
    main()