Python crawler + image scraping

This crawler scrapes the images posted in the 美女 (beauty) tieba on Baidu Tieba: it pages through the thread list (the pn parameter advances by 50 per page), collects the thread links with XPath, and downloads every BDE_Image in each thread.
The code is as follows:

import requests
from lxml import etree
class Tieba:
    def __init__(self, name, start_page, end_page):

        self.headers = {"User-Agent": "Mozilla/5.0"}
        self.name = name
        self.start_page = start_page
        self.end_page = end_page

    def gettieba_url(self):
        """
        Fetch the thread URLs from each list page.
        """
        for i in range(self.start_page - 1, self.end_page):
            print(self.name)
            # Each list page holds 50 threads, so the pn offset advances by 50 per page.
            url = "https://tieba.baidu.com/f?kw=" + self.name + "&ie=utf-8&pn=" + str(i * 50)
            response = requests.get(url, headers=self.headers)
            xml = etree.HTML(response.text)
            tiezi_list = xml.xpath('//div[@class="t_con cleafix"]//a[@class="j_th_tit "]/@href')
            print("*" * 25 + str(i + 1) + "*" * 25)
            for tiezi_url in tiezi_list:
                self.getImage(tiezi_url)

    def getImage(self, tiezi_url):
        """
        Collect the image URLs from one thread page.
        """
        base_url = "https://tieba.baidu.com"
        full_url = base_url + tiezi_url
        response = requests.get(url=full_url, headers=self.headers)
        xml = etree.HTML(response.text)
        img_list = xml.xpath('//div[@class="p_content  "]//img[@class="BDE_Image"]/@src')
        for img in img_list:
            print(img)
            self.downloadImage(img)

    def downloadImage(self, img):
        """
        Save one image to disk.
        """
        try:
            image = requests.get(url=img, headers=self.headers, stream=True)
            # The original naming scheme: the last ten characters of the image URL.
            filename = img[-10:]
            file = "H:\\图片\\" + filename
            with open(file, "wb") as f:
                for chunk in image.iter_content(chunk_size=1024):
                    f.write(chunk)
        except Exception as reason:
            print("{0} download failed: {1}".format(img, reason))
        else:
            print("{0} download finished.".format(img))


if __name__ == "__main__":
    name = input("Enter the tieba to crawl: ")
    start_page = int(input("Enter the start page: "))
    end_page = int(input("Enter the end page: "))
    tieba = Tieba(name, start_page, end_page)
    tieba.gettieba_url()
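
Two fragile spots worth noting: the hard-coded save path assumes H:\图片 already exists, and slicing the last ten characters of the URL can collide or produce odd filenames. Below is a minimal sketch of a sturdier download helper, assuming you are free to pick the target directory (download_image and the images directory are my own names, not part of the original code):

import os
import requests

def download_image(img_url, save_dir="images"):
    """Hypothetical variant: name the file after the URL tail and create the folder first."""
    os.makedirs(save_dir, exist_ok=True)   # create the directory if it is missing
    filename = img_url.split("/")[-1]      # e.g. "abc123.jpg" from the end of the URL
    path = os.path.join(save_dir, filename)
    response = requests.get(img_url, stream=True, timeout=10)
    response.raise_for_status()            # surface HTTP errors instead of failing silently
    with open(path, "wb") as f:
        for chunk in response.iter_content(chunk_size=1024):
            f.write(chunk)
    return path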

