爬取百度贴吧(搞笑吧)的图片

import requests
from lxml import etree
import os

# Crawl a Baidu Tieba forum: walk the requested range of thread-list pages,
# open every thread found on each page, and save every inline post image to
# a local directory as sequentially numbered .jpg files.

url = "https://tieba.baidu.com/f?ie=utf-8"

# Seconds to wait for a server before giving up. requests has no default
# timeout, so without this a single stalled connection blocks the crawl forever.
REQUEST_TIMEOUT = 10

# Directory that receives the downloaded images.
path = './imagesll3/'

# Name of the forum to crawl, entered by the user.
kw = input("请输入您要爬取贴吧的名称:")
params = {"kw": kw}

# First and last list pages to crawl (1-based, inclusive).
start = int(input("请输入您要爬取起始页(从1开始):"))
end = int(input("请输入爬取的截止页:"))

# Running counter used as the file name of the next image.
name = 1

for n in range(start, end + 1):
    # The list page is addressed by a thread offset; this script advances the
    # offset by 50 per page.
    pn = (n - 1) * 50
    full_url = url + '&pn=' + str(pn)
    list_response = requests.get(full_url, params=params, timeout=REQUEST_TIMEOUT)

    print("获取第", n, "页的帖子链接。。。")
    print(list_response.url)

    list_html = etree.HTML(list_response.content)
    tieba_urls = list_html.xpath('//div[@class="threadlist_lz clearfix"]/div/a/@href')

    print("tieba_urls===")
    print(tieba_urls)

    for tieba_url in tieba_urls:
        # The extracted hrefs are site-relative, so prefix the host.
        tieba_url = 'https://tieba.baidu.com' + tieba_url

        print('具體貼吧tieba_url',tieba_url)

        thread_response = requests.get(tieba_url, timeout=REQUEST_TIMEOUT)
        thread_html = etree.HTML(thread_response.content)
        img_urls = thread_html.xpath('//div[@class="d_post_content j_d_post_content "]/img[@class="BDE_Image"]/@src')

        for img_url in img_urls:
            print(img_url)

            # Create the target directory on first use; exist_ok avoids the
            # check-then-create race of os.path.exists() + os.mkdir().
            os.makedirs(path, exist_ok=True)

            print("正在下载图片:", img_url)

            try:
                img_response = requests.get(img_url, timeout=REQUEST_TIMEOUT)
            except requests.RequestException as exc:
                # One unreachable image must not abort the whole crawl; report
                # it and fall through so the counter still advances, exactly
                # as it does for a non-200 response.
                print("下载图片失败:", img_url, exc)
            else:
                if img_response.status_code == 200:
                    with open(path + str(name) + '.jpg', 'wb') as f:
                        for block in img_response.iter_content(1024):
                            if not block:
                                break
                            f.write(block)
                            print('222')

            # The number is consumed whether or not the download succeeded.
            name += 1
            print('+1')

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值