python爬虫如何实现下载_Python爬虫实现下载某图片站内容

#!/usr/bin/env python3

# -*- coding: utf-8 -*-

#Code by 此称江初

import requests,os,threading

from lxml import etree

# Listing page handed to AllDown() when the full-site download option is chosen.
Url = "https://www.moestack.com/all"

# Browser-like User-Agent header sent with every requests.get() call in this script.
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36"}

def Mkdir(path):
    """Create the directory ``path`` (including parents) if it is missing.

    Surrounding whitespace and trailing backslashes are removed from
    ``path`` before it is used.

    Args:
        path: Directory path to create.

    Returns:
        True if the path did not exist and was created by this call,
        False if the path already existed. (The previous version returned
        None in the second case; both values are falsy.)

    Raises:
        OSError: If the directory cannot be created.
    """
    path = path.strip().rstrip("\\")
    if os.path.exists(path):
        return False
    # exist_ok=True tolerates another thread or process creating the
    # directory between the existence check above and this call.
    os.makedirs(path, exist_ok=True)
    return True

def Get_Page(x, timeout=30):
    """Download the page at ``x`` and return it parsed by ``etree.HTML``.

    Args:
        x: URL of the page to fetch.
        timeout: Seconds handed to ``requests.get`` as its ``timeout``
            argument. Without a timeout a stalled server would block the
            caller indefinitely.

    Returns:
        The result of ``etree.HTML`` applied to the response text, decoded
        as UTF-8.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            times out.
    """
    response = requests.get(x, headers=headers, timeout=timeout)
    # Decode the body as UTF-8 regardless of what requests guessed.
    response.encoding = "utf-8"
    return etree.HTML(response.text)

def end(index=None):
    """Download one image listed in ``GetImgUrl`` into the album folder.

    The image is written to ``Moe/<Title[0]>/<last 27 chars of URL>.jpg``.

    Args:
        index: Position in the module-level ``GetImgUrl`` list of the image
            to download. When omitted, the module-level ``i`` is used so
            the original zero-argument call keeps working.

    Raises:
        requests.exceptions.RequestException: If the download fails or
            times out.
        OSError: If the target file cannot be written.
    """
    if index is None:
        index = i
    ImgUrl = GetImgUrl[index]
    # A timeout keeps one stalled download from blocking join() forever.
    save_img = requests.get(ImgUrl, headers=headers, timeout=30)
    file_path = r"Moe/" + Title[0] + "/" + ImgUrl[-27:] + ".jpg"
    with open(file_path, "wb") as fh:
        # Leaving the with-block flushes and closes the file.
        fh.write(save_img.content)


def DownImg():
    """Download every image in ``GetImgUrl`` using one thread per image.

    Reads the module-level ``Title`` and ``GetImgUrl``, makes sure the
    folder ``Moe/<Title[0]>/`` exists, starts one ``end`` worker per image,
    waits for all of them and then prints a completion message.

    Each worker receives its own index through ``args``. The earlier
    version published the loop counter through a global ``i`` that the
    workers read only once they actually ran, so a worker could observe a
    later value and download the wrong image (duplicates and gaps). The
    loop counter and thread handle are therefore plain locals now.
    """
    path = "Moe/" + Title[0] + "/"
    Mkdir(path)
    threads = []
    for index in range(len(GetImgUrl)):
        worker = threading.Thread(target=end, args=(index,), daemon=True)
        worker.start()
        threads.append(worker)
    for worker in threads:
        worker.join()
    print("下载完成")

def OnePageDown(x):
    """Download all images of the single post page at ``x``.

    Stores the scraped image URLs and title in the module-level
    ``GetImgUrl`` and ``Title`` (which ``DownImg`` reads), prints the title
    and the image count, then calls ``DownImg``.

    Args:
        x: URL of the post page.

    Raises:
        IndexError: If the page yields no ``entry-title`` text.
    """
    global Title, GetImgUrl
    # Fetch and parse the page once; both XPath queries run on the same
    # tree instead of triggering a second identical HTTP request.
    page = Get_Page(x)
    GetImgUrl = page.xpath('//*/div[2]/div/div[1]/p/img/@src')
    Title = page.xpath('//*[@class="entry-title"]/text()')
    print("标题:" + Title[0])
    print("一共有%d张图片"%len(GetImgUrl))
    DownImg()

def PageDown(x):
    """Download every post linked from the listing page at ``x``.

    Args:
        x: URL of a listing page. Each ``href`` found under an
            ``entry-media`` element is passed to ``OnePageDown`` in
            document order.
    """
    listing = Get_Page(x)
    post_links = listing.xpath('//*[@class="entry-media"]/div/a/@href')
    for link in post_links:
        OnePageDown(link)

def AllDown(x):
    """Download every listing page of the site, starting from ``x``.

    The total page count is read from the pagination element of ``x``.
    Page 1 is ``x`` itself; page N (N > 1) is ``x`` followed by
    ``/page/N``.

    Args:
        x: URL of the first listing page. It is also used as the base for
            the numbered page URLs, so the page count and the pages that
            are visited always belong to the same listing.

    Raises:
        IndexError: If the pagination element is not found on the page.
        ValueError: If the pagination text is not an integer.
    """
    PageNum = Get_Page(x).xpath('/html/body/div/div[3]/div/div[2]/div/div/main/div[2]/ul/li[6]/a/text()')
    total_pages = int(PageNum[0])
    print("全站共有%d页"%total_pages)
    # Avoid a double slash when the caller passes a URL ending in "/".
    base = x.rstrip("/")
    for page in range(1, total_pages + 1):
        # Compare against the integer 1: the earlier check against the
        # string '1' could never be true, so page 1 was always requested
        # through the "/page/1" form instead of the plain listing URL.
        if page == 1:
            PageUrl = base
        else:
            PageUrl = base + "/page/" + str(page)
        PageDown(PageUrl)

def main():
    """Show the menu, read the user's choice and start that download mode.

    Choice '1' asks for a post URL and downloads that post, '2' asks for a
    listing-page URL and downloads every post on it, '3' downloads the
    whole site starting from ``Url``. Any other input does nothing.
    """
    print("菜单:\n1.单页下载\n2.页面下载\n3.全站下载(Boom!!!)")
    selection = input("请选择:")
    if selection == '1':
        OnePageDown(input("请输入链接:"))
        return
    if selection == '2':
        PageDown(input("请输入页面链接:"))
        return
    if selection == '3':
        AllDown(Url)

# Code by 此称江初
# Start the interactive menu only when this file is run as a script.
if __name__ == "__main__":
    main()

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值