爬虫常用的几个库各有所长,不过目的都是一样的。今天用 lxml 的 etree 模块写了一个爬取视频素材的爬虫,分享一下。
#https://ibaotu.com/shipin/
import requests
from lxml import html
etree=html.etree
class climb:
    """Scrape the ibaotu.com video listing pages and download each video.

    Each listing page is fetched, the video titles and source links are
    extracted with XPath, and every video is saved as ``<title>.mp4`` inside
    ``save_dir``.

    Attributes:
        url: Base URL of the video section. It is never modified after
            construction; per-page URLs are derived from it on demand.
        hearders: (sic) HTTP headers sent with every request. The misspelled
            name is kept so existing code that sets it keeps working.
        save_dir: Directory the downloaded videos are written to.
        timeout: Per-request timeout in seconds.
    """

    # Characters that are not allowed in Windows file names; each one is
    # replaced by "_" before a scraped title is used as a file name.
    _UNSAFE_CHARS = str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})

    def __init__(self, save_dir=r"E:\\untitled\\视频\\", timeout=30):
        """Set up the crawler.

        Args:
            save_dir: Target directory for the downloaded files. The default
                is the location the script originally hard-coded.
            timeout: Seconds to wait for the server before giving up.
        """
        self.url = 'https://ibaotu.com/shipin/'
        # Testing showed that this site also works without any headers.
        self.hearders = {}
        self.save_dir = save_dir
        self.timeout = timeout

    def _page_url(self, num):
        """Return the URL of listing page ``num`` without touching self.url."""
        return self.url + '7-0-0-0-0-' + str(num) + '.html'

    @classmethod
    def _safe_name(cls, name):
        """Return ``name`` with file-system-hostile characters replaced."""
        cleaned = name.translate(cls._UNSAFE_CHARS).strip()
        # An empty title would otherwise produce the file name ".mp4".
        return cleaned or 'video'

    @staticmethod
    def _absolute_link(link):
        """Return a fetchable URL for a scraped ``src`` attribute."""
        if link.startswith(('http://', 'https://')):
            return link
        # The site serves protocol-relative links ("//host/path").
        return 'http:' + link

    def get_text(self, num):
        """Fetch listing page ``num`` and download every video on it.

        Args:
            num: 1-based index of the listing page.

        Raises:
            requests.HTTPError: If the listing page or a video request
                returns an error status.
        """
        page_url = self._page_url(num)
        print(page_url)
        respond = requests.get(
            url=page_url, headers=self.hearders, timeout=self.timeout
        )
        respond.raise_for_status()
        tree = etree.HTML(respond.content.decode())
        if tree is None:
            # lxml returns None for an empty document; nothing to download.
            return
        titles = tree.xpath('//span[@class="video-title"]/text()')
        links = tree.xpath('//div[@class="video-play"]/video/@src')
        for title, link in zip(titles, links):
            self.down_mp4(title, link)

    def down_mp4(self, name, link):
        """Download one video and store it as ``<name>.mp4`` in save_dir.

        Args:
            name: Video title; it is sanitised before being used as the
                file name.
            link: Value of the video's ``src`` attribute.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        import os

        file_name = self._safe_name(name) + '.mp4'
        os.makedirs(self.save_dir, exist_ok=True)
        path = os.path.join(self.save_dir, file_name)
        print('正在下载视频' + name)
        # Stream to disk so large videos are never held fully in memory.
        with requests.get(
            self._absolute_link(link),
            headers=self.hearders,
            stream=True,
            timeout=self.timeout,
        ) as resp:
            # Check the status first so error pages are not saved as videos.
            resp.raise_for_status()
            with open(path, 'wb') as f:
                for chunk in resp.iter_content(chunk_size=64 * 1024):
                    f.write(chunk)

    def all_main(self):
        """Ask for a page count and crawl listing pages 1 to that count."""
        while True:
            try:
                num = int(input('请输入要爬取的页数1-223:'))
                break
            except ValueError:
                print('请输入一个整数')
        # get_text() no longer mutates self.url, so no reset is needed
        # between pages.
        for i in range(1, num + 1):
            self.get_text(i)
if __name__ == '__main__':
    # Script entry point: build the crawler and start the interactive run.
    climb().all_main()