"""Download the videos listed on the ibaotu.com video page to a local folder.

Steps: fetch the listing page, extract each video's source URL and title
with XPath, then stream every video into ``OUTPUT_DIR`` as ``<title>.mp4``.
"""

import re
from pathlib import Path
from urllib.parse import urljoin

import requests
from lxml import etree

PAGE_URL = "https://ibaotu.com/shipin/"
OUTPUT_DIR = Path("E:\\baotuwang")
# requests applies no timeout unless one is passed, so a stalled server
# would otherwise block the script forever.
REQUEST_TIMEOUT = 30  # seconds
CHUNK_SIZE = 64 * 1024  # bytes written per iteration while streaming

# Characters that Windows rejects in file names, plus ASCII control characters.
_INVALID_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')


def _safe_filename(title, fallback):
    """Return *title* made usable as a file name, or *fallback* if nothing is left."""
    cleaned = _INVALID_FILENAME_CHARS.sub("_", title).strip()
    return cleaned or fallback


def _absolute_url(src):
    """Turn a ``src`` attribute value into a URL that can be requested.

    Protocol-relative values (``//host/path``) get the ``http:`` scheme, as
    the script always did; anything else is resolved against ``PAGE_URL`` so
    already-absolute and site-relative values work too.
    """
    if src.startswith("//"):
        return "http:" + src
    return urljoin(PAGE_URL, src)


response = requests.get(PAGE_URL, timeout=REQUEST_TIMEOUT)
# Fail loudly instead of parsing an error page that contains no videos.
response.raise_for_status()
html = etree.HTML(response.text)
src_list = html.xpath('//div[@class = "video-play"]/video/@src')
tit_list = html.xpath('//span[@class = "video-title"]/text()')

# open() does not create missing directories.
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# zip() stops at the shorter list, so an unmatched source or title is skipped.
for index, (src, tit) in enumerate(zip(src_list, tit_list), start=1):
    name = _safe_filename(tit, "video_{}".format(index))
    filename = OUTPUT_DIR / (name + ".mp4")
    print("正在下载:{}".format(filename))
    try:
        # stream=True keeps memory use flat regardless of the video size.
        with requests.get(
            _absolute_url(src), stream=True, timeout=REQUEST_TIMEOUT
        ) as video:
            # Check the status before opening the file so that an HTTP error
            # body is never saved under a .mp4 name.
            video.raise_for_status()
            with open(filename, "wb") as f:
                for chunk in video.iter_content(chunk_size=CHUNK_SIZE):
                    f.write(chunk)
    except requests.RequestException as exc:
        # One broken link should not stop the remaining downloads.
        print("下载失败:{} ({})".format(filename, exc))
分析:
1.找到所要爬取的指定网址
2.分析爬取的数据
3.下载数据
4.保存数据