1.梨视频视频链接
梨视频
将主页的视频链接以及视频名称爬取下来并保存。
用re找到子页面的链接,用xpath找到视频的名字,headers请求头中加入Referer可以绕过防盗链。
import requests
from lxml import etree
import csv
import re
url = 'https://www.pearvideo.com/'
# Pre-compiled regex: captures each video page's relative link ("video_<id>")
# from the home-page HTML into the named group 'chilehtml'.
obj = re.compile(r'<a href="(?P<chilehtml>video_.*?)" class=".*?">\n')

# Fetch the home page; 'with' guarantees the connection is released.
with requests.get(url) as resp:
    home_html = resp.text

# Collect every child-page link found by the regex.
contIds = [m.group('chilehtml') for m in obj.finditer(home_html)]

saves = []  # rows of [video name, real video url]
for cont in contIds:
    child_url = url + cont
    cont_id = cont.split('_')[1]
    viedoStatusUrl = f'https://www.pearvideo.com/videoStatus.jsp?contId={cont_id}'
    # The Referer header defeats the site's anti-hotlinking check.
    headers = {'Referer': child_url}

    # Child page: parse the video title with XPath.
    with requests.get(child_url) as resp:
        child_html = resp.text
    html = etree.HTML(child_html)
    # [1:-1] strips the single wrapping characters kept by the page markup.
    name = html.xpath('//*[@id="detailsbd"]/div[1]/div[2]/div/div[1]/h1/text()')[0][1:-1]

    print(viedoStatusUrl)
    # JSON status endpoint: its srcUrl embeds a fake timestamp segment that
    # must be replaced with 'cont-<id>' to obtain the real mp4 address.
    with requests.get(viedoStatusUrl, headers=headers) as resp:
        status = resp.json()
    srcUrl = status["videoInfo"]['videos']['srcUrl']
    srcUrl = srcUrl.replace(status['systemTime'], f'cont-{cont_id}')
    saves.append([name, srcUrl])
    # with open(f'{name}.mp4', mode='wb') as f:  # download the video itself
    #     f.write(requests.get(srcUrl).content)
    #     print('over!')

# Persist all (name, url) pairs; utf-8 so Chinese titles survive regardless
# of the platform's default encoding.
with open('梨视频网址.csv', mode='w', newline='', encoding='utf-8') as f:
    csv.writer(f).writerows(saves)
print('over!')
2.代理
以下可以作为更改代理的模板
import requests
import time
# Template for routing requests through HTTP/HTTPS proxies:
# the dict maps each URL scheme to the proxy that should carry it.
proxies = {
    "http": "http://112.6.117.135:8085",
    "https": "https://103.168.44.126:3127",
}
url = "http://baidu.com"

resp = requests.get(url, proxies=proxies)
# Force utf-8 before reading .text so the page body decodes correctly.
resp.encoding = 'utf-8'
print(resp.text)
3.总结
做完了一点感觉都没有。。我干了什么做了什么,满脸问号。
爬虫这部分,慢慢看了,反正有时间可以做力扣的题。