import re
import time
import requests
def pear_videos(url_html, id):
    """Download one pearvideo.com clip and save it as ``<contId>.mp4``.

    The function asks ``videoStatus.jsp`` for the clip's metadata, takes the
    ``srcUrl`` it returns, and replaces the ``systemTime`` value found in
    that URL with ``cont-<contId>`` to obtain the downloadable address, e.g.

        srcUrl:   .../adshort/20220325/1648302899162-15850227_adpkg-ad_hd.mp4
        rewritten: .../adshort/20220325/cont-1756320-15850227_adpkg-ad_hd.mp4

    Args:
        url_html: Page URL such as ``https://www.pearvideo.com/video_1756320``.
            The text between the first and second underscore (or the end of
            the string) is used as the content id.
        id: Video id used to build the ``Referer`` header. The name shadows
            the builtin ``id`` but is kept so existing callers keep working.

    Raises:
        IndexError: ``url_html`` contains no underscore.
        KeyError: The status JSON lacks ``videoInfo``/``videos``/``srcUrl``
            or ``systemTime``.
        requests.RequestException: A request timed out, failed, or returned
            an HTTP error status.
        OSError: The output file could not be opened or written.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/99.0.4844.51 Safari/537.36 ',
        # Anti-hotlinking: the Referer tells the server which page this
        # request originated from.
        'Referer': f'https://www.pearvideo.com/video_{id}'
    }
    cont_id = url_html.split('_')[1]
    # NOTE(review): mrd is a fixed constant here; presumably the site sends a
    # random cache-busting number -- confirm before relying on it.
    video_status = f'https://www.pearvideo.com/videoStatus.jsp?contId={cont_id}&mrd=0.21677247232554842'

    # A timeout keeps a stalled server from hanging the script forever, and
    # the status check stops an error page from being parsed as metadata.
    resp = requests.get(url=video_status, headers=headers, timeout=10)
    resp.raise_for_status()
    dic = resp.json()

    url_src = dic['videoInfo']['videos']['srcUrl']
    system_time = dic['systemTime']
    url_real = url_src.replace(system_time, f'cont-{cont_id}')

    video_name = cont_id
    video_resp = requests.get(url_real, timeout=30)
    # Fail before touching the disk so an HTTP error body is never saved
    # under a .mp4 name.
    video_resp.raise_for_status()
    with open(f'F:\\pythonProject\\video\\{video_name}.mp4', mode='wb') as v:
        v.write(video_resp.content)
    print(cont_id, "ok")
if __name__ == '__main__':
    # Script entry point: fetch the pearvideo.com home page, extract video ids
    # from its markup, and download the first few clips via pear_videos().
    url = 'https://www.pearvideo.com/'
    headers1 = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/99.0.4844.51 Safari/537.36 ',
    }
    # Time out instead of hanging on a stalled connection, and stop early on
    # an HTTP error rather than regex-scanning an error page.
    resp1 = requests.get(url=url, headers=headers1, timeout=10)
    resp1.raise_for_status()
    page_content = resp1.text

    # Each match captures the id from the first `<a href="video_<id>" class`
    # that follows a `<div class="vervideo-bd">`; re.S lets `.` span newlines.
    obj = re.compile(r'<div class="vervideo-bd">.*?'
                     r'<a href="video_(?P<id>.*?)" class', re.S)

    # Do not crawl too much: stop after this many videos.
    max_videos = 3
    for c, i in enumerate(obj.finditer(page_content), start=1):
        video_id = i.group('id')
        url_html = f'https://www.pearvideo.com/video_{video_id}'
        print(url_html, "url_html")
        pear_videos(url_html, video_id)
        if c == max_videos:
            break
        # Pause between downloads; skipped after the last one by the break.
        time.sleep(3)
# Tag: pearvideo
# First published: 2022-03-27 16:39:47