import requests as re
from bs4 import BeautifulSoup
import time
class VideoGet:
    """Download a single HLS ``.ts`` segment and write it to disk.

    The caller drives the object through plain attributes:

    * ``url``      -- address fetched by :meth:`GetFile`.
    * ``headers``  -- HTTP headers sent with every request.
    * ``save_dir`` -- directory that :meth:`SaveVideo` writes into.
    * ``timeout``  -- per-request timeout in seconds.
    * ``file``     -- raw bytes of the most recent download (set by
      :meth:`GetFile`).
    """

    def __init__(self):
        # Initial address is only a placeholder; the caller overwrites
        # ``url`` before each download.
        self.url = 'https://www.4kdy.org/TV/OM/juemingdushi1/juemingdushi0101/hls/MZ1Fpbwv1332079.ts'
        self.headers = {
            'Connection': 'Keep-Alive',
            'Accept': 'text/html, application/xhtml+xml, */*',
            'Accept-Language': 'en-US,en;q=0.8,zh-Hans-CN;q=0.5,zh-Hans;q=0.3',
            'User-Agent': 'Mozilla/5.0 (Linux; U; Android 6.0; zh-CN; MZ-m2 note Build/MRA58K) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/40.0.2214.89 MZBrowser/6.5.506 UWS/2.10.1.22 Mobile Safari/537.36'
        }
        # Output directory for saved segments (previously hard-coded in
        # SaveVideo); override it to save somewhere else.
        self.save_dir = 'D:/test'
        # Without a timeout a stalled connection would block forever.
        self.timeout = 30
        print('init ok!!!!!!\n')

    def GetFile(self):
        """Fetch ``self.url`` and keep the response body in ``self.file``.

        Raises:
            requests.HTTPError: if the server answers with a 4xx/5xx
                status, so an error page is never saved as video data.
            requests.exceptions.RequestException: on connection failure
                or timeout.
        """
        # NOTE: in this file ``re`` is the ``requests`` package
        # (``import requests as re``), not the stdlib regex module.
        respond = re.get(url=self.url, headers=self.headers,
                         timeout=self.timeout)
        respond.raise_for_status()
        self.file = respond.content
        print('get file ok !!!!!\n')

    def SaveVideo(self, index=None, total=None):
        """Write ``self.file`` to ``<save_dir>/<index, zero-padded to 3>.ts``.

        Args:
            index: Zero-based number of the segment being saved. When
                omitted, the module-level loop variable ``i`` is used,
                which is how the original script called this method.
            total: Total number of segments, used only for the progress
                message. When omitted, ``len(url_list)`` of the
                module-level ``url_list`` is used.
        """
        if index is None:
            index = i  # noqa: F821 - legacy fallback to the script's global
        if total is None:
            total = len(url_list)  # noqa: F821 - legacy fallback

        # zfill(3) yields the same 00N / 0NN / NNN names as the former
        # three-way if/elif chain, so files sort in playback order.
        path = self.save_dir + '/' + str(index).zfill(3) + '.ts'
        with open(path, 'wb') as f:
            f.write(self.file)

        print('num ' + str(index) + ' save OK!!!!')
        # ``index`` is zero-based, so after saving it there are
        # total - index - 1 segments left (the old message was off by one).
        print('still left ' + str(total - index - 1) + ' file')
if __name__ == '__main__':
    # Script entry point: download the m3u8 playlist, extract every .ts
    # segment name from it, then fetch and save the segments one by one.
    #
    # NOTE: ``i`` and ``url_list`` are intentionally kept as module-level
    # names, because VideoGet.SaveVideo() reads them when it is called
    # without arguments.
    Video = VideoGet()

    # Playlist entries are bare file names (e.g.
    # 232407987711410177-1583395302074.ts), so they are resolved against
    # this common prefix.
    base_url = 'https://pstore-rk.chaoxing.com/record/live/232407987711410177/hls/'
    Video.url = base_url + '232407987711410177-232407987711410177.m3u8'

    # Fetch the playlist once (``re`` is the ``requests`` package here) and
    # reuse the response for both the on-disk copy and the parsing; the
    # old code requested it twice, the second time without the headers.
    playlist = re.get(url=Video.url, headers=Video.headers, timeout=30)
    playlist.raise_for_status()
    with open('D:/test1.txt', 'wb') as f:
        f.write(playlist.content)

    # splitlines() + strip() also copes with CRLF playlists, where
    # split('\n') would leave a trailing '\r' and endswith('.ts') would
    # never match.
    url_list = []
    for line in playlist.text.splitlines():
        line = line.strip()
        if line.endswith('.ts'):
            url_list.append(line)

    # Download every .ts segment in playlist order.
    for i, segment_name in enumerate(url_list):
        Video.url = base_url + segment_name
        Video.GetFile()
        Video.SaveVideo()
    print('all ok!!!!!!')
    print('over')
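# Source article title: "04_python爬虫爬取超星回放" (Python crawler for downloading Chaoxing replay videos)
# Page footer: "最新推荐文章于 2021-09-09 00:11:12 发布" (latest recommended article published 2021-09-09 00:11:12)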