之前因为网站被封,
现在补充讲解iframe链接的获取
电影链接:http://www.meiju8.cc/movie/94753.html
请求链接进入该电影的首页,
提取电影播放界面的网址并拼接
代码如下:
def mv_page(self, url):
    """Request the movie's landing page and follow its first playback link.

    Args:
        url: Landing-page URL of the movie, e.g.
            ``http://www.meiju8.cc/movie/94753.html``.

    Returns:
        Whatever ``self.iframe`` returns for the resolved playback-page URL.

    Raises:
        ValueError: If the page does not contain the expected
            ``ul.mn_list_li_movie > li > a[href]`` markup.
    """
    from urllib.parse import urljoin

    # A timeout keeps a stalled connection from hanging the crawler forever.
    res = requests.get(url, timeout=10)
    soup = BeautifulSoup(res.text, "html.parser")

    # Walk ul -> li -> a step by step so missing markup produces one clear
    # error instead of an IndexError/AttributeError deep in a call chain.
    playlist = soup.find("ul", class_="mn_list_li_movie")
    first_item = playlist.find("li") if playlist is not None else None
    link = first_item.find("a") if first_item is not None else None
    href = link.get("href") if link is not None else None
    if not href:
        raise ValueError("no playback link found on {}".format(url))

    # Resolve the href against the URL that was actually requested rather
    # than against instance state, so the method works for any `url`.
    mv_url = urljoin(url, href)
    return self.iframe(mv_url)
找到iframe链接
通过观察播放页中的script发现,iframe链接是由固定前缀 https://jx.fqzy.cc/jx.php?url= 和script中的"Url"字段直接拼接而成
def iframe(self, url):
    """Request the playback page and build the iframe URL from its script.

    The value captured between ``"Url":"`` and ``","bUrl"`` in the response
    body is appended to the fixed ``jx.php?url=`` prefix.

    Args:
        url: URL of the playback page.

    Returns:
        Whatever ``self.video`` returns for the assembled iframe URL.

    Raises:
        ValueError: If the response does not contain the ``"Url"`` field.
    """
    # NOTE(review): verify=False disables TLS certificate verification; it is
    # kept from the original, but confirm it is still required for this host.
    res = requests.get(url, headers=self.headers, verify=False, timeout=10)

    # Named `match` (not `compile`) so the builtin compile() is not shadowed.
    match = re.search(r'"Url":"(?P<url>.*?)","bUrl"', res.text, re.S)
    if match is None:
        raise ValueError('no "Url" field found in the page at {}'.format(url))

    iframe_url = "https://jx.fqzy.cc/jx.php?url=" + match.group("url")
    return self.video(iframe_url)
直接请求iframe链接时遇到问题,
请求需要带上以下请求头:
User-Agent、Referer和Cookie
加上以上参数后,就能得到m3u8文件地址
import requests,re
from bs4 import BeautifulSoup
class MeiJuWang:
    """Crawler helper that prompts for a movie URL and extracts a video URL.

    Attributes are set in ``__init__``:
        input_url: The movie URL typed by the user.
        headers: Request headers (User-Agent, cookie, Referer) sent with the
            iframe request.
    """

    def __init__(self):
        """Prompt for the movie URL and prepare the request headers."""
        # Stored on the instance (not a throwaway local) so it is available
        # to the crawl entry point below, which passes self.input_url.
        self.input_url = input("请输入要下载的电影链接:").strip()
        # For testing convenience:
        # self.input_url = "http://www.meiju8.cc/movie/94753.html"
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36",
            "cookie": "Hm_lvt_b2b84ee87a8af3f8ad100a3da41c8ad0=1636809421,1636895260,1638451748,1638537010; Hm_lpvt_b2b84ee87a8af3f8ad100a3da41c8ad0=1638537010; Hm_lvt_b5093235610ff1922206ff61ddb2a910=1636807512,1636895261,1638451756,1638537011; Hm_lpvt_b5093235610ff1922206ff61ddb2a910=1638537011; prec=94769%3A2322363%2C94800%3A2322547%2C60215%3A2153100%2C60173%3A2151552%2C60204%3A2152665%2C94789%3A2322520%2C94768%3A2322357%2C94753%3A2321496%2C94654%3A2321560",
            "Referer": "http://www.meiju8.cc/movie/94769.html",
        }
        # Real crawl entry point (disabled in this excerpt):
        # self.mv_page(self.input_url)
        #
        # For testing convenience, the iframe step can be exercised directly:
        # url = "https://jx.fqzy.cc/jx.php?url=FQ:a1924f01f35f3e7eaa1f04507beecae2045329abcc180890491dd6a818b492c130b9572cd03424e0ba11a21613f1b64911147986fbb401ac50"
        # self.video(url)

    def video(self, url):
        """Request the iframe URL and extract the video address.

        Args:
            url: The iframe URL (``https://jx.fqzy.cc/jx.php?url=...``).

        Returns:
            The text captured after ``"blob:`` up to the closing quote in the
            response body.

        Raises:
            ValueError: If the response contains no ``"blob:..."`` string.
        """
        # NOTE(review): verify=False disables TLS certificate verification; it
        # is kept from the original, but confirm it is still required.
        res = requests.get(url, headers=self.headers, verify=False, timeout=10)

        # Named `match` (not `compile`) so the builtin compile() is not shadowed.
        match = re.search(r'"blob:(?P<url>.*?)"', res.text, re.S)
        if match is None:
            raise ValueError("no blob video URL found at {}".format(url))

        video_url = match.group("url")
        return video_url
        # Next step in the full crawler: return self.download(video_url)
剩下的就不重复了 ┑( ̄Д  ̄)┍