使用猫抓插件查看视频页面:
猫抓插件地址:https://download.csdn.net/download/qq_35944102/12751647
通过插件可以看到 .m3u8 文件的地址,例如:
https://apd-d76221faa00f776e123126c247fccbdb.v.smtcdns.com/sportsts.tc.qq.com/AaypdBLtr9u-YamHt0g-wsYRosTc9RPqbUxzRLFWG6FA/uwMROfz2r5zAoaQXGdGnC2df644E7D3uP8M8pmtgwsRK9nEL/ctOoRPhfxNNS8pRHfUEALMA8J1OKmAMXbtZ2UosD57qe4fjKtkzuJZRj2z0nwDul_ZPfhQTiIallk1yQz73xiJxQmGzU8SnurlOhpPHrZLTXxnzZvWR9Krc-Wi0FcH851LWJ4K5TWn2iQRPwvyjaPhDXlvXjc8pDI5Emaf9-ux8/d0034d5pyst.321002.ts.m3u8?ver=4
1. 分析页面源码,发现 m3u8 地址是一个拼接出来的地址:由 CDN 主机名和一段带时效性的路径组成。
获取该地址的代码如下:
from selenium import webdriver
from fake_useragent import UserAgent
import requests
import time
from lxml import etree
# Fetch the m3u8 playlist of a video on lpl.qq.com.
#
# Flow: open the video page in headless Chrome, click the play button until
# the player reports a non-zero duration, read the CDN host name from the
# player's console panel, splice it into the playlist URL and download it.

# Video page address.
url = "https://lpl.qq.com/es/video_detail.shtml?nid=38220&bMatchId=6685"
ua = UserAgent()
# Random User-Agent header, used for the final playlist request.
headers = {'User-Agent': ua.random}

PLAY_BUTTON_XPATH = "//txpdiv[@data-role='txp-ui-control-playbtn']"
DURATION_XPATH = "//txpdiv[@class='txp_left_controls']/txpdiv[@class='txp_time_display']/txpdiv[@class='txp_time_duration']"
CDN_HOST_XPATH = "//div[@id='liveCon']//txpdiv[@class='txp_console_inner']/txpdiv[@class='txp_line']/txpdiv[@data-role='txp-ui-console-cdn']"

# A single click does not reliably start playback, so the button is clicked
# repeatedly; the cap keeps the script from spinning forever.
MAX_CLICK_ATTEMPTS = 50
# Seconds to wait for the playlist request before giving up.
REQUEST_TIMEOUT = 10


def _even_index_texts(page_html, xpath):
    """Return the ``.text`` of every even-indexed node matching *xpath*.

    The XPath query yields a list of elements rather than a single value.
    NOTE(review): only the even-indexed matches are read, exactly as the
    original script did; presumably the matches come in pairs -- confirm
    against the live page.
    """
    nodes = etree.HTML(page_html).xpath(xpath)
    return [node.text for node in nodes[::2]]


chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--headless')  # run Chrome without a window
chrome_options.add_argument('--disable-gpu')
chrome_options.add_argument('--no-sandbox')
chromedriver = 'C:/Program Files (x86)/Google/Chrome/Application/chromedriver'
# NOTE(review): executable_path / chrome_options are the keyword names this
# script was written against; confirm they match the installed Selenium.
driver = webdriver.Chrome(executable_path=chromedriver, chrome_options=chrome_options)
try:
    driver.get(url=url)
    time.sleep(2)

    # Click "play" until the displayed duration is something other than 00:00.
    for _ in range(MAX_CLICK_ATTEMPTS):
        time.sleep(1)
        driver.find_element("xpath", PLAY_BUTTON_XPATH).click()
        durations = _even_index_texts(driver.page_source, DURATION_XPATH)
        for host_name1 in durations:
            print(host_name1)
        # No duration element yet means the player has not rendered; keep
        # clicking instead of failing on an undefined name.
        if durations and durations[-1] != '00:00':
            break
    else:
        raise RuntimeError(
            "playback did not start after %d clicks" % MAX_CLICK_ATTEMPTS
        )

    # Give the player time to fill in the CDN line, then re-read the page:
    # the snapshot taken inside the loop predates this wait.
    time.sleep(3)
    html = driver.page_source
    cdn_hosts = _even_index_texts(html, CDN_HOST_XPATH)
finally:
    # Always shut the browser down, even when something above failed.
    driver.quit()

for host_name in cdn_hosts:
    print(host_name)
if not cdn_hosts or not cdn_hosts[-1]:
    raise RuntimeError("CDN host name not found in the player console")
host_name = cdn_hosts[-1]

# Build the m3u8 address. Everything after the host is time-limited, so this
# hard-coded path has to be refreshed when it expires.
m3u8_url ='https://{}/sportsts.tc.qq.com/AT1u-BHmgh8ggz92397MSFOZB7ayt13lws9bbPfMxJBU/uwMROfz2r5zAoaQXGdGnC2df644E7D3uP8M8pmtgwsRK9nEL/1fQq3doMj_NuoNJRW5xktbv7fngOTyifEJxpL6gLPhXyQaxmEvLqr1axSIM_nW9UHIk8ZIB7kCrfkouLxahhHJrPFt11oP5_91U4_neFpmXeM2R07r_U7cctF9Rl6f8GhhJwgBa9Es6kVwAYWUAZMtVxbpwySkrtjQhDpXIbksA/f0034cs60si.321002.ts.m3u8?ver=4'.format(host_name)
m3u8 = requests.get(url=m3u8_url, headers=headers, timeout=REQUEST_TIMEOUT)
print(m3u8.text)
成功获取到m3u8文件内容:
参考链接:https://www.cnblogs.com/z-x-y/p/8260213.html