#coding:utf8
import os
import re
import time
from html import unescape
from urllib.parse import urljoin

import requests
import selenium
from selenium import webdriver
theurl0 = 'http://tingshen.court.gov.cn/live/12288947'

# Directory that receives the saved page, the playlist and the .ts segments.
OUTPUT_DIR = "C://ai2020//page//"
LIVE_URL_PREFIX = "http://tingshen.court.gov.cn/live/"
PLAYER_URL_PREFIX = "http://player.videoincloud.com/vod/"
# Fixed delay (seconds) given to the browser to finish loading the live page.
PAGE_LOAD_WAIT = 5
# Upper bound (seconds) for every plain HTTP request, so a dead host cannot
# hang the script forever.
REQUEST_TIMEOUT = 30

# The live page embeds the player like this:
#   <iframe id="player" src="http://player.videoincloud.com/vod/7960930?src=gkw&cc=1" ...></iframe>
PLAYER_URL_RE = re.compile(r'http://player\.videoincloud\.com/vod/([\s\S]*?)"')
# The player page announces the playlist like this:
#   flashvars.file = encodeURIComponent("http://host/.../xxxvod.m3u8");
PLAYLIST_URL_RE = re.compile(r'encodeURIComponent\(\"([\s\S]*?)\"')


def fetch_rendered_page(url, wait_seconds=PAGE_LOAD_WAIT):
    """Open *url* in Firefox and return the page source after a fixed delay.

    The browser is always shut down, even when loading the page fails.
    """
    # 1. Create the browser object.
    driver = webdriver.Firefox()
    try:
        # 2. Request the page.
        driver.get(url)
        # 3. Wait for the page to finish loading.
        time.sleep(wait_seconds)
        return driver.page_source
    finally:
        # quit() closes every window and ends the session, so a separate
        # close() call beforehand is not needed.
        driver.quit()


def find_player_url(page_html):
    """Return the player URL embedded in the live page's HTML source.

    The URL is cut out of HTML source text, so character references such as
    ``&amp;`` are decoded before the URL is used for a request.

    Raises:
        ValueError: if no player URL is present in *page_html*.
    """
    match = PLAYER_URL_RE.search(page_html)
    if match is None:
        raise ValueError("player URL not found in the live page")
    return PLAYER_URL_PREFIX + unescape(match.group(1))


def find_playlist_url(player_html):
    """Return the .m3u8 URL passed to ``encodeURIComponent("...")``.

    Raises:
        ValueError: if the player page contains no such call.
    """
    match = PLAYLIST_URL_RE.search(player_html)
    if match is None:
        raise ValueError("playlist URL not found in the player page")
    return match.group(1)


def iter_segments(playlist_text, playlist_url):
    """Yield ``(segment_url, file_name)`` for every .ts entry of a playlist.

    Blank lines and ``#`` tag/comment lines are skipped. Entries are resolved
    against *playlist_url*, so both relative names and absolute URLs work.
    *file_name* is the last path component of the entry without any query
    string, which keeps it usable as a local file name.
    """
    for raw_line in playlist_text.splitlines():
        entry = raw_line.strip()
        if not entry or entry.startswith("#") or ".ts" not in entry:
            continue
        file_name = entry.split("?", 1)[0].rsplit("/", 1)[-1]
        yield urljoin(playlist_url, entry), file_name


def http_get(url):
    """GET *url* with a timeout and raise ``requests.HTTPError`` on 4xx/5xx."""
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    # Without this check an HTTP error page would be saved as if it were data.
    response.raise_for_status()
    return response


def main():
    """Save the live page, its .m3u8 playlist and all .ts segments to disk."""
    page_id = theurl0.replace(LIVE_URL_PREFIX, "")
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    page_html = fetch_rendered_page(theurl0)
    # Explicit UTF-8: the platform default codec may be unable to encode the
    # page's characters.
    with open(OUTPUT_DIR + page_id + ".html", "w", encoding="utf-8") as page_file:
        page_file.write(page_html)

    player_url = find_player_url(page_html)
    print(player_url)
    player_html = http_get(player_url).text

    playlist_url = find_playlist_url(player_html)
    playlist = http_get(playlist_url)
    # Written as raw bytes so the saved playlist is an exact copy.
    with open(OUTPUT_DIR + page_id + ".m3u8", "wb") as playlist_file:
        playlist_file.write(playlist.content)

    for segment_url, file_name in iter_segments(playlist.text, playlist_url):
        print(segment_url)
        segment = http_get(segment_url)
        with open(OUTPUT_DIR + file_name, "wb") as segment_file:
            segment_file.write(segment.content)


if __name__ == "__main__":
    main()
今天要抓一个页面,居然不能抓?
请教了大神,大神说用selenium,然后就搞定啦!
pip install selenium
Version: 3.141.0
在 Windows 上使用 Firefox 时会报错,
原因是缺少 geckodriver,需要从这里下载安装:
https://github.com/mozilla/geckodriver/releases
下载 win64 版,解压后把 geckodriver.exe 所在的目录加入 PATH 环境变量,就 OK 啦!