一、通过selenium来使用phantomjs。这是因为selenium封装了phantomjs的一部分功能,同时又提供了python的接口模块,所以在python中可以很方便地使用selenium,从而间接地使用phantomjs。
例子如:
import requests
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time

# Command-line switches handed to the PhantomJS executable.
SERVICE_ARGS = ['--load-images=false', '--disk-cache=true']
driver = webdriver.PhantomJS(executable_path=r"E:\phantomjs-2.1.1-windows\bin\phantomjs.exe", service_args=SERVICE_ARGS)

# Upper bound, in seconds, for every explicit wait on a page element.
WAIT_SECONDS = 10


def _wait_for(xpath, condition=EC.presence_of_element_located):
    """Wait until `condition` holds for the element at `xpath` and return the element."""
    return WebDriverWait(driver, WAIT_SECONDS).until(condition((By.XPATH, xpath)))


def login_newrank(url, account='13500000000', password='abc123', max_retries=3):
    """Open the login page at `url`, submit the credentials and print the resulting page source.

    Args:
        url: Address of the login page to load in the module-level `driver`.
        account: Text typed into the ``account_input`` field.
        password: Text typed into the ``password_input`` field.
        max_retries: How many times the whole login sequence is attempted before
            giving up. Values below 1 are treated as 1.

    Returns:
        None. Progress messages, errors and the final page source are printed.
    """
    attempts = max(1, max_retries)
    for _ in range(attempts):
        try:
            driver.get(url)
            _wait_for('//div[@class="login-normal-tap"]').click()
            print('登陆界面')
            _wait_for('//input[@id="account_input"]').send_keys(account)
            _wait_for('//input[@id="password_input"]').send_keys(password)
            _wait_for('//div[@id="pwd_confirm"]', EC.element_to_be_clickable).click()
            print('登陆')
        except Exception as e:
            # Retry boundary: any failure (timeout, missing element, driver error)
            # restarts the sequence, but only a bounded number of times so a
            # persistent failure cannot recurse or loop forever.
            print(e)
            continue
        # Give the post-login page a moment to load before dumping it.
        time.sleep(3)
        print(driver.page_source)
        return
    print('login failed after %d attempts' % attempts)


if __name__ == '__main__':
    url = 'http://www.newrank.cn/public/login/login.html?back=http%3A//www.newrank.cn/'
    try:
        login_newrank(url)
    finally:
        # Always shut the PhantomJS process down, even when the login raised.
        driver.quit()

# 二、使用phantomjs Webservice作为一种web服务的形式(api),将其与其他语言分离开来(比如python)。
# (Article section 2: run PhantomJS as a web service (an API) so that it is decoupled from other languages such as Python.)
如:service.js
// requests_test.py
// NOTE(review): the label above is fused onto the start of this listing in the
// source text; it presumably names the Python client listing that follows this
// script rather than this script itself -- confirm against the original article.

// PhantomJS script: a small HTTP service that renders a page and returns its HTML.
// The request body is expected to be form-encoded as "<url-encoded target URL>=<token>".
// Only the part before the first "=" (the target URL) is used.
var system = require('system');
var webserver = require('webserver');
var webPage = require('webpage');

var DEFAULT_PORT = 8080;
// Body sent back whenever no rendered page can be returned.
var FAILURE_BODY = '1';
var USER_AGENT = 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Mobile Safari/537.36';

var args = system.args;
var port = args.length === 2 ? Number(args[1]) : DEFAULT_PORT;
if (isNaN(port)) {
    console.log('Invalid port argument, falling back to ' + DEFAULT_PORT);
    port = DEFAULT_PORT;
}

// Render an error message plus its optional stack trace as one printable string.
function formatTrace(header, trace) {
    var lines = [header];
    if (trace && trace.length) {
        lines.push('TRACE:');
        trace.forEach(function (t) {
            lines.push(' -> ' + (t.file || t.sourceURL) + ': ' + t.line +
                (t.function ? ' (in function ' + t.function + ')' : ''));
        });
    }
    return lines.join('\n');
}

// Installed once at startup instead of on every request: an error in this
// script itself is logged and terminates the service.
phantom.onError = function (msg, trace) {
    console.log("[Warning]This is phantom.onError");
    console.log(formatTrace('PHANTOM ERROR: ' + msg, trace));
    phantom.exit(1);
};

var server = webserver.create();
var service = server.listen(port, function (request, response) {
    var page = null;
    var answered = false;

    // Send exactly one reply per request and release the page, whichever
    // path (load finished, bad request, exception) gets here first.
    function finish(statusCode, body) {
        if (answered) {
            return;
        }
        answered = true;
        response.statusCode = statusCode;
        response.write(body);
        if (page) {
            page.close();
        }
        response.close();
    }

    try {
        var postRaw = request.postRaw;
        if (!postRaw) {
            // No POST body (e.g. a GET request): answer instead of leaving the client hanging.
            finish(400, FAILURE_BODY);
            return;
        }
        // Form encoding writes spaces as "+", which decodeURIComponent leaves untouched.
        var url = decodeURIComponent(postRaw.split('=')[0].replace(/\+/g, '%20'));

        page = webPage.create();
        page.settings.userAgent = USER_AGENT;
        page.settings.resourceTimeout = 20000; // timeout is 20s

        // Errors raised by the loaded page's own scripts are logged and otherwise ignored.
        page.onError = function (msg, trace) {
            console.log("[Warning]This is page.onError");
            console.log(formatTrace('ERROR: ' + msg, trace));
        };

        page.onLoadFinished = function (status) {
            console.log('Status: ' + status);
            finish(200, status === 'success' ? page.content : FAILURE_BODY);
        };

        page.open(url, function (status) {
            console.log('Target_url is ' + url);
        });
    } catch (e) {
        console.log('[Error]'+e.message+'happen'+e.lineNumber+'line');
        // Close the response so the client is not left waiting for its own timeout.
        finish(500, FAILURE_BODY);
    }
});

if (service) {
    console.log('Web server running on port ' + port);
} else {
    console.log('Error: Could not create web server listening on port ' + port);
    phantom.exit(1);
}
# 但这个例子里其中有些获取不到m3u8,具体原因暂时未知。
# (Article note: for some pages this example fails to obtain an m3u8 URL; the cause is not yet known.)
import requests
import hashlib
import base64
import re
from multiprocessing.dummy import Pool

# First <video ... src="http...m3u8"> occurrence; group 1 is the stream URL.
M3U8_PATTERN = re.compile(r'<video.*?src="(http.*?m3u8)')


class http_request:
    """Client that POSTs a page URL to an HTTP service on localhost and returns the reply body."""

    def __init__(self, port="8080"):
        # str() lets callers pass the port as either "8080" or 8080.
        self.url = "http://localhost:" + str(port)

    def getwebbody(self, domain):
        """Fetch the page source for `domain` through the local service.

        The URL is sent as the form key; the form value is the MD5 hex digest
        of the base64-encoded URL.

        Args:
            domain: Full URL of the page to fetch.

        Returns:
            The raw response body as bytes, or None when the request failed
            (the error is printed).
        """
        base_domain = base64.b64encode(domain.encode('utf8'))
        md5_domain = hashlib.md5(base_domain).hexdigest()
        payload = {domain: md5_domain}
        try:
            return requests.post(self.url, data=payload, timeout=30).content
        except requests.exceptions.ConnectionError:
            print("requests connection error")
        except requests.exceptions.RequestException as e:
            print(e)
        return None


def _find_m3u8(html):
    """Return the first m3u8 URL found in a <video> tag of `html`, or None if there is none."""
    match = M3U8_PATTERN.search(html)
    return match.group(1) if match else None


if __name__ == "__main__":
    port = "8080"
    cur = http_request(port)
    # Alternative sample pages:
    # domain="http://app.cntv.cn/special/cportal/newlive/index.html?id=LiveRZy6XP4F1Z2DERFogaLe170917&fromapp=cctvnews&from=singlemessage&isappinstalled=1&btime=1505612965&bauth=4ef2309698028ea2f53824d6bc707cae"
    # domain="http://izhibo.ifeng.com/live.html?liveid=110695&c_from_app=ifengnews&aman=06o208R4ecqe0b8f56fe88Ve54Ub9d1ec9x515aa08"
    # domain="http://wap-live.myzaker.com/?wap_open_type=wap&live_id=21840"
    # domain="https://c.m.163.com/news/l/154715.html?spss=newsapp&spsw=1&from=singlemessage&isappinstalled=1"
    domain = "http://www.newscctv.net/219h5/#/article?videoId=ACBEF932-87FB-A8FB-179D-9BE3CCCEF9DA"
    ctn = cur.getwebbody(domain)
    print(ctn)
    if ctn is None:
        # The request failed; getwebbody already printed the reason.
        print("no page source received")
    else:
        m3u8 = _find_m3u8(ctn.decode('utf8', errors='replace'))
        if m3u8 is None:
            # Not every page exposes the stream as a <video src="...m3u8"> tag.
            print("no m3u8 url found in page source")
        else:
            print(m3u8)
参考:
https://thief.one/2017/03/31/Phantomjs%E6%AD%A3%E7%A1%AE%E6%89%93%E5%BC%80%E6%96%B9%E5%BC%8F/
https://thief.one/2017/03/01/Phantomjs%E7%88%AC%E8%BF%87%E7%9A%84%E9%82%A3%E4%BA%9B%E5%9D%91/