phantomjs的使用

一、通过selenium来使用phantomjs。原因是selenium封装了phantomjs的一部分功能,并且提供了python的接口模块,在python中可以很方便地使用selenium,从而间接地使用phantomjs。

示例如下:

import requests
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time

# Command-line switches handed to the PhantomJS process.
SERVICE_ARGS = [
    '--load-images=false',
    '--disk-cache=true',
]

# Module-level PhantomJS browser instance shared by the functions below.
driver = webdriver.PhantomJS(
    executable_path=r"E:\phantomjs-2.1.1-windows\bin\phantomjs.exe",
    service_args=SERVICE_ARGS,
)


def login_newrank(url, max_retries=3):
    """Log in to newrank with the module-level ``driver`` and print the page.

    Opens ``url``, switches to the account/password tab, fills in the
    (hard-coded, example) credentials and submits the form.  When any step
    fails the whole sequence is retried.

    Args:
        url: Address of the login page.
        max_retries: How many times to retry after a failed attempt.
            Negative values are treated as 0.  The original code retried
            forever through unbounded recursion.

    Raises:
        Exception: The error of the last attempt, once every retry has
            been used up.
    """
    max_retries = max(0, max_retries)
    for attempt in range(max_retries + 1):
        try:
            driver.get(url)

            login = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, '//div[@class="login-normal-tap"]')))
            login.click()
            print('登陆界面')
            user = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, '//input[@id="account_input"]')))
            user.send_keys('13500000000')

            pwd = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, '//input[@id="password_input"]')))
            pwd.send_keys('abc123')

            confirm = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//div[@id="pwd_confirm"]')))
            confirm.click()
            print('登陆')
            break
        except Exception as e:
            print(e)
            if attempt == max_retries:
                # Out of retries: surface the failure instead of looping forever.
                raise
    # Give the page a moment to settle after submitting before dumping it.
    time.sleep(3)
    print(driver.page_source)

if __name__ == '__main__':
    # Script entry point: run the login flow against the newrank login page.
    url = (
        'http://www.newrank.cn/public/login/login.html'
        '?back=http%3A//www.newrank.cn/'
    )
    login_newrank(url)

二、使用phantomjs的webserver模块,把phantomjs作为一个独立的web服务(api)运行,从而与调用它的其他语言(比如python)分离开来。
如:

service.js

// Listening port: taken from the single command-line argument when it is a
// valid TCP port number, otherwise 8080.
var system = require('system');
var args = system.args;
var port = 8080;
if (args.length === 2) {
    var requestedPort = Number(args[1]);
    // Number() yields NaN for non-numeric input; NaN fails every comparison
    // below, so the default is kept in that case.
    if (requestedPort > 0 && requestedPort < 65536 &&
            Math.floor(requestedPort) === requestedPort) {
        port = requestedPort;
    }
}
var webserver = require('webserver');
var server = webserver.create();
// Rendering service: every request body is expected to look like
// "<url-encoded target URL>=<value>". Only the part before the first "=" is
// used. The reply is the rendered page source on success, or the string "1"
// when the page could not be loaded or the request could not be handled.
var service = server.listen(port, function(request, response) {
    var page = null;
    var responded = false;

    // Send `body` exactly once, then release the page and the connection.
    function reply(body) {
        if (responded) {
            return;
        }
        responded = true;
        response.statusCode = 200;
        response.write(body);
        if (page) {
            page.close();
        }
        response.close();
    }

    try {
        var postRaw = request.postRaw;
        var url = decodeURIComponent(postRaw.split("=")[0]);
        var webPage = require('webpage');
        page = webPage.create();
        page.settings.userAgent = 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Mobile Safari/537.36';
        page.settings.resourceTimeout = 20000; // timeout is 20s

        // JavaScript errors raised inside the loaded page: log and carry on.
        page.onError = function(msg, trace) {
            console.log("[Warning]This is page.onError");
            var msgStack = ['ERROR: ' + msg];
            if (trace && trace.length) {
                msgStack.push('TRACE:');
                trace.forEach(function(t) {
                    msgStack.push(' -> ' + t.file + ': ' + t.line + (t.function ? ' (in function "' + t.function +'")' : ''));
                });
            }
            console.log(msgStack.join('\n'));
        };

        // Errors in this script itself: log and terminate the process.
        phantom.onError = function(msg, trace) {
            console.log("[Warning]This is phantom.onError");
            var msgStack = ['PHANTOM ERROR: ' + msg];
            if (trace && trace.length) {
                msgStack.push('TRACE:');
                trace.forEach(function(t) {
                    msgStack.push(' -> ' + (t.file || t.sourceURL) + ': ' + t.line + (t.function ? ' (in function ' + t.function +')' : ''));
                });
            }
            console.log(msgStack.join('\n'));
            phantom.exit(1);
        };

        // Registered before page.open so no load event can be missed.
        page.onLoadFinished = function(status) {
            console.log('Status: ' + status);
            if (responded) {
                // A later load event (e.g. after a redirect) must not touch
                // the already closed page or response.
                return;
            }
            if (status === 'success') {
                reply(page.content);
            } else {
                reply("1");
            }
        };

        page.open(url, function (status) {
            console.log('Target_url is ' + url);
        });
    }
    catch (e) {
        console.log('[Error]'+e.message+'happen'+e.lineNumber+'line');
        // Answer with the failure marker so the client is not left waiting
        // for its own timeout.
        reply("1");
    }
});
if (!service) {
    console.log('[Error]could not start web server on port ' + port);
    phantom.exit(1);
}
requests_test.py

import requests
import hashlib
import base64,re
from multiprocessing.dummy import Pool
class http_request:
    """Small client for the local PhantomJS rendering web service."""

    def __init__(self, port="8080"):
        """Remember the service address.

        Args:
            port: Port the service listens on, as a string or an integer.
        """
        # str() lets callers pass an int port as well as the original string.
        self.url = "http://localhost:" + str(port)

    def getwebbody(self, domain):
        """Fetch the page source of ``domain`` through the service.

        The target URL is sent as the form key and the MD5 hex digest of its
        base64 encoding as the form value.

        Args:
            domain: URL of the page to render.

        Returns:
            The raw response body as bytes, or ``None`` when the request
            failed (the error is printed).
        """
        base_domain = base64.b64encode(domain.encode('utf8'))
        md5_domain = hashlib.md5(base_domain).hexdigest()
        payload = {domain: md5_domain}
        try:
            return requests.post(self.url, data=payload, timeout=30).content
        except requests.exceptions.ConnectionError:
            print("requests connection error")
        except Exception as e:
            print(e)
        return None
if __name__=="__main__":
    # Demo: fetch a page through the local service and extract the first
    # m3u8 address found in a <video> tag.
    port="8080"
    cur=http_request(port)
    # domain="http://app.cntv.cn/special/cportal/newlive/index.html?id=LiveRZy6XP4F1Z2DERFogaLe170917&fromapp=cctvnews&from=singlemessage&isappinstalled=1&btime=1505612965&bauth=4ef2309698028ea2f53824d6bc707cae"
    # domain="http://izhibo.ifeng.com/live.html?liveid=110695&c_from_app=ifengnews&aman=06o208R4ecqe0b8f56fe88Ve54Ub9d1ec9x515aa08"
    # domain="http://wap-live.myzaker.com/?wap_open_type=wap&live_id=21840"
    # domain="https://c.m.163.com/news/l/154715.html?spss=newsapp&spsw=1&from=singlemessage&isappinstalled=1"
    domain="http://www.newscctv.net/219h5/#/article?videoId=ACBEF932-87FB-A8FB-179D-9BE3CCCEF9DA"
    ctn = cur.getwebbody(domain)
    print(ctn)
    if ctn is None:
        # getwebbody already printed the reason; there is nothing to parse.
        print("no response body, skip m3u8 extraction")
    else:
        matches = re.findall("<video.*?src=\"(http.*?m3u8)",ctn.decode('utf8', errors='replace'))
        if matches:
            m3u8 = matches[0]
            print(m3u8)
        else:
            # Some pages do not expose an m3u8 address in the returned source.
            print("no m3u8 address found in page source")
但在这个例子中,有些页面获取不到m3u8,具体原因暂时未知。


参考:

https://thief.one/2017/03/31/Phantomjs%E6%AD%A3%E7%A1%AE%E6%89%93%E5%BC%80%E6%96%B9%E5%BC%8F/
https://thief.one/2017/03/01/Phantomjs%E7%88%AC%E8%BF%87%E7%9A%84%E9%82%A3%E4%BA%9B%E5%9D%91/

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值