package com.yanshu.ttt;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.htmlunit.HtmlUnitDriver;
import org.openqa.selenium.phantomjs.PhantomJSDriver;
import com.gargoylesoftware.htmlunit.BrowserVersion;
import cn.edu.hfut.dmic.webcollector.model.Page;
public class PageUtils {
// Paths used when PhantomJS is started directly as an external process (i.e. not through Selenium).
// When moving to another runtime environment, note that the trailing "phantomjs.exe" in exePath
// has to be changed, because that binary only runs on Windows. The directory names in front of it
// must also match the real location of the binary, otherwise PhantomJS cannot be invoked.
// The values are computed once from the "user.dir" system property and never reassigned, hence final.
private static final String projectPath = System.getProperty("user.dir");
private static final String jsPath = projectPath + File.separator + "codes.js";
private static final String exePath = projectPath + File.separator + "phantomjs" + File.separator + "bin"
        + File.separator + "phantomjs.exe";
/**
 * Creates an {@link HtmlUnitDriver} with its default browser emulation, enables
 * JavaScript on it and loads the URL of the given page.
 *
 * <p>The returned driver is still open; the caller should call {@code quit()} on it
 * when finished. If loading the URL fails, the driver is quit here before the
 * exception is rethrown.
 *
 * @param page page whose URL ({@code page.getUrl()}) is loaded
 * @return the driver after the URL has been requested
 */
public static HtmlUnitDriver getDriver(Page page) {
    // Resolve the URL first so that a null page fails before any driver is created.
    String url = page.getUrl();
    HtmlUnitDriver driver = new HtmlUnitDriver();
    driver.setJavascriptEnabled(true);
    try {
        driver.get(url);
    } catch (RuntimeException e) {
        // Loading failed: release the driver instead of leaking it, then rethrow unchanged.
        try {
            driver.quit();
        } catch (RuntimeException quitFailure) {
            e.addSuppressed(quitFailure);
        }
        throw e;
    }
    return driver;
}
/**
 * Creates an {@link HtmlUnitDriver} that emulates the given browser, enables
 * JavaScript on it and loads the URL of the given page.
 *
 * <p>The returned driver is still open; the caller should call {@code quit()} on it
 * when finished. If loading the URL fails, the driver is quit here before the
 * exception is rethrown.
 *
 * @param page           page whose URL ({@code page.getUrl()}) is loaded
 * @param browserVersion browser to emulate
 * @return the driver after the URL has been requested
 */
public static HtmlUnitDriver getDriver(Page page,
        BrowserVersion browserVersion) {
    // Resolve the URL first so that a null page fails before any driver is created.
    String url = page.getUrl();
    HtmlUnitDriver driver = new HtmlUnitDriver(browserVersion);
    driver.setJavascriptEnabled(true);
    try {
        driver.get(url);
    } catch (RuntimeException e) {
        // Loading failed: release the driver instead of leaking it, then rethrow unchanged.
        try {
            driver.quit();
        } catch (RuntimeException quitFailure) {
            e.addSuppressed(quitFailure);
        }
        throw e;
    }
    return driver;
}
/**
 * Creates a {@link PhantomJSDriver} (able to crawl HTML that is generated dynamically
 * by JavaScript) and loads the URL of the given page.
 *
 * <p>The PhantomJS binary is located through the {@code phantomjs.binary.path} system
 * property. A value that is already set (for example with {@code -D} on the command
 * line) is respected; only when the property is absent is the historical hard-coded
 * Windows path used as a fallback.
 *
 * <p>The returned driver is still open; the caller should call {@code quit()} on it
 * when finished. If loading the URL fails, the driver is quit here before the
 * exception is rethrown, so no PhantomJS process is left behind.
 *
 * @param page page whose URL ({@code page.getUrl()}) is loaded
 * @return the driver after the URL has been requested
 */
public static WebDriver getWebDriver(Page page) {
    // Resolve the URL first so that a null page fails before a PhantomJS process is started.
    String url = page.getUrl();
    // Do not clobber a binary location configured by the caller or on the command line.
    if (System.getProperty("phantomjs.binary.path") == null) {
        System.setProperty("phantomjs.binary.path", "D:/任务/9-1/ysrds/phantomjs/bin/phantomjs.exe");
    }
    WebDriver driver = new PhantomJSDriver();
    try {
        driver.get(url);
    } catch (RuntimeException e) {
        // Loading failed: shut the driver down instead of leaking it, then rethrow unchanged.
        try {
            driver.quit();
        } catch (RuntimeException quitFailure) {
            e.addSuppressed(quitFailure);
        }
        throw e;
    }
    return driver;
}
/**
 * Runs the native PhantomJS executable directly (i.e. not through Selenium) on the
 * {@code codes.js} script with the page's URL as argument, and returns what the
 * process wrote to standard output.
 *
 * <p>The output is decoded as UTF-8 and its lines are concatenated without line
 * separators. The result is also echoed to {@code System.out} prefixed with
 * {@code --->>>>}.
 *
 * @param page page whose trimmed URL is passed to the script
 * @return the concatenated standard output of the process, or {@code null} if the
 *         process could not be started or its output could not be read
 */
public static String getPhantomJSDriver(Page page) {
    // Pass executable, script and URL as separate arguments: Runtime.exec(String) splits
    // its command on whitespace, which breaks as soon as a path or the URL contains a space.
    ProcessBuilder builder = new ProcessBuilder(exePath, jsPath, page.getUrl().trim());
    // Standard error is not read here; inherit it so the child can never block on a full pipe.
    builder.redirectError(ProcessBuilder.Redirect.INHERIT);
    Process process = null;
    try {
        process = builder.start();
        StringBuilder html = new StringBuilder();
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(process.getInputStream(), "UTF-8"))) {
            String line;
            while ((line = reader.readLine()) != null) {
                html.append(line);
            }
        }
        System.out.println("--->>>>" + html);
        return html.toString();
    } catch (IOException e) {
        e.printStackTrace();
        return null;
    } finally {
        // Release the native process handle and streams whether or not reading succeeded.
        if (process != null) {
            process.destroy();
        }
    }
}
/**
 * Runs the native PhantomJS executable directly (i.e. not through Selenium) on the
 * {@code codes.js} script with the given URL as argument, and returns what the
 * process wrote to standard output.
 *
 * <p>The resolved paths and the result are echoed to {@code System.out}. The output
 * is decoded as UTF-8 and its lines are concatenated without line separators.
 *
 * @param url URL passed to the script; surrounding whitespace is removed
 * @return the concatenated standard output of the process
 * @throws IOException              if the process cannot be started or its output cannot be read
 * @throws IllegalArgumentException if {@code url} is {@code null} or blank
 */
public static String getPhantomJSDriver11(String url) throws IOException {
    if (url == null || url.trim().isEmpty()) {
        throw new IllegalArgumentException("url must not be null or blank");
    }
    System.out.println("projectPath==>" + projectPath);
    System.out.println("jsPath==>" + jsPath);
    System.out.println("exePath==>" + exePath);
    // Pass executable, script and URL as separate arguments: Runtime.exec(String) splits
    // its command on whitespace, which breaks as soon as a path or the URL contains a space.
    ProcessBuilder builder = new ProcessBuilder(exePath, jsPath, url.trim());
    // Standard error is not read here; inherit it so the child can never block on a full pipe.
    builder.redirectError(ProcessBuilder.Redirect.INHERIT);
    Process process = builder.start();
    System.out.println("--ppp--------");
    // No sleep is needed before reading: readLine() blocks until output arrives or the stream ends.
    try (BufferedReader reader = new BufferedReader(
            new InputStreamReader(process.getInputStream(), "UTF-8"))) {
        StringBuilder output = new StringBuilder();
        String line;
        while ((line = reader.readLine()) != null) {
            output.append(line);
        }
        System.out.println(output.toString());
        return output.toString();
    } finally {
        // Release the native process handle and streams whether or not reading succeeded.
        process.destroy();
    }
}
/**
 * Manual smoke test: starts Chrome through chromedriver, opens Baidu and prints the
 * page title.
 *
 * @param args unused
 */
public static void main(String[] args) {
    System.setProperty("webdriver.chrome.driver", "D://gogole//chromedriver.exe");
    WebDriver driver = new ChromeDriver();
    try {
        driver.get("https://www.baidu.com");
        // Print the title of the loaded page.
        System.out.println("The testing page title is: " + driver.getTitle());
    } finally {
        // Always shut the browser down, even when loading the page fails.
        driver.quit();
    }
}
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.htmlunit.HtmlUnitDriver;
import org.openqa.selenium.phantomjs.PhantomJSDriver;
import com.gargoylesoftware.htmlunit.BrowserVersion;
import cn.edu.hfut.dmic.webcollector.model.Page;
/**
 * Static helpers that load a page either through a Selenium driver (HtmlUnit or
 * PhantomJS) or by running the native PhantomJS executable as an external process.
 */
public class PageUtils {
    // Paths used when PhantomJS is started directly as an external process (i.e. not through Selenium).
    // When moving to another runtime environment, note that the trailing "phantomjs.exe" in exePath
    // has to be changed, because that binary only runs on Windows. The directory names in front of it
    // must also match the real location of the binary, otherwise PhantomJS cannot be invoked.
    private static final String projectPath = System.getProperty("user.dir");
    private static final String jsPath = projectPath + File.separator + "codes.js";
    private static final String exePath = projectPath + File.separator + "phantomjs" + File.separator + "bin"
            + File.separator + "phantomjs.exe";

    /** System property through which PhantomJSDriver is told where the PhantomJS binary is. */
    private static final String PHANTOMJS_BINARY_PROPERTY = "phantomjs.binary.path";

    /** Historical hard-coded binary location, kept as a last-resort fallback. */
    private static final String LEGACY_PHANTOMJS_BINARY = "D:/任务/9-1/ysrds/phantomjs/bin/phantomjs.exe";

    /**
     * Creates an {@link HtmlUnitDriver} with its default browser emulation, enables
     * JavaScript on it and loads the URL of the given page.
     *
     * <p>The returned driver is still open; the caller should call {@code quit()} on it
     * when finished.
     *
     * @param page page whose URL ({@code page.getUrl()}) is loaded
     * @return the driver after the URL has been requested
     */
    public static HtmlUnitDriver getDriver(Page page) {
        // Resolve the URL first so that a null page fails before any driver is created.
        String url = page.getUrl();
        HtmlUnitDriver driver = new HtmlUnitDriver();
        driver.setJavascriptEnabled(true);
        return loadOrQuit(driver, url);
    }

    /**
     * Creates an {@link HtmlUnitDriver} that emulates the given browser, enables
     * JavaScript on it and loads the URL of the given page.
     *
     * <p>The returned driver is still open; the caller should call {@code quit()} on it
     * when finished.
     *
     * @param page           page whose URL ({@code page.getUrl()}) is loaded
     * @param browserVersion browser to emulate
     * @return the driver after the URL has been requested
     */
    public static HtmlUnitDriver getDriver(Page page,
            BrowserVersion browserVersion) {
        String url = page.getUrl();
        HtmlUnitDriver driver = new HtmlUnitDriver(browserVersion);
        driver.setJavascriptEnabled(true);
        return loadOrQuit(driver, url);
    }

    /**
     * Creates a {@link PhantomJSDriver} (able to crawl HTML that is generated dynamically
     * by JavaScript) and loads the URL of the given page.
     *
     * <p>The binary location is taken from the {@code phantomjs.binary.path} system
     * property when it is already set. Otherwise the binary under the working directory
     * ({@code phantomjs/bin/phantomjs.exe}) is used if that file exists, and the
     * historical hard-coded path is used as the last fallback.
     *
     * <p>The returned driver is still open; the caller should call {@code quit()} on it
     * when finished.
     *
     * @param page page whose URL ({@code page.getUrl()}) is loaded
     * @return the driver after the URL has been requested
     */
    public static WebDriver getWebDriver(Page page) {
        // Resolve the URL first so that a null page fails before a PhantomJS process is started.
        String url = page.getUrl();
        ensurePhantomJsBinaryConfigured();
        return loadOrQuit(new PhantomJSDriver(), url);
    }

    /**
     * Runs the native PhantomJS executable directly (i.e. not through Selenium) on the
     * {@code codes.js} script with the page's URL as argument, and returns what the
     * process wrote to standard output.
     *
     * <p>The result is also echoed to {@code System.out} prefixed with {@code --->>>>}.
     *
     * @param page page whose trimmed URL is passed to the script
     * @return the concatenated standard output of the process, or {@code null} if the
     *         process could not be started or its output could not be read
     */
    public static String getPhantomJSDriver(Page page) {
        String url = page.getUrl().trim();
        try {
            String html = runPhantomJs(url);
            System.out.println("--->>>>" + html);
            return html;
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Same as {@link #getPhantomJSDriver(Page)} but takes the URL directly, prints the
     * resolved paths for diagnosis and reports failures as an exception instead of
     * returning {@code null}.
     *
     * @param url URL passed to the script; surrounding whitespace is removed
     * @return the concatenated standard output of the process
     * @throws IOException              if the process cannot be started or its output cannot be read
     * @throws IllegalArgumentException if {@code url} is {@code null} or blank
     */
    public static String getPhantomJSDriver11(String url) throws IOException {
        if (url == null || url.trim().isEmpty()) {
            throw new IllegalArgumentException("url must not be null or blank");
        }
        System.out.println("projectPath==>" + projectPath);
        System.out.println("jsPath==>" + jsPath);
        System.out.println("exePath==>" + exePath);
        System.out.println("--ppp--------");
        // No sleep is needed before reading: the reader blocks until output arrives or the stream ends.
        String output = runPhantomJs(url.trim());
        System.out.println(output);
        return output;
    }

    /**
     * Manual smoke test: starts Chrome through chromedriver, opens Baidu and prints the
     * page title.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        System.setProperty("webdriver.chrome.driver", "D://gogole//chromedriver.exe");
        WebDriver driver = new ChromeDriver();
        try {
            driver.get("https://www.baidu.com");
            // Print the title of the loaded page.
            System.out.println("The testing page title is: " + driver.getTitle());
        } finally {
            // Always shut the browser down, even when loading the page fails.
            driver.quit();
        }
    }

    /** Loads {@code url} in {@code driver}; quits the driver before rethrowing if loading fails. */
    private static <T extends WebDriver> T loadOrQuit(T driver, String url) {
        try {
            driver.get(url);
            return driver;
        } catch (RuntimeException e) {
            try {
                driver.quit();
            } catch (RuntimeException quitFailure) {
                e.addSuppressed(quitFailure);
            }
            throw e;
        }
    }

    /** Sets the PhantomJS binary system property unless the caller has already configured it. */
    private static void ensurePhantomJsBinaryConfigured() {
        if (System.getProperty(PHANTOMJS_BINARY_PROPERTY) != null) {
            return;
        }
        String binary = new File(exePath).isFile() ? exePath : LEGACY_PHANTOMJS_BINARY;
        System.setProperty(PHANTOMJS_BINARY_PROPERTY, binary);
    }

    /** Runs {@code exePath jsPath url} and returns its UTF-8 stdout with the lines concatenated. */
    private static String runPhantomJs(String url) throws IOException {
        // Separate arguments instead of one command string: Runtime.exec(String) splits on
        // whitespace, which breaks as soon as a path or the URL contains a space.
        ProcessBuilder builder = new ProcessBuilder(exePath, jsPath, url);
        // Standard error is not read here; inherit it so the child can never block on a full pipe.
        builder.redirectError(ProcessBuilder.Redirect.INHERIT);
        Process process = builder.start();
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(process.getInputStream(), "UTF-8"))) {
            StringBuilder output = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                output.append(line);
            }
            return output.toString();
        } finally {
            // Release the native process handle and streams whether or not reading succeeded.
            process.destroy();
        }
    }
}
---------------
//codes.js
// PhantomJS script: opens the URL given on the command line and prints page.content
// to standard output, where the Java caller reads it.
var system = require('system');

// system.args[1] is the second command-line entry, i.e. the URL argument used below.
if (system.args.length < 2) {
    // Report usage on stderr so it is not mistaken for page content by the caller.
    system.stderr.writeLine('Usage: phantomjs codes.js <url>');
    phantom.exit(1);
} else {
    var url = system.args[1];
    var page = require('webpage').create();
    page.open(url, function (status) {
        if (status !== 'success') {
            // The message stays on stdout unchanged because callers read stdout;
            // the non-zero exit status additionally signals the failure.
            console.log('Unable to post!');
            phantom.exit(1);
        } else {
            console.log(page.content);
            phantom.exit(0);
        }
    });
}