1. 如果只需要下载单个网页,可以用下面的代码:通过配置Firefox的下载参数直接把网页保存到指定目录,不会弹出保存对话框。
package com.hlhlo.recruitment.download.service.impl;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.firefox.FirefoxDriver;
import org.openqa.selenium.firefox.FirefoxOptions;
import org.openqa.selenium.firefox.FirefoxProfile;
/**
 * Minimal demo: opens a local {@code .mht} file in Firefox using a profile whose
 * download preferences are set so that the file is saved to a fixed directory
 * without the "save file" dialog being shown.
 */
public class DownloadTest {
    public static void main(String[] args) {
        // Tell Selenium where the geckodriver binary lives (set once; the original set it twice).
        System.setProperty("webdriver.gecko.driver", "F:\\WebDriver\\geckodriver.exe");

        FirefoxProfile profile = new FirefoxProfile();
        // The preferences used below can be inspected by entering about:config in Firefox's address bar.
        // Download target directory. Per the original note: on Windows the path must use
        // backslashes (\\); forward slashes do not work.
        String path = "F:\\WebDriver\\download";

        // Download-related preferences.
        profile.setPreference("browser.download.dir", path); // download directory
        // Per the original note: 2 = save to the directory configured above, 0 = default location.
        profile.setPreference("browser.download.folderList", 2);
        // Whether to show the download manager when a download starts.
        profile.setPreference("browser.download.manager.showWhenStarting", false);
        // Suppress the save dialog; the value is the MIME type of the file being downloaded.
        // .mht / .mhtml files use the MIME type message/rfc822.
        // MIME type reference: http://www.w3school.com.cn/media/media_mimeref.asp
        profile.setPreference("browser.helperApps.neverAsk.saveToDisk", "message/rfc822");

        FirefoxOptions options = new FirefoxOptions();
        options.setProfile(profile);
        WebDriver driver = new FirefoxDriver(options);

        // Alternative target kept from the original example:
        // driver.get("file:///F:/dict_en_zhcn_2_pngs.rar");
        driver.get("file:///F:/a.mht");
        // NOTE(review): the driver is never quit, so the browser stays open after main returns.
        // Presumably intentional so the download can finish — confirm before adding driver.quit().
    }
}
2. 但是,如果网站需要先登录,登录后再打开其他页面并下载该页面的内容,上面的方法就不灵了:下载弹窗还是会出现。
如果另外新建一个WebDriver来下载,由于新的浏览器会话没有登录信息,网站又会提示需要先登录。
所以解决方法是:用HttpGet直接下载页面,并且在请求中带上cookie(也就是登录信息)。
step 1. 先从已登录的WebDriver中获取cookie
// Flatten every cookie of the current browser session into one "name=value;" string
// and keep it on this object for later HTTP requests.
Set<Cookie> cookies = this.webDriver.manage().getCookies();
StringBuilder cookieStr = new StringBuilder();
for (Cookie each : cookies) {
    cookieStr.append(each.getName())
            .append('=')
            .append(each.getValue())
            .append(';');
}
this.cookie = cookieStr.toString();
step 2. 带上cookie,根据url获取对应网页的内容
/**
 * Downloads the page at the given URL with an HTTP GET that carries the login cookie,
 * and returns the HTML extracted from the MHT response body.
 *
 * @param url    URL of the page to download
 * @param cookie value for the {@code Cookie} request header, i.e. the login information
 *               taken from the logged-in browser session
 * @return the result of {@code Util.mht2String} on the response body, or {@code null} when
 *         the response has no body or an {@link IOException} occurs
 **/
private String downloadByURL(String url, String cookie) {
    HttpGet get = new HttpGet(url);
    // NOTE(review): the Host header is hard-coded, so this method presumably only works
    // for URLs on jianli.58.com — confirm before reusing it for other sites.
    get.addHeader("Host", "jianli.58.com");
    get.addHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:63.0) Gecko/20100101 Firefox/63.0");
    get.addHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
    get.addHeader("Accept-Language", "zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2");
    get.addHeader("Accept-Encoding", "gzip, deflate");
    get.addHeader("Connection", "keep-alive");
    get.addHeader("Cookie", cookie); // the cookie string collected from the logged-in WebDriver
    get.addHeader("Upgrade-Insecure-Requests", "1");

    String html = null;
    // try-with-resources closes the response, which the original never did.
    // this.httpClient is assumed to be initialised elsewhere.
    try (CloseableHttpResponse response = this.httpClient.execute(get)) {
        HttpEntity entity = response.getEntity();
        if (entity != null) { // a response may legitimately carry no body
            InputStream is = entity.getContent();
            try {
                // The downloaded page is in MHT format; convert it to plain HTML text.
                html = Util.mht2String(is);
            } finally {
                // Close even when the conversion throws; close errors stay ignored as before.
                IOUtils.closeQuietly(is);
            }
        }
    } catch (IOException e) {
        log.error("发生异常:", e);
    }
    return html;
}
step 3. 下载得到的网页是mht格式,这种格式只能用IE打开;要在火狐浏览器中查看,需要先转换成html。转换方法如下:
pom.xml
<!-- Jsoup, a Java HTML parser.
     NOTE(review): the Util class shown in this article imports javax.mail and javax.activation
     rather than Jsoup, so a JavaMail dependency is presumably required as well; confirm. -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.8.2</version>
</dependency>
Util类:将mht格式转换成html格式(代码依赖javax.mail和javax.activation)
import javax.activation.DataHandler;
import javax.mail.MessagingException;
import javax.mail.Multipart;
import javax.mail.Session;
import javax.mail.internet.MimeBodyPart;
import javax.mail.internet.MimeMessage;
import javax.mail.internet.MimeMultipart;
import javax.mail.internet.MimePartDataSource;
import java.io.*;
import java.util.Enumeration;
/**
 * Helpers for converting MHT (MIME HTML, {@code message/rfc822}) documents to plain HTML.
 *
 * <p>An MHT document is parsed as a MIME message. The first body part is treated as the HTML
 * page; the remaining parts are treated as the resources (images, scripts, style sheets)
 * the page refers to.
 */
public class Util {

    /** Charset used when the HTML part does not declare one. */
    private static final String DEFAULT_ENCODING = "UTF-8";

    /**
     * Extracts the HTML text of the first body part of an MHT document.
     *
     * <p>The stream is read but not closed; closing it is left to the caller.
     *
     * @param fis stream positioned at the start of the MHT document
     * @return the HTML text with lines joined by CRLF, or {@code null} when the content is
     *         not multipart or any error occurs while parsing
     */
    public static String mht2String(InputStream fis) {
        try {
            MimeMultipart mp = parseMultipart(fis);
            if (mp == null) {
                return null;
            }
            MimeBodyPart htmlPart = (MimeBodyPart) mp.getBodyPart(0);
            // Read the HTML part using the charset it declares.
            return getHtmlText(htmlPart, getEncoding(htmlPart));
        } catch (Exception e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Converts an MHT file into an HTML file.
     *
     * <p>When the document has more than one part, the resources are written to a directory
     * named {@code <s_DescHtml absolute path>.files} and every occurrence of a resource's
     * {@code Content-Location} in the HTML is replaced by the absolute path of the saved file.
     * Failures are printed to standard error and otherwise ignored.
     *
     * @param s_SrcMht   path of the source MHT file
     * @param s_DescHtml path of the HTML file to write
     */
    public static void mht2html(String s_SrcMht, String s_DescHtml) {
        // try-with-resources: the original never closed the source file.
        try (InputStream fis = new FileInputStream(s_SrcMht)) {
            MimeMultipart mp = parseMultipart(fis);
            if (mp == null) {
                return;
            }
            MimeBodyPart htmlPart = (MimeBodyPart) mp.getBodyPart(0);
            // Charset declared by the HTML part (may be null when none is declared).
            String strEncodng = getEncoding(htmlPart);
            String strText = getHtmlText(htmlPart, strEncodng);
            if (strText == null) {
                return;
            }

            // Directory named after the target file, used to hold the resource files.
            int partCount = mp.getCount();
            File parent = null;
            if (partCount > 1) {
                parent = new File(new File(s_DescHtml).getAbsolutePath() + ".files");
                parent.mkdirs();
                if (!parent.exists()) { // give up when the directory could not be created
                    return;
                }
            }

            // Save each resource part and point the HTML at the saved copy.
            for (int i = 1; i < partCount; ++i) {
                MimeBodyPart bp = (MimeBodyPart) mp.getBodyPart(i);
                // Original location of the resource, e.g. http://xxx.com/abc.jpg
                String strUrl = getResourcesUrl(bp);
                if (strUrl == null || strUrl.length() == 0) {
                    continue;
                }
                File resources = new File(parent, getName(strUrl, i));
                if (SaveResourcesFile(resources, bp.getInputStream())) {
                    // Replace the remote address (image, JS, CSS, ...) with the local one.
                    strText = strText.replace(strUrl, resources.getAbsolutePath());
                }
            }

            // Finally write the HTML file itself.
            SaveHtml(strText, s_DescHtml, strEncodng);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Derives a local file name for a resource from its location.
     *
     * <p>The name is the text after the last {@code /} or {@code \}, with line breaks removed
     * and characters that are illegal in file names replaced by {@code _}. When no usable
     * name remains (for example the location ends with a separator), a name built from
     * {@code ID} is returned so the resource can still be saved.
     *
     * @param strName resource location, e.g. {@code http://xxx.com/abc.jpg}
     * @param ID      index of the resource part, used for the fallback name
     * @return a non-empty file name without path separators
     */
    public static String getName(String strName, int ID) {
        // Folded header values may contain line breaks; they never belong in a file name.
        String cleaned = strName.replace("\r", "").replace("\n", "");
        int lastSeparator = Math.max(cleaned.lastIndexOf('/'), cleaned.lastIndexOf('\\'));
        // lastSeparator == -1 keeps the whole string (the original returned "" here).
        String name = cleaned.substring(lastSeparator + 1);
        // Query strings and URL schemes carry characters Windows rejects in file names.
        name = name.replaceAll("[:*?\"<>|]", "_").trim();
        if (name.isEmpty() || name.equals(".") || name.equals("..")) {
            return "resource_" + ID;
        }
        return name;
    }

    /**
     * Writes the extracted HTML text to a file, replacing any existing content.
     *
     * @param s_HtmlTxt  HTML text to write
     * @param s_HtmlPath path of the file to write
     * @param s_Encode   charset name to encode with; {@code null} or empty means UTF-8
     * @return {@code true} when the file was written and closed successfully
     */
    public static boolean SaveHtml(String s_HtmlTxt, String s_HtmlPath, String s_Encode) {
        String encoding = orDefault(s_Encode);
        // Two resources so the file stream is closed even if the charset name is rejected.
        try (FileOutputStream fos = new FileOutputStream(s_HtmlPath, false);
             Writer out = new OutputStreamWriter(fos, encoding)) {
            out.write(s_HtmlTxt);
            return true;
        } catch (Exception e) {
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Saves one resource (JS, image, CSS, ...) of the page to a file.
     *
     * @param SrcFile     file to write
     * @param inputStream content of the resource; closed by this method once copying was attempted
     * @return {@code true} when the content was copied completely
     */
    private static boolean SaveResourcesFile(File SrcFile, InputStream inputStream) {
        if (SrcFile == null || inputStream == null) {
            return false;
        }
        // Each stream is closed independently, even when closing another one fails.
        try (InputStream in = new BufferedInputStream(inputStream);
             OutputStream out = new BufferedOutputStream(new FileOutputStream(SrcFile))) {
            byte[] buffer = new byte[1024];
            int read;
            while ((read = in.read(buffer)) != -1) {
                out.write(buffer, 0, read);
            }
            out.flush();
            return true;
        } catch (Exception e) {
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Returns the {@code Content-Location} header of a resource part.
     *
     * @param bp body part of the resource; may be {@code null}
     * @return the header value, or {@code null} when the part is {@code null}, has no such
     *         header, or the headers cannot be read
     */
    private static String getResourcesUrl(MimeBodyPart bp) {
        if (bp == null) {
            return null;
        }
        try {
            // Header names are case-insensitive; a null delimiter yields the first value only.
            return bp.getHeader("Content-Location", null);
        } catch (MessagingException e) {
            return null;
        }
    }

    /**
     * Reads the text of the HTML part.
     *
     * @param bp          body part holding the HTML
     * @param strEncoding charset name of the part; {@code null} or empty means UTF-8
     * @return the text with every line terminated by CRLF, or {@code null} on any error
     */
    private static String getHtmlText(MimeBodyPart bp, String strEncoding) {
        String encoding = orDefault(strEncoding);
        try (InputStream textStream = bp.getInputStream();
             BufferedReader br = new BufferedReader(new InputStreamReader(textStream, encoding))) {
            StringBuilder html = new StringBuilder();
            String line;
            while ((line = br.readLine()) != null) {
                html.append(line).append("\r\n");
            }
            return html.toString();
        } catch (Exception e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Determines the charset declared by the {@code Content-Type} header of the HTML part.
     *
     * @param bp body part holding the HTML; may be {@code null}
     * @return the charset name ({@code gb2312} is reported as {@code gbk}), or {@code null}
     *         when the part is {@code null} or declares no charset
     */
    private static String getEncoding(MimeBodyPart bp) {
        if (bp == null) {
            return null;
        }
        try {
            return parseCharset(bp.getHeader("Content-Type", null));
        } catch (MessagingException e) {
            e.printStackTrace();
        }
        return null;
    }

    /** Parses the stream as a MIME message; returns its multipart content, or null if it is not multipart. */
    private static MimeMultipart parseMultipart(InputStream in) throws MessagingException, IOException {
        Session mailSession = Session.getDefaultInstance(System.getProperties(), null);
        MimeMessage msg = new MimeMessage(mailSession, in);
        Object content = msg.getContent();
        return (content instanceof MimeMultipart) ? (MimeMultipart) content : null;
    }

    /** Extracts the charset parameter from a Content-Type value; returns null when there is none. */
    private static String parseCharset(String contentType) {
        if (contentType == null) {
            return null;
        }
        final String key = "charset=";
        int pos = -1;
        // Case-insensitive search: parameter names may be written as "Charset=" etc.
        for (int i = 0; i + key.length() <= contentType.length(); i++) {
            if (contentType.regionMatches(true, i, key, 0, key.length())) {
                pos = i;
                break;
            }
        }
        if (pos < 0) {
            return null;
        }
        String charset = contentType.substring(pos + key.length());
        // Drop any parameters that follow, e.g. charset=utf-8; format=flowed
        int end = charset.indexOf(';');
        if (end >= 0) {
            charset = charset.substring(0, end);
        }
        charset = charset.trim();
        if (charset.startsWith("\"") || charset.startsWith("'")) {
            charset = charset.substring(1);
        }
        if (charset.endsWith("\"") || charset.endsWith("'")) {
            charset = charset.substring(0, charset.length() - 1);
        }
        charset = charset.trim();
        if (charset.isEmpty()) {
            return null;
        }
        // Pages labelled gb2312 are decoded as gbk, as the original code did.
        if ("gb2312".equalsIgnoreCase(charset)) {
            return "gbk";
        }
        return charset;
    }

    /** Returns the given charset name, or the default when it is null or empty. */
    private static String orDefault(String encoding) {
        return (encoding == null || encoding.isEmpty()) ? DEFAULT_ENCODING : encoding;
    }
}