参考网上的 Java 代码:
package com.sysweal.callback;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Small command-line demo that fetches one HTML page, collects the absolute
 * image URLs found inside its {@code <img>} tags and saves every image into
 * the current working directory.
 */
public class CatchImage {
    // Address of the page to scan.
    private static final String PAGE_URL = "http://www.baidu.com";
    // Charset used to decode the page.
    private static final String ENCODING = "UTF-8";
    // One complete <img ...> tag that carries a src attribute. Using [^>]
    // keeps a match inside a single tag even when several tags share a line.
    private static final Pattern IMG_TAG = Pattern.compile(
            "<img\\b[^>]*?\\bsrc\\s*=[^>]*>", Pattern.CASE_INSENSITIVE);
    // An absolute http/https URL; it ends at whitespace, a quote or an angle
    // bracket, so no trailing delimiter has to be stripped afterwards.
    private static final Pattern ABSOLUTE_URL = Pattern.compile(
            "https?://[^\\s\"'<>]+", Pattern.CASE_INSENSITIVE);

    /**
     * Runs the whole pipeline: fetch page, find image tags, extract URLs, download.
     *
     * @param args unused
     * @throws Exception if the page itself cannot be fetched
     */
    public static void main(String[] args) throws Exception {
        CatchImage cm = new CatchImage();
        // Fetch the HTML text.
        String html = cm.getHTML(PAGE_URL);
        // Collect the <img> tags.
        List<String> imgTags = cm.getImageUrl(html);
        // Extract the image addresses from the tags.
        List<String> imgSrc = cm.getImageSrc(imgTags);
        // Download the images.
        cm.download(imgSrc);
    }

    /**
     * Reads the resource behind {@code url} and decodes it with {@link #ENCODING}.
     *
     * @param url address to open (any scheme supported by {@link URL})
     * @return the decoded content
     * @throws Exception if the URL is malformed or reading fails
     */
    private String getHTML(String url) throws Exception {
        URLConnection connection = new URL(url).openConnection();
        InputStream in = connection.getInputStream();
        try {
            // Collect the raw bytes first and decode once at the end: decoding
            // chunk by chunk would split multi-byte characters at buffer
            // boundaries and would include stale bytes of a partly filled buffer.
            ByteArrayOutputStream bytes = new ByteArrayOutputStream();
            byte[] buf = new byte[1024];
            int length;
            while ((length = in.read(buf, 0, buf.length)) != -1) {
                bytes.write(buf, 0, length);
            }
            return bytes.toString(ENCODING);
        } finally {
            in.close();
        }
    }

    /**
     * Finds every {@code <img>} tag that has a {@code src} attribute.
     *
     * @param HTML page markup
     * @return the matched tags, in document order
     */
    private List<String> getImageUrl(String HTML) {
        Matcher matcher = IMG_TAG.matcher(HTML);
        List<String> listImgUrl = new ArrayList<String>();
        while (matcher.find()) {
            listImgUrl.add(matcher.group());
        }
        return listImgUrl;
    }

    /**
     * Extracts every absolute http/https URL contained in the given tags.
     *
     * @param listImageUrl {@code <img>} tags as returned by {@link #getImageUrl(String)}
     * @return the URLs, in order of appearance (relative addresses are skipped)
     */
    private List<String> getImageSrc(List<String> listImageUrl) {
        List<String> listImgSrc = new ArrayList<String>();
        for (String image : listImageUrl) {
            Matcher matcher = ABSOLUTE_URL.matcher(image);
            while (matcher.find()) {
                listImgSrc.add(matcher.group());
            }
        }
        return listImgSrc;
    }

    /**
     * Downloads each URL into the working directory. A failure is reported for
     * the affected URL only; the remaining downloads still run.
     *
     * @param listImgSrc absolute image URLs
     */
    private void download(List<String> listImgSrc) {
        for (String url : listImgSrc) {
            String imageName = fileNameOf(url);
            if (imageName.length() == 0) {
                System.out.println("下载失败:" + url + " (no file name in URL)");
                continue;
            }
            System.out.println("开始下载:" + url);
            try {
                InputStream in = new URL(url).openStream();
                try {
                    FileOutputStream fo = new FileOutputStream(new File(imageName));
                    try {
                        byte[] buf = new byte[1024];
                        int length;
                        while ((length = in.read(buf, 0, buf.length)) != -1) {
                            fo.write(buf, 0, length);
                        }
                    } finally {
                        fo.close();
                    }
                } finally {
                    in.close();
                }
                System.out.println(imageName + "下载完成");
            } catch (Exception e) {
                // Boundary of a best-effort loop: report and continue with the next URL.
                System.out.println("下载失败:" + url + " (" + e + ")");
            }
        }
    }

    /**
     * Returns the last path segment of {@code url}, ignoring any query string
     * or fragment; empty when the path ends with a slash.
     */
    private static String fileNameOf(String url) {
        int end = url.length();
        int query = url.indexOf('?');
        if (query >= 0) {
            end = query;
        }
        int fragment = url.indexOf('#');
        if (fragment >= 0 && fragment < end) {
            end = fragment;
        }
        String path = url.substring(0, end);
        return path.substring(path.lastIndexOf('/') + 1);
    }
}
在已有代码的基础上,由于不熟悉正则表达式的具体写法,只能参考上面的格式来实现 Android 端:
1、webview加载网页的时候,采用第三种方法:loadDataWithBaseURL(null, (String) msg.obj, "text/html", "utf-8", "");
主:MainActivity
/**
 * Demo activity: hands a hard-coded HTML page to {@link WebContentImageCache}
 * on a background thread and shows the returned markup in a WebView.
 */
public class MainActivity extends Activity {
    /** Message code signalling that the processed HTML is ready to be displayed. */
    private static final int MSG_SHOW_HTML = 100;

    private WebView webView;
    private MyHandler handler;
    /** Despite its name this field holds the HTML markup of the page, not an address. */
    private String url;

    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.activity_main);

        url = buildDemoHtml();

        webView = (WebView) findViewById(R.id.webview);
        webView.getSettings().setJavaScriptEnabled(true);

        // The handler is bound to the main looper; the worker thread posts its result to it.
        handler = new MyHandler(getMainLooper());
        ImgCacheThread worker = new ImgCacheThread();
        worker.start();
    }

    /** Assembles the demo page containing four remote images. */
    private static String buildDemoHtml() {
        return "<html>"
                + " <head>"
                + " <title>WebViewCacheDemo</title> "
                + " </head>"
                + " <body>"
                + " <img src=\"http://img04.taobaocdn.com/imgextra/i4/608825099/T2nGXBXXpaXXXXXXXX_!!608825099.jpg_310x310.jpg\"/>"
                + " 为什么不能显示<br/>"
                + " <img src=\"http://www.baidu.com/img/baidu_sylogo1.gif\"/>"
                + " <br/>"
                + " <img src=\"http://www.baidu.com/img/baidu_sylogo1.gif\"/>"
                + " <br/>"
                + " <img src=\"http://gi4.md.alicdn.com/bao/uploaded/i4/T1FwYuFRFbXXXXXXXX_!!0-item_pic.jpg_360x360q90.jpg\"/>"
                + " <br/>" + " " + "</body>" + "</html>";
    }

    /** Receives the processed HTML and loads it into the WebView. */
    class MyHandler extends Handler {
        public MyHandler(Looper looper) {
            super(looper);
        }

        @Override
        public void handleMessage(Message msg) {
            if (msg.what == MSG_SHOW_HTML) {
                String html = (String) msg.obj;
                webView.loadDataWithBaseURL(null, html, "text/html", "utf-8", "");
            }
            super.handleMessage(msg);
        }
    }

    /** Runs the image-cache processing off the calling thread and posts the result to the handler. */
    class ImgCacheThread extends Thread {
        @Override
        public void run() {
            WebContentImageCache cache = new WebContentImageCache();
            Message result = new Message();
            result.what = MSG_SHOW_HTML;
            result.obj = cache.updateWebUrl(url);
            handler.sendMessage(result);
            super.run();
        }
    }
}
帮助类:WebContentImageCache
package com.example.cachedemo.util;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import android.os.Environment;
import android.util.Log;
/**
 * Caches the images referenced by an HTML snippet on local storage and
 * rewrites the snippet so that its {@code <img>} tags point at the cached files.
 *
 * <p>On the first load the images are downloaded and the original markup is
 * returned; on later loads the markup is returned with local addresses.
 * Whether a load is "first" is decided by the presence of the cached file of
 * the first image only.
 */
public class WebContentImageCache {
    /**
     * Matches an {@code <img>} tag up to the closing quote of its
     * double-quoted {@code src} attribute; group 1 is the attribute value.
     */
    private static final Pattern IMG_SRC = Pattern.compile("<img[^>]+?src=\"(.+?)\"");

    private String downAdd = "/mnt/sdcard/image/";// default download directory
    private String useAdd = "file:///sdcard/image/";// default address prefix written into the HTML
    private boolean flag;// whether changeUrl should download (set on first load only)

    /**
     * Returns the markup to display for {@code url}.
     *
     * @param url HTML markup (despite the parameter name, not an address)
     * @return the original markup on first load (images are downloaded as a
     *         side effect), otherwise the markup with image addresses
     *         replaced by local ones
     */
    public String updateWebUrl(String url) {
        String temp = url;
        if (ifFristLoad(url)) {
            // First load: keep the original markup, download and store the images.
            flag = true;
            changeUrl(url);
        } else {
            // Later load: return the rewritten markup.
            flag = false;
            temp = changeUrl(url);
        }
        return temp;
    }

    /**
     * Tells whether this markup is being loaded for the first time, judged by
     * whether the cached file of its first image exists.
     *
     * @param url HTML markup
     * @return {@code true} when the cached file of the first image is missing
     */
    public boolean ifFristLoad(String url) {
        Matcher m = IMG_SRC.matcher(url);
        String fileName = "";
        if (m.find()) {
            fileName = localFileName(m.group(1));
        }
        // NOTE(review): consider replacing this log tag with a descriptive one.
        Log.d("fuck", fileName);
        return !new File(downAdd + fileName).exists();
    }

    /**
     * Replaces every image address in the markup by its local counterpart and,
     * when {@code flag} is set, downloads the images.
     *
     * @param url HTML markup
     * @return the rewritten markup
     */
    private String changeUrl(String url) {
        Matcher m = IMG_SRC.matcher(url);
        /* raw tag matches, in the form downLoad expects */
        List<String> imgUrlList = new ArrayList<String>();
        /* src attribute values */
        List<String> srcList = new ArrayList<String>();
        /* local file names */
        List<String> imgNameList = new ArrayList<String>();
        while (m.find()) {
            imgUrlList.add(m.group());
            srcList.add(m.group(1));
            imgNameList.add(localFileName(m.group(1)));
        }
        for (int i = 0; i < srcList.size(); i++) {
            // Literal replacement: the address is plain text, not a regex, and
            // may contain characters such as '?', '+' or '$'.
            url = url.replace(srcList.get(i), useAdd + imgNameList.get(i));
        }
        // Download and store the images.
        if (flag) {
            downLoad(imgUrlList, imgNameList);
        }
        return url;
    }

    /**
     * Downloads each image and stores it under the download directory. A
     * failure is printed and does not stop the remaining downloads.
     *
     * @param imgUrlList  image addresses; each entry may be a raw
     *                    {@code <img ... src="..."} match or a plain address
     * @param imgNameList local file names, parallel to {@code imgUrlList}
     */
    public void downLoad(List<String> imgUrlList, List<String> imgNameList) {
        // The target directory may not exist yet; a failure to create it
        // surfaces below when the first file is opened.
        new File(downAdd).mkdirs();
        for (int i = 0; i < imgUrlList.size(); i++) {
            String imgurl = extractSrc(imgUrlList.get(i));
            HttpURLConnection conn = null;
            try {
                conn = (HttpURLConnection) new URL(imgurl).openConnection();
                conn.setRequestMethod("GET");
                conn.setConnectTimeout(5000);
                // Read the whole image into memory first so that a network
                // failure does not leave a truncated file behind.
                byte[] result;
                InputStream is = conn.getInputStream();
                try {
                    ByteArrayOutputStream baos = new ByteArrayOutputStream();
                    byte[] buffer = new byte[1024];
                    int len;
                    while ((len = is.read(buffer)) != -1) {
                        baos.write(buffer, 0, len);
                    }
                    result = baos.toByteArray();
                } finally {
                    is.close();
                }
                FileOutputStream fos = new FileOutputStream(new File(downAdd + imgNameList.get(i)));
                try {
                    fos.write(result);
                    fos.flush();
                } finally {
                    fos.close();
                }
            } catch (Exception e) {
                e.printStackTrace();
            } finally {
                if (conn != null) {
                    conn.disconnect();
                }
            }
        }
    }

    /**
     * Returns the image address held by {@code entry}: the src value when the
     * entry is a raw tag match, the text between the first and last quote as a
     * fallback, otherwise the entry itself.
     */
    private static String extractSrc(String entry) {
        Matcher m = IMG_SRC.matcher(entry);
        if (m.find()) {
            return m.group(1);
        }
        int first = entry.indexOf("\"");
        int last = entry.lastIndexOf("\"");
        if (first >= 0 && last > first) {
            return entry.substring(first + 1, last);
        }
        return entry;
    }

    /**
     * Builds the local file name for an image address: the last path segment
     * without its extension passed through {@code MD5.encryptMD5}, followed by
     * the original extension. When the conversion throws, the plain segment is
     * used instead.
     */
    private static String localFileName(String src) {
        int nameStart = src.lastIndexOf("/") + 1;
        int dot = src.lastIndexOf(".");
        String baseName;
        String extension;
        if (dot >= nameStart) {
            baseName = src.substring(nameStart, dot);
            extension = src.substring(dot);
        } else {
            // No extension in the last path segment.
            baseName = src.substring(nameStart);
            extension = "";
        }
        try {
            baseName = MD5.encryptMD5(baseName);
        } catch (Exception e) {
            e.printStackTrace();
        }
        return baseName + extension;
    }
}
后记:项目一开始采用的是勇哥的思路:第一次加载时只下载图片,第二次加载时才替换为本地地址;由于是定制项目,所以当时没有在意这种情况。后来靖哥提出,可以只下载一次,并且在第一次加载时就完成替换。另外,代码中还有许多地方需要优化,先放上来,方便以后再接触时有思路。
需要学习:正则表达式、URL 转换。
未考虑的情况:这个功能是为了给客户演示,做成了"死缓存"的形式,仍有很多情况没有考虑。比如:HTML 中的图片换了,那么检测时就必须检测所有图片,并且删除原有的图片。
这个知识点目前只算有了一个小雏形,足够完成这次的需求,但程序依旧有很多地方需要改进……