在百度富文本编辑器中集成秀米时遇到一个大坑:远程图片抓取本地化功能抓取不到背景图片。原因是秀米的背景图是通过 <section> 的 background-image 样式设置的,而不是 <img> 标签,所以百度编辑器抓取不到。查了很多资料,也没有找到现成的解决办法,只好用了一个最笨的方法:用正则从样式里提取图片地址,下载后重新上传,再替换原地址。直接上代码:
/**
*
*
*/
public static void main(String[] agrs){
Map<String, String> map = getImageUrl("<section style=\"display: inline-block; width: 100%; vertical-align: top; background-position: 0% 0%; background-repeat: repeat; background-size: 121.239%; background-attachment: scroll; background-image: url("http://statics.xiumi.us/stc/images/templates-assets/tpl-paper/image/ef7d2a05f21bb29d21e46a919ce78611-sz_247029.png"); box-sizing: border-box;\"><section style=\\\"display: inline-block; width: 100%; vertical-align: top; background-position: 0% 0%; background-repeat: repeat; background-size: 121.239%; background-attachment: scroll; background-image: url("http://statics.xiumi.us/stc/images/templates-assets/tpl-paper/image/ef7d2a05f21bb29d21e46a919ce78611-sz_247329.png"); box-sizing: border-box;\\\">\"");
for (Map.Entry<String, String> entry : map.entrySet()) {
//写自己的逻辑 //bizInfo.setContent(bizInfo.getContent().replaceAll(entry.getKey(), entry.getValue()));
}
}
/**
 * Extracts image URLs referenced through CSS {@code url(...)} in the given HTML and
 * passes the distinct URLs to {@code dowImg}.
 *
 * <p>For every regex match, the URL is taken from the first occurrence of {@code "http"}
 * up to and including the last occurrence of a known image extension
 * ({@code .png}, {@code .jpg}, {@code .gif}, {@code .bmp}, {@code .jpeg}, tried in that
 * order). Matches without {@code "http"} or without a known extension after it are skipped.
 *
 * @param HTML the HTML fragment to scan; {@code null} is treated as containing no images
 * @return the mapping returned by {@code dowImg} for the URLs found, or an empty map when
 *         no URL was found
 */
private static Map<String, String> getImageUrl(String HTML) {
    if (HTML == null) {
        return new HashMap<>();
    }
    // Matches from the text "url" up to a following ')' (both quantifiers are lazy).
    String IMGURL_REG = "url(.*?)[^>]*?\\)";
    Matcher matcher = Pattern.compile(IMGURL_REG).matcher(HTML);
    // Tried in order; the first extension present after the "http" prefix wins.
    String[] extensions = {".png", ".jpg", ".gif", ".bmp", ".jpeg"};
    Set<String> listImgUrl = new HashSet<String>();
    while (matcher.find()) {
        String s = matcher.group();
        int start = s.indexOf("http");
        if (start < 0) {
            // Relative or data URLs: nothing to download remotely.
            continue;
        }
        for (String ext : extensions) {
            int end = s.lastIndexOf(ext);
            // end must lie after start, otherwise substring would throw.
            if (end > start) {
                listImgUrl.add(s.substring(start, end + ext.length()));
                break;
            }
        }
    }
    if (listImgUrl.isEmpty()) {
        // An empty map instead of null lets callers iterate without a null check.
        return new HashMap<>();
    }
    return dowImg(listImgUrl);
}
/**
 * Uploads each image URL in the set through {@code uploadURLFile} and maps the original
 * URL to the path that {@code FileUtilByRest.getFIlePath} returns for the upload result.
 *
 * <p>URLs pointing at host {@code 49.4.4.36} are skipped (presumably the application's own
 * file server, so those images are already local; confirm with the deployment). Strings
 * that are not valid URLs are reported via {@code printStackTrace} and skipped.
 *
 * @param set the image URLs to process; {@code null} yields an empty map
 * @return a map from original URL to the path of the uploaded copy; never {@code null}
 */
private static Map<String, String> dowImg(Set<String> set) {
    Map<String, String> map = new HashMap<>();
    if (set == null) {
        return map;
    }
    String ownHost = "49.4.4.36";
    for (String str : set) {
        // Keep the original prefix test for scheme-less entries.
        if (str == null || str.startsWith(ownHost)) {
            continue;
        }
        try {
            URL remote = new URL(str);
            // Entries normally start with "http", so the prefix test above never matches
            // them; compare the parsed host to actually skip already-local images.
            if (ownHost.equals(remote.getHost())) {
                continue;
            }
            // Upload the image and remember where it ended up.
            map.put(str, FileUtilByRest.getFIlePath(uploadURLFile(remote)));
        } catch (MalformedURLException e) {
            e.printStackTrace();
        }
    }
    return map;
}
/**
 * Uploads the file behind the given URL to the upload endpoint ({@code uploadUrl}).
 *
 * <p>The file is first obtained as a local temp file via
 * {@code FileUtilByLocal.getLocalFile(url)}, then posted as a multipart request with the
 * parts {@code upload} (the file), {@code t} (current time in milliseconds) and
 * {@code token} (from {@code getUploadToken(t)}). On HTTP 200 the JSON response must carry
 * {@code code == "S0001"} and a {@code data} array; its first element is returned.
 * The HTTP connection manager is shut down and the temp file is deleted in all cases.
 *
 * @param url location of the file to upload
 * @return the first element of the response's {@code data} array, or {@code null} when the
 *         server answers with a status other than 200
 * @throws RuntimeException with message {@code "upload error"} wrapping any failure,
 *         including a response whose code is not {@code S0001} or that has no {@code data}
 */
public static String uploadURLFile(URL url) {
    HttpClient httpclient = new DefaultHttpClient();
    String fileId = null;
    File temp = null;
    try {
        HttpPost httppost = new HttpPost(uploadUrl);
        temp = FileUtilByLocal.getLocalFile(url);
        FileBody bin = new FileBody(temp);
        MultipartEntity reqEntity = new MultipartEntity();
        // "upload" is the name of the file field expected by the backend.
        reqEntity.addPart("upload", bin);
        long t = System.currentTimeMillis();
        reqEntity.addPart("t", new StringBody(String.valueOf(t)));
        reqEntity.addPart("token", new StringBody(getUploadToken(t)));
        httppost.setEntity(reqEntity);
        HttpResponse response = httpclient.execute(httppost);
        int statusCode = response.getStatusLine().getStatusCode();
        if (statusCode == HttpStatus.SC_OK) {
            HttpEntity resEntity = response.getEntity();
            // Read the whole response body with HttpClient's own utility.
            String text = EntityUtils.toString(resEntity);
            System.out.println(text);
            JSONObject json = JSONObject.parseObject(text);
            String code = json.getString("code");
            if ("S0001".equals(code) && json.getJSONArray("data") != null) {
                JSONArray jsonArray = json.getJSONArray("data");
                fileId = jsonArray.getString(0);
            } else {
                throw new RuntimeException("认证已过期");
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
        throw new RuntimeException("upload error", e);
    } finally {
        // Clean up each resource separately so that a failing connection shutdown
        // cannot prevent the temp file from being deleted.
        try {
            httpclient.getConnectionManager().shutdown();
        } catch (Exception ignored) {
            // Best-effort cleanup; nothing useful can be done here.
        }
        if (temp != null) {
            try {
                temp.delete();
            } catch (Exception ignored) {
                // Best-effort cleanup; a leftover temp file is not fatal.
            }
        }
    }
    return fileId;
}