Java通过URL获取公众号文章生成HTML

说明:通过公众号URL获取的内容,文字可以正常显示,但图片存在跨域访问的问题(微信不允许跨域访问公众号图片)。因此需要先将公众号图片下载到本地,再上传至OSS,最后把HTML中的图片地址全部替换为自己的OSS地址。

这里需要在后台对HTML进行DOM解析,需要用到Jsoup

<!-- Aliyun OSS SDK: provides the com.aliyun.oss client classes used by OssUtil2 to upload images -->
<dependency>
			<groupId>com.aliyun.oss</groupId>
			<artifactId>aliyun-sdk-oss</artifactId>
			<version>2.2.3</version>

		</dependency>
		<!-- jsoup: HTML parser used by the controller to select and rewrite img elements -->
		<dependency>
		    <groupId>org.jsoup</groupId>
		    <artifactId>jsoup</artifactId>
		    <version>1.9.2</version>
		</dependency>

controller

package com.iueang.controller;

import java.io.File;
import java.util.HashMap;
import java.util.Map;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.ResponseBody;

import com.iueang.util.DownLoadImg;
import com.iueang.util.GetBody;
import com.iueang.util.OssUtil2;
import com.iueang.util.UrlUtil;
/**
 * Controller that fetches an article by URL and returns its HTML with the
 * lazily-loaded images re-hosted on OSS.
 */
@Controller
public class TestUrl {

	/**
	 * Returns the view name of the demo page.
	 *
	 * @return the view path {@code html/index.html}
	 */
	@RequestMapping("tohtml")
	public String tohtml() {
		return "html/index.html";
	}

	/**
	 * Fetches the page at {@code url}, re-hosts every {@code img[data-src]} image
	 * on OSS and returns the rewritten HTML.
	 *
	 * <p>For each image the {@code data-src} URL is downloaded to the local staging
	 * directory, uploaded under the {@code testimg/} prefix, and, only when the
	 * upload succeeded, the local copy is deleted and the element's {@code src}
	 * is pointed at the OSS object. Images whose upload failed are left untouched
	 * so the page never references an object that does not exist.
	 *
	 * @param url address of the article to fetch
	 * @return a map with the single key {@code resultHtml} holding the rewritten HTML
	 */
	@RequestMapping("getHtml")
	@ResponseBody
	public Map<String,String> getHtml(String url){
		// Fetch the article source as text.
		String html = UrlUtil.getAccess(url);
		String reg = "<html>(.*?)</html>";
		// Guard against a failed fetch so the regex helper never sees null.
		String head = (html == null) ? "" : GetBody.getSubUtilSimple(html, reg);
		String ossBaseUrl = "http://yueang2.oss-cn-qingdao.aliyuncs.com/testimg/";

		Document doc = Jsoup.parse(head);
		Elements pngs = doc.select("img[data-src]");
		System.out.println(pngs);
		for (Element element : pngs) {
			// The real image address is carried in data-src.
			String imgUrl = element.attr("data-src");
			// Download the image to the local staging directory.
			String filename = DownLoadImg.downloadPicture(imgUrl);
			File file = new File("D:\\m2\\" + filename);
			// Upload to OSS.
			boolean uploaded = OssUtil2.uploadFileToOss(file, "testimg/" + filename);
			if (!uploaded) {
				// Leave the element as it was: rewriting src would produce a dead link.
				continue;
			}
			file.delete();
			element.attr("src", ossBaseUrl + filename);
		}

		String newsBody = doc.toString();
		System.out.println(newsBody);
		Map<String,String> map = new HashMap<String, String>();
		map.put("resultHtml", newsBody);
		return map;
	}
}

util工具类

GetBody类

package com.iueang.util;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Small regex helper for pulling a fragment out of a larger text.
 */
public class GetBody {

	/**
	 * Returns the text captured by group 1 of the first match of {@code reg}
	 * in {@code html}.
	 *
	 * <p>The pattern is compiled with default flags, so {@code .} does not match
	 * line terminators unless the pattern itself enables that (for example with
	 * an embedded {@code (?s)} flag).
	 *
	 * @param html text to search; {@code null} is treated as "no match"
	 * @param reg  regular expression containing at least one capturing group;
	 *             {@code null} is treated as "no match"
	 * @return the content of capture group 1 of the first match, or an empty
	 *         string when there is no match, the pattern has no capturing group,
	 *         or the group did not participate in the match
	 * @throws java.util.regex.PatternSyntaxException if {@code reg} is not a valid pattern
	 */
	public static String getSubUtilSimple(String html, String reg) {
		if (html == null || reg == null) {
			return "";
		}
		Matcher m = Pattern.compile(reg).matcher(html);
		// Only the first match is of interest, and group(1) is valid only when
		// the pattern actually declares a capturing group.
		if (m.find() && m.groupCount() >= 1) {
			String captured = m.group(1);
			return (captured == null) ? "" : captured;
		}
		return "";
	}

}

OssUtil2类

package com.iueang.util;

import java.io.File;
import java.util.HashMap;
import java.util.Map;

import com.aliyun.oss.OSSClient;
import com.aliyun.oss.model.ObjectMetadata;

/**
 * Helper for uploading local files to an Aliyun OSS bucket.
 */
public class OssUtil2 {
	// The following values are required; see the documentation referenced at the end of the article.
	static String endpoint = "http://oss-cn-qingdao.aliyuncs.com";
	static String accessKeyId = "oss获取";
	static String accessKeySecert = "oss获取";
	static String bucketName = "yueang2";

	/**
	 * Uploads a single file to OSS.
	 *
	 * <p>A new client is created for each call and shut down when the attempt
	 * completes, whether or not the upload succeeded.
	 *
	 * @param file    the local file to upload
	 * @param objName object key of the uploaded file, including any folder
	 *                prefix, e.g. {@code game/game/test.txt}
	 * @return {@code true} if the upload completed; {@code false} if
	 *         {@code file} is {@code null} or not an existing regular file,
	 *         {@code objName} is {@code null}, or the client could not be
	 *         created or the upload failed
	 */
	public static boolean uploadFileToOss(File file, String objName) {
		// Nothing to upload: fail fast instead of building a client for a doomed request.
		if (file == null || !file.isFile() || objName == null) {
			return false;
		}
		OSSClient ossClient = null;
		try {
			ossClient = new OSSClient(endpoint, accessKeyId, accessKeySecert);
			ossClient.putObject(bucketName, objName, file, new ObjectMetadata());
			return true;
		} catch (Exception e) {
			// Boundary catch: every failure is reported to the caller as false.
			e.printStackTrace();
			return false;
		} finally {
			// Release the client's resources on the failure path as well.
			if (ossClient != null) {
				ossClient.shutdown();
			}
		}
	}
}

DownLoadImg类

package com.iueang.util;

import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.UUID;

import sun.misc.BASE64Encoder;
/**
 * Helper for downloading a remote image into the local staging directory.
 */
public class DownLoadImg {

	/**
	 * Downloads the resource at {@code urlList} into {@code D:/m2/} under a
	 * freshly generated name of the form {@code iueang<uuid>.png}.
	 *
	 * <p>The resource is read completely before the target file is opened, so a
	 * failed download does not leave an empty file behind. Failures are printed
	 * and otherwise ignored; the generated name is returned in every case, so
	 * callers must not assume the file exists.
	 *
	 * @param urlList address of the image to download
	 * @return the generated file name (without directory)
	 */
	public static String downloadPicture(String urlList) {
		String filename = "iueang" + UUID.randomUUID().toString() + ".png";
		String path = "D:/m2/" + filename;
		try {
			byte[] image = readFully(new URL(urlList));
			writeFully(new File(path), image);
		} catch (IOException e) {
			// Also covers MalformedURLException, which is an IOException.
			e.printStackTrace();
		}
		System.out.println("Download返回的filname=" + filename);
		return filename;
	}

	/** Reads the whole content behind {@code url}, always closing the stream. */
	private static byte[] readFully(URL url) throws IOException {
		DataInputStream in = new DataInputStream(url.openStream());
		try {
			ByteArrayOutputStream collected = new ByteArrayOutputStream();
			byte[] buffer = new byte[1024];
			int length;
			// read() signals end of stream with -1.
			while ((length = in.read(buffer)) != -1) {
				collected.write(buffer, 0, length);
			}
			return collected.toByteArray();
		} finally {
			in.close();
		}
	}

	/** Writes {@code data} to {@code target}, always closing the stream. */
	private static void writeFully(File target, byte[] data) throws IOException {
		FileOutputStream out = new FileOutputStream(target);
		try {
			out.write(data);
		} finally {
			out.close();
		}
	}
}

 

  • 26
    点赞
  • 85
    收藏
    觉得还不错? 一键收藏
  • 12
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 12
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值