Java解析富文本

  <!--
    html2image, coordinates gui.ava.
    NOTE(review): the next dependency uses the same artifactId (html2image) under a
    different groupId. Presumably only one of the two is needed; confirm, since keeping
    both may put duplicate classes on the classpath.
  -->
  <dependency>
    <groupId>gui.ava</groupId>
    <artifactId>html2image</artifactId>
    <version>2.0.1</version>
  </dependency>

  <!-- html2image, coordinates com.github.xuwei-k -->
  <dependency>
    <groupId>com.github.xuwei-k</groupId>
    <artifactId>html2image</artifactId>
    <version>0.1.0</version>
  </dependency>

  <!-- jsoup HTML parser library @ https://jsoup.org/ -->
  <dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.13.1</version>
  </dependency>

富文本

package com.src.asset.svc.util;

import cn.hutool.core.io.FileUtil;
import gui.ava.html.image.generator.HtmlImageGenerator;
import gui.ava.html.parser.HtmlParser;
import gui.ava.html.parser.HtmlParserImpl;
import gui.ava.html.renderer.ImageRendererImpl;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.awt.image.BufferedImage;
import java.io.*;
import java.util.*;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Helpers for processing rich-text (HTML) fragments: extracting embedded Base64 images
 * to files, replacing {@code <img src>} values with URLs, and rendering the HTML to a PNG.
 *
 * <p>All rendered pages are written under the {@code imgFile} directory and all extracted
 * pictures under the {@code file} directory (both created through {@code FileUtil.mkdir}).
 */
public class LabelConversion {

    /** Opening half of the HTML page wrapped around a fragment before it is rendered. */
    private static final String HTML_PREFIX = "<!DOCTYPE html>\n" +
            "<html lang=\"en\">\n" +
            "<head>\n" +
            "    <meta charset=\"UTF-8\">\n" +
            "    <title>Title</title>\n" +
            "</head>\n" +
            "<body>\n" +
            "<div>";

    /** Closing half of the HTML page wrapped around a fragment before it is rendered. */
    private static final String HTML_SUFFIX = "</div>\n" +
            "</body>\n" +
            "</html>";

    /** Directory that receives rendered PNG files. */
    private static final String IMAGE_DIR = "imgFile";

    /** Fixed file name used by {@link #main} and {@link #imageConversion}. */
    private static final String RENDERED_IMAGE_NAME = "html.png";

    /** Directory that receives pictures extracted by {@link #riskReStepPic}. */
    private static final String PICTURE_DIR = "file";

    /** Pause (seconds) applied before and after the first render in {@link #imageConversion}. */
    private static final long IMAGE_LOAD_WAIT_SECONDS = 3;

    /** Sample image URL substituted for every {@code <img src>} by the {@link #main} demo. */
    private static final String DEMO_IMAGE_URL = "https://ts1.cn.mm.bing.net/th/id/R-C.901f8ebdab22d065baefeae6c2701cc0?rik=Z3Hew18zFaF%2bLQ&riu=http%3a%2f%2fwww.pp3.cn%2fuploads%2f20120418lw%2f13.jpg&ehk=Es5ZGH90h%2foCghvlIwdKfUiqpO05gLSgOEBU2i0Mwok%3d&risl=&pid=ImgRaw&r=0";

    /**
     * Matches the {@code src} attribute of an {@code <img>} tag. The value is captured in
     * group 1 (double-quoted), group 2 (single-quoted) or group 3 (unquoted). The attribute
     * must be preceded by whitespace so that names such as {@code data-src} are not matched.
     */
    private static final Pattern IMG_SRC_PATTERN = Pattern.compile(
            "<img\\b[^>]*?\\ssrc\\s*=\\s*(?:\"([^\"]*)\"|'([^']*)'|([^\\s>]+))",
            Pattern.CASE_INSENSITIVE);

    /**
     * Demo entry point: extracts embedded pictures from a rich-text fragment, points every
     * {@code <img src>} at a sample URL, prints the resulting HTML and renders it to
     * {@code imgFile/html.png}.
     *
     * @param args unused
     * @throws InterruptedException declared for compatibility; this demo renders without pausing
     */
    public static void main(String[] args) throws InterruptedException {
        // Placeholder: paste the rich-text HTML to process here.
        String repairPlan = "";
        // Writes any embedded Base64 pictures to disk; the returned list is not needed here.
        riskReStepPic(repairPlan);

        Document document = Jsoup.parse(repairPlan);
        for (Element image : document.select("img[src]")) {
            image.attr("src", DEMO_IMAGE_URL);
        }
        String html = document.toString();
        System.out.println(html);

        // Original author's note: sleeping around the render was suggested online (the main
        // thread may finish before the image is produced) but did not help here, so the demo
        // renders with no pause.
        renderToImageDir(html, 0);
    }

    /**
     * Replaces the {@code src} of every {@code <img>} in the fragment with the URL at the same
     * position in {@code url}, then renders the result to {@code imgFile/html.png}.
     *
     * <p>The output file name is fixed, so each call overwrites the previous rendering.
     *
     * @param repairPlan rich-text HTML fragment
     * @param url        replacement URLs, one per {@code <img src>} element in document order
     * @return the {@code imgFile} <em>directory</em> (not the PNG file itself)
     * @throws InterruptedException      if the thread is interrupted while pausing
     * @throws IndexOutOfBoundsException if {@code url} has fewer entries than there are images
     */
    public static File imageConversion(String repairPlan, List<String> url) throws InterruptedException {
        String html = imageConversion2(repairPlan, url);
        return renderToImageDir(html, IMAGE_LOAD_WAIT_SECONDS);
    }

    /**
     * Renders an HTML string to a PNG with a random file name inside {@code imgFile}.
     *
     * @param html HTML markup to render
     * @return the generated file name ({@code <uuid>.png}), without the directory
     * @throws FileNotFoundException declared for compatibility with existing callers
     */
    public static String htmlToPNG(String html) throws FileNotFoundException {
        HtmlParser htmlParser = new HtmlParserImpl();
        htmlParser.loadHtml(html);
        ImageRendererImpl imageRenderer = new ImageRendererImpl(htmlParser);

        File imgDir = FileUtil.mkdir(IMAGE_DIR);
        String fileName = cn.hutool.core.lang.UUID.randomUUID() + ".png";
        imageRenderer.saveImage(imgDir + "/" + fileName);
        return fileName;
    }

    /**
     * Replaces the {@code src} of every {@code <img>} in the fragment with the URL at the same
     * position in {@code url} and returns the resulting document as HTML.
     *
     * @param repairPlan rich-text HTML fragment
     * @param url        replacement URLs, one per {@code <img src>} element in document order;
     *                   surplus entries are ignored
     * @return the full parsed document serialized back to HTML
     * @throws InterruptedException      declared for compatibility with existing callers
     * @throws NullPointerException      if the fragment contains images and {@code url} is null
     * @throws IndexOutOfBoundsException if {@code url} has fewer entries than there are images
     */
    public static String imageConversion2(String repairPlan, List<String> url) throws InterruptedException {
        Document document = Jsoup.parse(repairPlan);
        Elements images = document.select("img[src]");
        if (!images.isEmpty()) {
            Objects.requireNonNull(url, "url");
            // Fail up front with a readable message instead of part-way through the loop.
            if (url.size() < images.size()) {
                throw new IndexOutOfBoundsException(
                        "Expected at least " + images.size() + " image URLs but got " + url.size());
            }
            for (int i = 0; i < images.size(); i++) {
                images.get(i).attr("src", url.get(i));
            }
        }
        return document.toString();
    }

    /**
     * Decodes every {@code <img src>} of the fragment as Base64 and writes each one to a new
     * file {@code file/<uuid>.jpg}. The {@code .jpg} extension is used regardless of the
     * actual image type.
     *
     * <p>A picture that cannot be written is reported on stderr and left out of the result,
     * so the returned list may be shorter than the number of images in the fragment.
     *
     * @param reStep rich-text HTML fragment
     * @return the files that were written, in document order
     * @throws IllegalArgumentException if a {@code src} value (after {@link #checkSuffix})
     *                                  is not valid Base64
     */
    public static List<File> riskReStepPic(String reStep) {
        List<File> pictures = new ArrayList<>();
        Elements images = Jsoup.parse(reStep).select("img[src]");
        for (Element image : images) {
            String base64Img = checkSuffix(image.attr("src"));
            byte[] imgByte = Base64.getDecoder().decode(base64Img);

            File dir = FileUtil.mkdir(PICTURE_DIR);
            File picture = new File(dir + "/" + UUID.randomUUID() + ".jpg");
            // try-with-resources closes the stream even when write() fails.
            try (OutputStream out = new FileOutputStream(picture)) {
                out.write(imgByte);
            } catch (IOException e) {
                // Best effort: keep processing the remaining images.
                e.printStackTrace();
                continue;
            }
            pictures.add(picture);
        }
        return pictures;
    }

    /**
     * Collects the {@code src} values of all {@code <img>} tags found in the given HTML using
     * a regular expression. Double-quoted, single-quoted and unquoted values are supported;
     * when a tag carries several {@code src} attributes only the first is used.
     *
     * @param htmlStr HTML text; {@code null} yields an empty set
     * @return the distinct {@code src} values (never {@code null})
     */
    public Set<String> getImgStr(String htmlStr) {
        Set<String> pics = new HashSet<>();
        if (htmlStr == null) {
            return pics;
        }
        Matcher matcher = IMG_SRC_PATTERN.matcher(htmlStr);
        while (matcher.find()) {
            String src = matcher.group(1);
            if (src == null) {
                src = matcher.group(2);
            }
            if (src == null) {
                src = matcher.group(3);
            }
            pics.add(src);
        }
        return pics;
    }

    /**
     * Removes one {@code PictureSuffix} marker from the given string. The markers are tested
     * in a fixed order and the first one contained in {@code baseStr} is removed (all of its
     * occurrences); if none is contained, {@code PictureSuffix.Jpeg} is removed.
     *
     * <p>NOTE(review): the markers are presumably data-URI prefixes such as
     * {@code data:image/png;base64,}. {@code PictureSuffix} is not visible here; confirm.
     *
     * @param baseStr the {@code src} value of an image
     * @return {@code baseStr} without the matching marker
     */
    public static String checkSuffix(String baseStr) {
        // Order matters: it mirrors the precedence callers have always observed.
        CharSequence[] markers = {
                PictureSuffix.Png,
                PictureSuffix.Gif,
                PictureSuffix.Jfif,
                PictureSuffix.Pjpeg,
                PictureSuffix.Bmp,
                PictureSuffix.Pjp,
                PictureSuffix.Jpg,
                PictureSuffix.Dib,
                PictureSuffix.Ico
        };
        for (CharSequence marker : markers) {
            if (baseStr.contains(marker)) {
                return baseStr.replace(marker, "");
            }
        }
        return baseStr.replace(PictureSuffix.Jpeg, "");
    }

    /**
     * Wraps {@code innerHtml} in the fixed page template and renders it to
     * {@code imgFile/html.png}.
     *
     * @param innerHtml   markup placed between {@link #HTML_PREFIX} and {@link #HTML_SUFFIX}
     * @param waitSeconds pause applied before and after the first render; {@code 0} for none
     * @return the {@code imgFile} directory
     * @throws InterruptedException if the thread is interrupted while pausing
     */
    private static File renderToImageDir(String innerHtml, long waitSeconds) throws InterruptedException {
        HtmlImageGenerator generator = new HtmlImageGenerator();
        generator.loadHtml(HTML_PREFIX + innerHtml + HTML_SUFFIX);
        TimeUnit.SECONDS.sleep(waitSeconds);
        // NOTE(review): the returned image is not used; this call is kept because it
        // presumably triggers a first render so images can load before saving. Confirm.
        generator.getBufferedImage();
        TimeUnit.SECONDS.sleep(waitSeconds);
        File imgDir = FileUtil.mkdir(IMAGE_DIR);
        generator.saveAsImage(imgDir + "/" + RENDERED_IMAGE_NAME);
        return imgDir;
    }
}

Java解析HTML富文本可以使用Jsoup库。Jsoup是一个开源的Java HTML解析器,可以方便地从HTML文档中提取数据。

使用Jsoup解析HTML富文本的步骤如下:

1. 导入Jsoup库:在项目中引入Jsoup库的依赖,可以通过Maven或Gradle进行引入。
2. 获取HTML文档:可以通过URL、文件或字符串等方式获取HTML文档。
3. 解析HTML文档:使用Jsoup提供的API进行解析,可以根据需要提取标签、属性、文本内容等信息。

下面是一个简单的示例代码,演示了如何使用Jsoup解析HTML富文本:

```java
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

public class HtmlParser {
    public static void main(String[] args) {
        String html = "<html><body><div><h1>标题</h1><p>段落</p></div></body></html>";

        // 解析HTML文档
        Document doc = Jsoup.parse(html);

        // 提取标题
        Element titleElement = doc.select("h1").first();
        String title = titleElement.text();
        System.out.println("标题:" + title);

        // 提取段落
        Elements paragraphElements = doc.select("p");
        for (Element paragraphElement : paragraphElements) {
            String paragraph = paragraphElement.text();
            System.out.println("段落:" + paragraph);
        }
    }
}
```

运行以上代码,输出结果为:

```
标题:标题
段落:段落
```

这个示例演示了如何从HTML文档中提取标题和段落内容。你可以根据需要使用Jsoup提供的API进行更复杂的解析操作。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值