Java去掉字符串中所有的HTML标签，获取纯文本内容，并获取img标签的src路径

最新推荐文章于 2022-06-01 16:36:01 发布

程序员_007

最新推荐文章于 2022-06-01 16:36:01 发布

阅读量2k

点赞数

分类专栏：实用代码块

实用代码块专栏收录该内容

231 篇文章 0 订阅

订阅专栏

package com.liuzy.javaopen.seivice;

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class Test {

    /** Matches any markup tag: a '<', then everything up to and including the next '>'. */
    private static final Pattern TAG_PATTERN = Pattern.compile("<[^>]*>");

    /**
     * Matches an {@code <img ...>} tag that carries a quoted {@code src} attribute and captures
     * the attribute value in group 1. Compiled once and case-insensitively, because HTML tag and
     * attribute names are not case-sensitive.
     */
    private static final Pattern IMG_SRC_PATTERN = Pattern.compile(
            "<img[^>]+src\\s*=\\s*['\"]([^'\"]+)['\"][^>]*>", Pattern.CASE_INSENSITIVE);

    /**
     * Demo entry point (dated 2016-02-17) showing the two helpers of this class:
     * <ol>
     *   <li>remove every tag from a string and keep only the plain text;</li>
     *   <li>read the {@code src} path of the {@code img} nodes in an HTML fragment.</li>
     * </ol>
     * Prints the plain text and, when at least one image was found, the first {@code src} on the
     * following line.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // NOTE(review): the markup of the original sample string was lost when this snippet was
        // published; the tags below are a representative reconstruction, the two text fragments
        // are the original ones.
        String html = "<div><p><span><img src=\"http://example.com/first.png\" alt=\"first\"/>"
                + "12132第一串字符</span></p></div>"
                + "<div><p><span><img src=\"http://example.com/second.png\" alt=\"second\"/>"
                + "这是第二窜字符</span></p></div>";

        List<String> srcs = extractImgSrcs(html);
        String str = stripTags(html);

        // Guard the lookup: an input without any <img> must not crash the demo.
        if (srcs.isEmpty()) {
            System.out.println(str);
        } else {
            System.out.println(str + "\n" + srcs.get(0));
        }
    }

    /**
     * Removes every {@code <...>} tag from the given text.
     *
     * @param html text that may contain markup; {@code null} is treated as empty
     * @return the input with all tags removed, never {@code null}
     */
    public static String stripTags(String html) {
        if (html == null) {
            return "";
        }
        return TAG_PATTERN.matcher(html).replaceAll("");
    }

    /**
     * Collects the quoted {@code src} attribute values of the {@code img} tags in the given text,
     * in order of appearance.
     *
     * @param html text that may contain markup; {@code null} is treated as empty
     * @return a new mutable list of the captured values, empty when nothing matched
     */
    public static List<String> extractImgSrcs(String html) {
        List<String> srcs = new ArrayList<>();
        if (html == null) {
            return srcs;
        }
        Matcher m = IMG_SRC_PATTERN.matcher(html);
        while (m.find()) {
            srcs.add(m.group(1));
        }
        return srcs;
    }
}