public static String removeHTML(String htmlString)
{
// Remove HTML tag from java String
String noHTMLString = htmlString.replaceAll("//<.>", "");
// Remove Carriage return from java String
noHTMLString = noHTMLString.replaceAll("/r", "
");
// Remove New line from java string and replace html break
noHTMLString = noHTMLString.replaceAll("/n", " ");
noHTMLString = noHTMLString.replaceAll("/'", "'");
noHTMLString = noHTMLString.replaceAll("/"", """);
return noHTMLString;
}
public static void main(String[] args) {
String strHTML= ""+
"
"+"
Convert HTML to Text String"+""+
"
"+"This is HTML String of java's source code /"my program/""+
""+
"";
String stringWithoutHTML=removeHTML(strHTML);
System.out.println(stringWithoutHTML);
}
publicstaticString regEx_script ="
publicstaticString regEx_style ="
publicstaticString regEx_html ="<[^>]+>";
publicstaticPattern p_style = Pattern.compile(regEx_style, Pattern.CASE_INSENSITIVE);
publicstaticPattern p_script = Pattern.compile(regEx_script, Pattern.CASE_INSENSITIVE);
publicstaticPattern p_html = Pattern.compile(regEx_html, Pattern.CASE_INSENSITIVE);
publicstaticString getOptimizedData(String inputString) {
if(inputString ==null) {
returninputString;
}
//stripping script tags whether thetagcontains "/n" or "/r" or not.
Matcher m_script = p_script.matcher(inputString);
String htmlStr = m_script.replaceAll("");
//stripping style tags whether thetagcontains "/n" or "/r" or not.
Matcher m_style = p_style.matcher(htmlStr);
htmlStr = m_style.replaceAll("");
//strippinghtmltags but continue to have the "/n" and "/r" in right place.
Matcher m_html = p_html.matcher(htmlStr);
htmlStr = m_html.replaceAll("");
returnhtmlStr;
}