// 去掉多个换行
content = content.replaceAll("(\r?\n(\\s*\r?\n)+)", "\r\n");
[b]从word中读取内容,通过正则去掉文章里的多个连续空行[/b]
package cn.com.quiz;
/*
读取word内容并把内容写入到一个String中
去掉那些
换行+(任意个空白+换行) 替换为一个换行
换行可能是\n 也可能是 \r\n 所以使用 \r?
*/
import java.io.*;
import org.textmining.text.extraction.WordExtractor;
/**
 * Reads the text of a Word document into a String and prints it before and
 * after collapsing runs of blank lines into a single line break.
 */
class FileToString
{
    /**
     * Extracts the text of {@code .\123.doc} (resolved against the current
     * working directory), prints it, replaces every "line break followed by
     * one or more whitespace-only lines" with a single {@code \r\n}, and
     * prints the result.
     *
     * @param args command-line arguments; not used
     * @throws IOException if the document cannot be opened or the stream
     *                     cannot be closed
     */
    public static void main(String[] args) throws IOException
    {
        // try-with-resources guarantees the stream is closed on every path;
        // the original code never closed it.
        try (FileInputStream in = new FileInputStream(".\\123.doc"))
        {
            WordExtractor extractor = new WordExtractor();
            // The inner try keeps the original best-effort behaviour: failures
            // while extracting or printing are reported, not propagated.
            try
            {
                String out = extractor.extractText(in);
                System.out.println("before: " + out);
                // A line break may be \n or \r\n, hence the optional \r.
                // \s* also swallows whitespace on the otherwise empty lines.
                out = out.replaceAll("(\r?\n(\\s*\r?\n)+)", "\r\n");
                System.out.println("after: " + out);
            }
            catch (Exception e)
            {
                e.printStackTrace();
            }
        }
    }
}
content = content.replaceAll("(\r?\n(\\s*\r?\n)+)", "\r\n");
[b]从word中读取内容,通过正则去掉文章里的多个连续空行[/b]
package cn.com.quiz;
/*
读取word内容并把内容写入到一个String中
去掉那些
换行+(任意个空白+换行) 替换为一个换行
换行可能是\n 也可能是 \r\n 所以使用 \r?
*/
import java.io.*;
import org.textmining.text.extraction.WordExtractor;
/**
 * Reads the text of a Word document into a String and prints it before and
 * after collapsing runs of blank lines into a single line break.
 */
class FileToString
{
    /**
     * Extracts the text of {@code .\123.doc} (resolved against the current
     * working directory), prints it, replaces every "line break followed by
     * one or more whitespace-only lines" with a single {@code \r\n}, and
     * prints the result.
     *
     * @param args command-line arguments; not used
     * @throws IOException if the document cannot be opened or the stream
     *                     cannot be closed
     */
    public static void main(String[] args) throws IOException
    {
        // try-with-resources guarantees the stream is closed on every path;
        // the original code never closed it.
        try (FileInputStream in = new FileInputStream(".\\123.doc"))
        {
            WordExtractor extractor = new WordExtractor();
            // The inner try keeps the original best-effort behaviour: failures
            // while extracting or printing are reported, not propagated.
            try
            {
                String out = extractor.extractText(in);
                System.out.println("before: " + out);
                // A line break may be \n or \r\n, hence the optional \r.
                // \s* also swallows whitespace on the otherwise empty lines.
                out = out.replaceAll("(\r?\n(\\s*\r?\n)+)", "\r\n");
                System.out.println("after: " + out);
            }
            catch (Exception e)
            {
                e.printStackTrace();
            }
        }
    }
}