判定文件编码或文本流编码的方法

最新推荐文章于 2024-09-16 10:50:00 发布

iteye_16368

最新推荐文章于 2024-09-16 10:50:00 发布

阅读量120

点赞数

分类专栏： java 文章标签： Java .net XML HTML

本文链接：https://blog.csdn.net/iteye_16368/article/details/81815748

版权

java 专栏收录该内容

11 篇文章 0 订阅

订阅专栏

import info.monitorenter.cpdetector.io.ASCIIDetector;
import info.monitorenter.cpdetector.io.CodepageDetectorProxy;
import info.monitorenter.cpdetector.io.JChardetFacade;
import info.monitorenter.cpdetector.io.ParsingDetector;
import info.monitorenter.cpdetector.io.UnicodeDetector;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.nio.charset.Charset;

/**
 * Detects the character encoding of a string, a file or a byte stream and
 * returns the name of the detected charset.
 *
 * <p>
 * All detection work is delegated to a single shared cpdetector
 * {@link CodepageDetectorProxy}. Every public method is a
 * {@code static synchronized} method, so calls are serialized on the class
 * lock.
 * </p>
 *
 * @version 1.0
 */
public class Detector {
	/** Value returned by every lookup when the proxy yields no charset. */
	private static final String UNKNOWN_ENCODING = "未知";

	/*
	 * detectorProxy is the detector front end: it hands the detection task to
	 * the concrete detector instances registered through add(...). cpdetector
	 * ships several ready-made detectors (ParsingDetector, JChardetFacade,
	 * ASCIIDetector, UnicodeDetector). The proxy answers with the result of
	 * whichever registered detector is the first to return a non-null result.
	 */
	private static final CodepageDetectorProxy detectorProxy;

	static {
		detectorProxy = CodepageDetectorProxy.getInstance();

		// ParsingDetector inspects HTML, XML and similar documents or streams
		// for a declared encoding. The constructor flag controls whether
		// details of the detection process are printed; false keeps it quiet.
		detectorProxy.add(new ParsingDetector(false));

		// JChardetFacade wraps Mozilla's JChardet and can determine the
		// encoding of most files, so it alone is usually sufficient. The
		// detectors added after it are extra fall-backs.
		detectorProxy.add(JChardetFacade.getInstance());

		// ASCIIDetector recognizes ASCII-encoded content.
		detectorProxy.add(ASCIIDetector.getInstance());

		// UnicodeDetector recognizes the encodings of the Unicode family.
		detectorProxy.add(UnicodeDetector.getInstance());
	}

	/**
	 * Detects the encoding of the bytes obtained by encoding {@code content}
	 * with the platform default charset.
	 *
	 * @param content the text to examine
	 * @return the name of the detected charset, or {@code "未知"} when the
	 *         proxy returns no charset
	 * @throws IllegalArgumentException propagated from the detector proxy
	 * @throws IOException              propagated from the detector proxy
	 */
	public static synchronized String getEncodingType(String content)
			throws IllegalArgumentException, IOException {
		// Same bytes as the implicit-charset getBytes(), but stated explicitly.
		byte[] bytes = content.getBytes(Charset.defaultCharset());
		// The length handed to the detector must be the number of BYTES in the
		// stream. content.length() counts UTF-16 chars and is smaller than the
		// byte count for multi-byte text, which would cut the sample short.
		return getEncodingType(new ByteArrayInputStream(bytes), bytes.length);
	}

	/**
	 * Detects the encoding of the given file.
	 *
	 * @param file the file to examine
	 * @return the name of the detected charset, or {@code "未知"} when the
	 *         proxy returns no charset
	 * @throws MalformedURLException if the file location cannot be expressed
	 *                               as a URL
	 * @throws IOException           propagated from the detector proxy
	 */
	public static synchronized String getEncodingType(File file)
			throws MalformedURLException, IOException {
		// File.toURL() is deprecated because it does not escape characters
		// that are illegal in URLs; going through toURI() escapes them.
		return toName(detectorProxy.detectCodepage(file.toURI().toURL()));
	}

	/**
	 * Detects the encoding of the data available from {@code inputStream}.
	 *
	 * <p>
	 * NOTE(review): cpdetector is understood to require a stream that supports
	 * mark/reset and to reject others with an
	 * {@link IllegalArgumentException} - confirm against the library version
	 * in use.
	 * </p>
	 *
	 * @param inputStream the stream to examine
	 * @param length      the amount of data, in bytes, passed on to the
	 *                    detector proxy
	 * @return the name of the detected charset, or {@code "未知"} when the
	 *         proxy returns no charset
	 * @throws IllegalArgumentException propagated from the detector proxy
	 * @throws IOException              propagated from the detector proxy
	 */
	public static synchronized String getEncodingType(InputStream inputStream,
			int length) throws IllegalArgumentException, IOException {
		return toName(detectorProxy.detectCodepage(inputStream, length));
	}

	/** Maps a detection result to its name, or to the "unknown" marker for null. */
	private static String toName(Charset charset) {
		if (charset == null) {
			return UNKNOWN_ENCODING;
		}
		return charset.name();
	}
}