文件任意编码转换工具 ExecuteConvertFile

在项目中引用了多个地方的文件,但是编码方式不统一,有gb2312、gbk、gb18030、ascii、utf-8等,很混乱。

 

在网上查找了一些编码转换的工具,但是都不理想。于是就自己写了一个编码转换的工具。可以将指定的文本类型源文件或者目录中的所有指定的文本类型子文件都转换成指定的编码类型,并按路径结构保存到目标目录下。其他非文本类型的文件,则直接复制保存。

 

其中引用了 Mozilla 的一个字符编码检测包,用于检测文件的编码类型。

 

以下是主要的代码:(jar包在附件中)

 

接口:
ConvertEncoding.java 编码转换

/**
 * FileName:ConvertEncoding.java
 * Creator: Landry
 * Create Date:2010-3-15
 * Comments:
 * Version: 1.0
 */
package com.landry.encoding;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;

/**
 * Converts the encoding of files and saves the result.
 *
 * <p>Project: ConvertTxtEncoding<br>
 * Created: 2010-3-15
 *
 * @author Landry
 * @version 1.0
 */
public interface ConvertEncoding
{
	/**
	 * Returns the encoding type of a file.
	 *
	 * @param file the file to inspect
	 * @return the encoding type of the file
	 * @throws FileNotFoundException
	 * @throws IOException
	 */
	String getFileCharacter (File file)
			throws FileNotFoundException, IOException;

	/**
	 * Tells whether a file is one of the configured text file types.
	 *
	 * @param file the file to test
	 * @return {@code true} if the file is one of the configured text types,
	 *         {@code false} otherwise
	 */
	boolean isTextFile (File file);

	/**
	 * Sets the file types (extensions) to be processed, separated by
	 * "<b>|</b>".<br/>
	 * If nothing is set, the default text types are processed: txt, ini,
	 * java, jsp, jspa, htm, html, xml, js, vbs, css, properties, ftl, php,
	 * asp.
	 *
	 * @param fileExtName the extensions, separated by "|"
	 */
	void setInclude (String fileExtName);

	/**
	 * Returns the extension of a file.
	 *
	 * @param file the file whose extension is wanted
	 * @return the extension; {@code null} means the argument is not a
	 *         regular file and may be a directory
	 */
	String getFileExtName (File file);

	/**
	 * Converts a file to the given encoding and saves it at the given
	 * destination.
	 *
	 * @param inFilename  source path
	 * @param outFilename destination path
	 * @param encoding    encoding to convert to
	 * @throws IOException
	 * @throws FileNotFoundException
	 */
	void convertEncoding (String inFilename, String outFilename,
			String encoding) throws FileNotFoundException, IOException;

	/**
	 * Core step that converts the encoding of a text file.
	 *
	 * @param inFilename  source path
	 * @param outFilename destination path
	 * @param encoding    encoding to convert to
	 * @throws IOException
	 * @throws FileNotFoundException
	 */
	void convertEncodingProcess (String inFilename, String outFilename,
			String encoding) throws FileNotFoundException, IOException;

	/**
	 * Copies a regular file as-is.
	 *
	 * @param inFilename  source path
	 * @param outFilename destination path
	 * @throws FileNotFoundException
	 * @throws IOException
	 */
	void copyBinaryFile (String inFilename, String outFilename)
			throws FileNotFoundException, IOException;
}

 
ExecuteConvertFile.java 执行文件的转换保存

/**
 * FileName:ExecuteConvertFile.java
 * Creator: Landry
 * Create Date:2010-3-17
 * Comments:
 * Version: 1.0
 */
package com.landry.encoding;

import java.io.FileNotFoundException;
import java.io.IOException;

/**
 * Entry point for running an encoding conversion over a file or directory.
 *
 * <p>Project: ConvertTxtEncoding<br>
 * Created: 2010-3-17
 *
 * @author Landry
 * @version 1.0
 */
public interface ExecuteConvertFile
{
	/**
	 * Runs the file encoding conversion.
	 * <ol>
	 * <li>When {@code inFilename} is a regular file, {@code outFilename} may
	 * be a directory (the file is saved into it) or a file name (the file is
	 * saved under that new name).</li>
	 * <li>When {@code inFilename} is a directory, {@code outFilename} must be
	 * a directory; every file below the source directory is saved below the
	 * destination path.</li>
	 * </ol>
	 *
	 * @param inFilename  source file name
	 * @param outFilename destination file name
	 * @param encoding    encoding to convert to
	 * @throws FileNotFoundException
	 * @throws IOException
	 */
	void executeConvertFile (String inFilename, String outFilename,
			String encoding) throws FileNotFoundException, IOException;

	/**
	 * Sets the file types (extensions) to be processed, separated by "|".
	 * If nothing is set, the default text types are processed: txt, ini,
	 * java, jsp, jspa, htm, html, xml, js, vbs, css, properties, ftl, php,
	 * asp.
	 *
	 * @param fileExtName the extensions, separated by "|"
	 */
	void setInclude (String fileExtName);
}

 

实现类:
ConvertEncodingImpl.java

/**
 * FileName:ConvertEncodingImpl.java
 * Creator: Landry
 * Create Date:2010-3-15
 * Comments:
 * Version: 1.0
 */
package com.landry.encoding;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.util.HashSet;

/**
 * {@link ConvertEncoding} implementation that re-encodes files whose
 * extension is in the include set and copies every other file byte for byte.
 *
 * <p>Project: ConvertTxtEncoding<br>
 * Created: 2010-3-15
 *
 * @author Landry
 * @version 1.0
 */
public class ConvertEncodingImpl implements ConvertEncoding
{
	/** Size of the byte/char buffers used while copying file content. */
	private static final int BUFFER_SIZE = 8192;

	/** Extensions (without the dot) of the files that are treated as text. */
	HashSet<String> include = new HashSet<String>();
	private final String defaultIncludeFile = "txt|ini|java|jsp|jspa|htm|html|xml|js|vbs|css|properties|ftl|php|asp";

	/**
	 * Creates a converter whose include set holds the default text
	 * extensions.
	 */
	public ConvertEncodingImpl ()
	{
		setInclude(defaultIncludeFile);
	}

	/**
	 * Adds the given extensions to the include set. Entries already present
	 * (including the defaults installed by the constructor) are kept.
	 *
	 * @param fileExtName extensions separated by "|"; surrounding blanks are
	 *                    trimmed, empty entries and a {@code null} argument
	 *                    are ignored
	 */
	@Override
	public void setInclude (String fileExtName)
	{
		if (fileExtName == null)
		{
			return;
		}
		for (String name : fileExtName.split("\\|"))
		{
			String ext = name.trim();
			// An empty entry would match every file that has no extension.
			if (ext.length() > 0)
			{
				include.add(ext);
			}
		}
	}

	/**
	 * Converts or copies one source path to the destination path.
	 * A missing source is reported on standard output and skipped; a source
	 * directory only causes the destination directory to be created. A text
	 * file (see {@link #isTextFile(File)}) is re-encoded, any other file is
	 * copied unchanged.
	 *
	 * @param inFilename  source path
	 * @param outFilename destination path
	 * @param encoding    encoding to convert text files to
	 * @throws FileNotFoundException if a file cannot be opened
	 * @throws IOException if source and destination are the same file, or on
	 *                     any read/write failure
	 */
	@Override
	public void convertEncoding (String inFilename, String outFilename,
			String encoding) throws FileNotFoundException, IOException
	{
		File inFile = new File(inFilename);
		if (!inFile.exists())
		{
			System.out.println(inFile.getAbsolutePath() + "不存在");
			return;
		}
		File outFile = new File(outFilename);

		if (inFile.isDirectory())
		{
			// A source directory only needs its counterpart created.
			outFile.mkdirs();
			return;
		}

		// Opening the output truncates it, so converting a file onto itself
		// would wipe the content before it is read.
		if (inFile.getCanonicalFile().equals(outFile.getCanonicalFile()))
		{
			throw new IOException("Source and target are the same file: "
					+ inFile.getAbsolutePath());
		}

		// Make sure the destination directory exists; opening the output
		// stream creates the file itself.
		File outDir = outFile.getAbsoluteFile().getParentFile();
		if (outDir != null)
		{
			outDir.mkdirs();
		}

		if (isTextFile(inFile))
		{
			convertEncodingProcess(inFilename, outFilename, encoding);
		}
		else
		{
			copyBinaryFile(inFilename, outFilename);
		}
	}

	/**
	 * Re-encodes a text file: decodes it with the encoding reported by
	 * {@link #getFileCharacter(File)} and writes it with {@code encoding}.
	 * The text is copied in chunks, so line terminators are preserved and
	 * nothing is appended. When the source encoding is unknown or not
	 * supported by this JVM the file is copied unchanged and a notice is
	 * printed.
	 *
	 * @param inFilename  source path
	 * @param outFilename destination path
	 * @param encoding    encoding to write
	 * @throws FileNotFoundException if a file cannot be opened
	 * @throws IOException on any read/write failure, including an
	 *                     unsupported target encoding
	 */
	@Override
	public void convertEncodingProcess (String inFilename, String outFilename,
			String encoding) throws FileNotFoundException, IOException
	{
		File inFile = new File(inFilename);
		String currentEncoding = getFileCharacter(inFile);
		if (!isUsableCharset(currentEncoding))
		{
			// Decoding with a guessed charset could corrupt the text, so the
			// bytes are kept as they are.
			System.out.println(inFile.getAbsolutePath() + "无法识别编码,已按原样复制");
			copyBinaryFile(inFilename, outFilename);
			return;
		}
		File outFile = new File(outFilename);

		FileInputStream in = new FileInputStream(inFile);
		try
		{
			BufferedReader reader = new BufferedReader(new InputStreamReader(
					in, currentEncoding));
			FileOutputStream out = new FileOutputStream(outFile);
			try
			{
				BufferedWriter writer = new BufferedWriter(
						new OutputStreamWriter(out, encoding));
				char[] buffer = new char[BUFFER_SIZE];
				int count;
				while ((count = reader.read(buffer)) != -1)
				{
					writer.write(buffer, 0, count);
				}
				// Closing (not just flushing) lets the encoder emit any
				// final bytes it still holds.
				writer.close();
			}
			finally
			{
				out.close();
			}
		}
		finally
		{
			in.close();
		}
	}

	/**
	 * Copies a file byte for byte.
	 *
	 * @param inFilename  source path
	 * @param outFilename destination path
	 * @throws FileNotFoundException if a file cannot be opened
	 * @throws IOException on any read/write failure
	 */
	@Override
	public void copyBinaryFile (String inFilename, String outFilename)
			throws FileNotFoundException, IOException
	{
		File inFile = new File(inFilename);
		File outFile = new File(outFilename);

		FileInputStream in = new FileInputStream(inFile);
		try
		{
			FileOutputStream out = new FileOutputStream(outFile);
			try
			{
				byte[] buffer = new byte[BUFFER_SIZE];
				int count;
				while ((count = in.read(buffer)) != -1)
				{
					out.write(buffer, 0, count);
				}
			}
			finally
			{
				out.close();
			}
		}
		finally
		{
			in.close();
		}
	}

	/**
	 * Detects the encoding of a file with {@code CharsetDetector}.
	 * A single candidate is returned as is, except that "x-euc-tw" is
	 * reported as "GB2312". With several candidates, the first one that is
	 * "GB2312", "UTF-8" or "ASCII" is returned.
	 *
	 * @param file the file to inspect
	 * @return the detected encoding name, or {@code null} when no candidate
	 *         was usable
	 * @throws FileNotFoundException if the file cannot be opened
	 * @throws IOException on any read failure
	 */
	@Override
	public String getFileCharacter (File file) throws FileNotFoundException,
			IOException
	{
		CharsetDetector charDect = new CharsetDetector();
		String[] probableSet;
		FileInputStream fis = new FileInputStream(file);
		try
		{
			probableSet = charDect.detectChineseCharset(fis);
		}
		finally
		{
			fis.close();
		}

		// Defensive: the detector's contract is not visible from this class.
		if (probableSet == null)
		{
			return null;
		}

		if (probableSet.length == 1)
		{
			// NOTE(review): presumably the detector reports GB2312 text as
			// EUC-TW in this case - confirm against the detector.
			if (probableSet[0].equals("x-euc-tw"))
			{
				return "GB2312";
			}
			return probableSet[0];
		}

		for (String character : probableSet)
		{
			if (character.equals("GB2312"))
			{
				return "GB2312";
			}
			if (character.equals("UTF-8"))
			{
				return "UTF-8";
			}
			if (character.equals("ASCII"))
			{
				return "ASCII";
			}
		}
		return null;
	}

	/**
	 * Returns the text after the last dot of the file name, so that
	 * "jquery.min.js" yields "js".
	 *
	 * @param file the file whose extension is wanted
	 * @return the extension without the dot, an empty string when the name
	 *         contains no dot, or {@code null} when {@code file} is a
	 *         directory
	 */
	@Override
	public String getFileExtName (File file)
	{
		if (file.isDirectory())
		{
			return null;
		}
		String fullName = file.getName();
		int index = fullName.lastIndexOf('.');
		if (index < 0)
		{
			return "";
		}
		return fullName.substring(index + 1);
	}

	/**
	 * Tells whether the file's extension is in the include set.
	 *
	 * @param file the file to test
	 * @return {@code true} when the extension is included; {@code false}
	 *         otherwise, including for directories
	 */
	@Override
	public boolean isTextFile (File file)
	{
		String extName = getFileExtName(file);
		return include.contains(extName);
	}

	/** Tells whether {@code name} is a charset this JVM can decode with. */
	private static boolean isUsableCharset (String name)
	{
		if (name == null)
		{
			return false;
		}
		try
		{
			return java.nio.charset.Charset.isSupported(name);
		}
		catch (IllegalArgumentException e)
		{
			// Raised for syntactically illegal charset names.
			return false;
		}
	}

}

 
ExecuteConvertFileImpl.java

/**
 * FileName:ExecuteConvertFileImpl.java
 * Creator: Landry
 * Create Date:2010-3-17
 * Comments:
 * Version: 1.0
 */
package com.landry.encoding;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;

/**
 * {@link ExecuteConvertFile} implementation that walks a source file or
 * directory tree and hands every entry to a {@link ConvertEncoding},
 * mirroring the directory structure below the destination.
 *
 * <p>Project: ConvertTxtEncoding<br>
 * Created: 2010-3-17
 *
 * @author Landry
 * @version 1.0
 */
public class ExecuteConvertFileImpl implements ExecuteConvertFile
{
	/** Performs the per-file conversion or copy. */
	final ConvertEncoding convertEncoding = new ConvertEncodingImpl();

	/**
	 * Sets the file types (extensions) to be processed, separated by "|".
	 * The value is passed straight to the underlying
	 * {@link ConvertEncoding#setInclude(String)}.
	 *
	 * @param fileExtName the extensions, separated by "|"
	 */
	@Override
	public void setInclude (String fileExtName)
	{
		convertEncoding.setInclude(fileExtName);
	}

	/**
	 * Runs the conversion for a file or a whole directory tree.
	 * A missing source is reported on standard output and nothing is done.
	 * When the source is a regular file and the destination is an existing
	 * directory, the file is saved into that directory under its own name;
	 * otherwise the destination is used as the output file name. When the
	 * source is a directory, its content is mirrored below the destination.
	 *
	 * @param inFilename  source file or directory
	 * @param outFilename destination file or directory
	 * @param encoding    encoding to convert text files to
	 * @throws FileNotFoundException if a file cannot be opened
	 * @throws IOException if the destination is the source itself or lies
	 *                     inside the source directory, if a directory
	 *                     cannot be listed, or on any read/write failure
	 */
	@Override
	public void executeConvertFile (String inFilename, String outFilename,
			String encoding) throws FileNotFoundException, IOException
	{
		File srcFile = new File(inFilename);
		File targetFile = new File(outFilename);
		if (!srcFile.exists())
		{
			System.out.println("源文件不存在:" + srcFile.getAbsolutePath());
			return;
		}

		if (!srcFile.isDirectory() && targetFile.isDirectory())
		{
			// Single file into an existing directory: keep the file name.
			targetFile = new File(targetFile, srcFile.getName());
		}

		String srcPath = srcFile.getCanonicalPath();
		String targetPath = targetFile.getCanonicalPath();
		String srcPrefix = srcPath.endsWith(File.separator) ? srcPath
				: srcPath + File.separator;
		// Writing onto the source would truncate files before they are read;
		// writing below it would make the walk descend into its own output
		// without end.
		if (targetPath.equals(srcPath) || targetPath.startsWith(srcPrefix))
		{
			throw new IOException("Target must not be the source or lie inside it: "
					+ targetFile.getAbsolutePath());
		}

		execute(srcFile, targetFile, encoding);
	}

	/**
	 * Converts one entry and, for a directory, recurses into its children.
	 *
	 * @param inFile   current source file or directory
	 * @param outFile  destination that corresponds to {@code inFile}
	 * @param encoding encoding to convert text files to
	 * @throws FileNotFoundException if a file cannot be opened
	 * @throws IOException if a directory cannot be listed or on any
	 *                     read/write failure
	 */
	private void execute (File inFile, File outFile, String encoding)
			throws FileNotFoundException, IOException
	{
		if (!inFile.isDirectory())
		{
			// Regular file: convert (or copy) it directly.
			convertEncoding.convertEncoding(inFile.getAbsolutePath(),
					outFile.getAbsolutePath(), encoding);
			return;
		}

		// Create the matching destination directory, even when it ends up
		// empty.
		outFile.mkdirs();

		// list() returns null when the directory cannot be read.
		String[] children = inFile.list();
		if (children == null)
		{
			throw new IOException("Cannot list directory: "
					+ inFile.getAbsolutePath());
		}
		for (String childFilename : children)
		{
			execute(new File(inFile, childFilename), new File(outFile,
					childFilename), encoding);
		}
	}
}

 

具体应用:

ExecuteConvertFileTest.java

package com.landry.test;

import java.io.FileNotFoundException;
import java.io.IOException;

import com.landry.encoding.ExecuteConvertFile;
import com.landry.encoding.ExecuteConvertFileImpl;

/**
 * Command-line demo of {@link ExecuteConvertFile}.
 *
 * <p>Usage: {@code ExecuteConvertFileTest [source [target [encoding]]]}.
 * Arguments that are left out fall back to the sample values below.
 */
public class ExecuteConvertFileTest
{
	/**
	 * Converts the source (a directory, or a single file such as
	 * {@code D:\tomcat6\webapps\doc\aio.html}) into the target location.
	 *
	 * @param args optional source path, target path and target encoding, in
	 *             that order
	 */
	public static void main (String[] args)
	{
		ExecuteConvertFile execute = new ExecuteConvertFileImpl();
		// Source: a directory by default; a single file works as well.
		String inFilename = args.length > 0 ? args[0] : "D:\\tomcat6\\webapps\\";
		String outFilename = args.length > 1 ? args[1] : "D:\\output";
		String encoding = args.length > 2 ? args[2] : "UTF-8";
		try
		{
			execute.executeConvertFile(inFilename, outFilename, encoding);
		}
		catch (IOException e)
		{
			// FileNotFoundException is an IOException, so one handler covers
			// both declared exceptions.
			e.printStackTrace();
		}
	}

}

 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值