java通过判断文本的编码方式再读取文件

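原理:UTF-8 的多字节序列有固定的位模式(首字节为 110xxxxx、1110xxxx 或 11110xxx,后续字节均为 10xxxxxx),
因此读取文件开头的一小段字节并检查是否符合该模式,就可以判断该文本是否是 UTF-8 编码;不符合时按 GBK 处理。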

参考:http://www.iteye.com/topic/398782

            http://www.iteye.com/topic/191552

            http://bbs.csdn.net/topics/240042395

 

代码:

import java.io.BufferedReader;
import java.io.DataInputStream;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;

/**
 * Reads a text file whose encoding is not known in advance.
 *
 * <p>The first bytes of the file are sampled and tested for well-formed UTF-8;
 * if the sample is not UTF-8 the file is assumed to be GBK. The file is then
 * decoded with the chosen charset.
 */
public class ReadUnknowCodeTxt {

    /** Number of leading bytes sampled from the file for encoding detection. */
    private static final int SAMPLE_SIZE = 104;

    /**
     * Reads up to the first {@value #SAMPLE_SIZE} bytes of a file.
     *
     * <p>The returned array always has length {@value #SAMPLE_SIZE}; if the file
     * is shorter (or cannot be read) the remaining positions stay zero. I/O
     * errors are reported on standard error and do not propagate.
     *
     * @param filename path of the file to sample
     * @return a zero-padded buffer holding the leading bytes of the file
     */
    public byte[] readTxtFile(String filename) {
        byte[] sample = new byte[SAMPLE_SIZE];
        try (FileInputStream in = new FileInputStream(filename)) {
            int filled = 0;
            // A single read() may return fewer bytes than requested, so keep
            // reading until the buffer is full or the file ends.
            while (filled < sample.length) {
                int n = in.read(sample, filled, sample.length - filled);
                if (n < 0) {
                    break;
                }
                filled += n;
            }
        } catch (IOException e) {
            // Best effort: FileNotFoundException is an IOException as well.
            e.printStackTrace();
        }
        return sample;
    }

    /**
     * Reads a whole text file using the given charset name.
     *
     * <p>Every line is terminated with {@code "\r\n"} in the result, including
     * the last one. I/O errors are reported on standard error and do not
     * propagate.
     *
     * @param filename path of the file to read
     * @param code     charset name understood by {@link InputStreamReader}, e.g. {@code "utf8"} or {@code "gbk"}
     * @return the decoded content; {@code null} if the file could not be opened
     *         or the charset is unsupported; the content read so far if an
     *         error occurs mid-way
     */
    public String _readTxtFile(String filename, String code) {
        StringBuilder content = null;
        // The stream is declared as its own resource so it is closed even when
        // the InputStreamReader constructor rejects the charset name.
        try (FileInputStream in = new FileInputStream(filename);
             BufferedReader reader = new BufferedReader(new InputStreamReader(in, code))) {
            content = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                content.append(line).append("\r\n");
            }
        } catch (IOException e) {
            // Covers FileNotFoundException and UnsupportedEncodingException too.
            e.printStackTrace();
        }
        return content != null ? content.toString() : null;
    }

    /**
     * Tests whether a byte sample is well-formed UTF-8.
     *
     * <p>Every byte is examined. A multi-byte sequence that is cut off by the
     * end of the array is tolerated, because the sample is normally a fixed-size
     * prefix of a longer file. Zero bytes (buffer padding) count as ASCII.
     * Invalid lead bytes ({@code C0}, {@code C1}, {@code F5}-{@code FF}), stray
     * continuation bytes, overlong forms, surrogates and code points above
     * U+10FFFF are rejected.
     *
     * @param c the bytes to test; must not be {@code null}
     * @return {@code true} if the sample is valid UTF-8 (or a valid prefix of it)
     * @throws NullPointerException if {@code c} is {@code null}
     */
    public boolean isUTF8(byte[] c) {
        if (c == null) {
            throw new NullPointerException("c");
        }
        final int length = c.length;
        int i = 0;
        while (i < length) {
            final int lead = c[i] & 0xFF;
            if (lead < 0x80) {
                // Single-byte (ASCII) character.
                i++;
                continue;
            }

            final int continuationCount;
            final int minCodePoint; // smallest value that needs this many bytes
            int codePoint;
            if (lead >= 0xC2 && lead <= 0xDF) {
                continuationCount = 1;
                minCodePoint = 0x80;
                codePoint = lead & 0x1F;
            } else if (lead >= 0xE0 && lead <= 0xEF) {
                continuationCount = 2;
                minCodePoint = 0x800;
                codePoint = lead & 0x0F;
            } else if (lead >= 0xF0 && lead <= 0xF4) {
                continuationCount = 3;
                minCodePoint = 0x10000;
                codePoint = lead & 0x07;
            } else {
                // Stray continuation byte or a lead byte UTF-8 never uses.
                return false;
            }

            for (int k = 1; k <= continuationCount; k++) {
                if (i + k >= length) {
                    // Sequence is cut off by the end of the sample; everything
                    // seen so far was valid, so accept it as a valid prefix.
                    return true;
                }
                final int next = c[i + k] & 0xFF;
                if ((next & 0xC0) != 0x80) {
                    return false;
                }
                codePoint = (codePoint << 6) | (next & 0x3F);
            }

            final boolean overlong = codePoint < minCodePoint;
            final boolean surrogate = codePoint >= 0xD800 && codePoint <= 0xDFFF;
            if (overlong || surrogate || codePoint > 0x10FFFF) {
                return false;
            }
            i += continuationCount + 1;
        }
        return true;
    }

    /**
     * Detects the encoding of a text file (UTF-8 or GBK) and prints its content.
     *
     * @param args optional; {@code args[0]} is the file path, defaulting to {@code D:\gbk.txt}
     */
    public static void main(String[] args) {
        String path = (args != null && args.length > 0) ? args[0] : "D:\\gbk.txt";
        String code = "utf8";
        ReadUnknowCodeTxt ruct = new ReadUnknowCodeTxt();
        if (ruct.isUTF8(ruct.readTxtFile(path))) {
            System.out.println("utf8");
        } else {
            System.out.println("gbk");
            code = "gbk";
        }
        String str = ruct._readTxtFile(path, code);
        System.out.println(str);
    }
}


测试了一些文本,暂时没发现问题

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值