java判断utf8编码_Java检测文件是否UTF8编码

package com.yy.game.test;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.channels.FileChannel.MapMode;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CoderResult;

/**
 * Checks whether files contain well-formed UTF-8, using two independent
 * implementations: a hand-written byte scanner ({@link #check(File)}) and the
 * JDK's {@link CharsetDecoder} ({@link #check2(File)}).
 */
public class UTF8Checker {

    /** Directory scanned when no command-line argument is given. */
    private static final String DEFAULT_DIR = "F:\\test";

    /**
     * Runs both checks on every regular file of a directory and prints the results.
     *
     * @param args optional; {@code args[0]} overrides the default directory
     * @throws IOException if a file cannot be read
     */
    public static void main(String[] args) throws IOException {
        File dir = new File(args.length > 0 ? args[0] : DEFAULT_DIR);
        // listFiles() returns null when the path is not a directory or cannot be read.
        File[] files = dir.listFiles();
        if (files == null) {
            System.err.println("Not a readable directory: " + dir);
            return;
        }
        for (File file : files) {
            // Sub-directories cannot be opened as streams; skip them.
            if (!file.isFile()) {
                continue;
            }
            System.out.format("%s: %s, %s%n", file, check(file), check2(file));
        }
    }

    /**
     * Implementation based on the JDK's own UTF-8 decoder.
     *
     * <p>The file is decoded in fixed-size chunks, so memory use does not depend
     * on the file size. The decoder reports malformed input, which makes this
     * check strict: overlong forms, surrogates and 5/6-byte sequences are rejected.
     * The elapsed time is printed to standard output.
     *
     * @param file the file to inspect
     * @return {@code true} if the whole file decodes as UTF-8 without error
     * @throws IOException if the file cannot be opened or read
     */
    public static boolean check2(File file) throws IOException {
        long start = System.nanoTime();
        try (FileInputStream in = new FileInputStream(file);
                FileChannel fc = in.getChannel()) {
            // A new decoder reports (rather than replaces) malformed input by default.
            CharsetDecoder decoder = Charset.forName("UTF-8").newDecoder();
            ByteBuffer bytes = ByteBuffer.allocate(8192);
            CharBuffer chars = CharBuffer.allocate(8192);
            boolean eof = false;
            while (!eof) {
                eof = fc.read(bytes) == -1;
                bytes.flip();
                CoderResult result;
                do {
                    // The decoded characters are not needed; reuse the same buffer.
                    chars.clear();
                    result = decoder.decode(bytes, chars, eof);
                    if (result.isError()) {
                        return false;
                    }
                } while (result.isOverflow());
                // Keep the bytes of a sequence split across two chunks.
                bytes.compact();
            }
            chars.clear();
            return !decoder.flush(chars).isError();
        } finally {
            long end = System.nanoTime();
            System.out.println("used(ns):" + (end - start));
        }
    }

    /**
     * Hand-written implementation.
     *
     * <p>Performs a structural check only: every lead byte must be followed by
     * the number of continuation bytes ({@code 10xxxxxx}) that its bit pattern
     * announces. The legacy 5- and 6-byte forms are accepted and overlong forms
     * and surrogates are not detected, so this check is more lenient than
     * {@link #check2(File)}. A leading byte order mark is an ordinary 3-byte
     * sequence here; the rest of the file is still validated.
     * The elapsed time is printed to standard output.
     *
     * @param file the file to inspect
     * @return {@code true} if every byte sequence in the file is structurally valid
     * @throws IOException if the file cannot be opened or read
     */
    public static boolean check(File file) throws IOException {
        long start = System.nanoTime();
        try (InputStream in = new BufferedInputStream(new FileInputStream(file))) {
            StreamBuffer sbuf = new StreamBuffer(in, 1024);
            int lead;
            while ((lead = sbuf.next()) != -1) {
                int length = sequenceLength(lead);
                if (length < 0) {
                    return false;
                }
                for (int i = 1; i < length; i++) {
                    int b = sbuf.next();
                    // -1 here means the file ends in the middle of a sequence.
                    if (b == -1 || (b & 0xC0) != 0x80) {
                        return false;
                    }
                }
            }
            return true;
        } finally {
            long end = System.nanoTime();
            System.out.println("used(ns):" + (end - start));
        }
    }

    /**
     * Returns the total length of the sequence introduced by a lead byte, or
     * {@code -1} if the byte cannot start a sequence.
     *
     * <pre>
     * 1. U-00000000 - U-0000007F: 0xxxxxxx
     * 2. U-00000080 - U-000007FF: 110xxxxx 10xxxxxx
     * 3. U-00000800 - U-0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
     * 4. U-00010000 - U-001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
     * 5. U-00200000 - U-03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
     * 6. U-04000000 - U-7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
     * </pre>
     */
    private static int sequenceLength(int lead) {
        if (lead <= 0x7F) {
            return 1;
        }
        if (lead <= 0xBF) {
            return -1; // a continuation byte in lead position
        }
        if (lead <= 0xDF) {
            return 2;
        }
        if (lead <= 0xEF) {
            return 3;
        }
        if (lead <= 0xF7) {
            return 4;
        }
        if (lead <= 0xFB) {
            return 5;
        }
        if (lead <= 0xFD) {
            return 6;
        }
        return -1; // 0xFE and 0xFF never appear in UTF-8
    }

    /**
     * Minimal pull-style reader that hands out the bytes of a stream one at a
     * time as unsigned values, refilling an internal array as needed.
     */
    static class StreamBuffer {

        final InputStream in;

        final byte[] buf;

        /** Index of the byte returned last from {@code buf}; -1 if none yet. */
        int pos = -1;

        /** Number of valid bytes in {@code buf}; -1 once the stream is exhausted. */
        int len;

        /**
         * @param in   the stream to read from; it is not closed by this class
         * @param size capacity of the internal array; values below 3 are raised to 3
         */
        public StreamBuffer(InputStream in, int size) {
            this.in = in;
            if (size < 3) {
                size = 3;
            }
            this.buf = new byte[size];
        }

        /**
         * Rewinds to the start of the chunk currently held in the internal
         * array, so that the following {@link #next()} returns its first byte
         * again. Bytes of earlier chunks cannot be replayed.
         */
        public void redo() {
            // next() pre-increments, so -1 makes it return buf[0].
            this.pos = -1;
        }

        /**
         * Returns the next byte as a value in {@code 0..255}, or {@code -1}
         * once the end of the stream has been reached.
         *
         * @throws IOException if reading from the underlying stream fails
         */
        public int next() throws IOException {
            if (len < 0) {
                return -1;
            }
            if (pos + 1 >= len) {
                int n = in.read(buf);
                if (n <= 0) {
                    // InputStream signals end of stream with -1; remember it.
                    len = -1;
                    return -1;
                }
                len = n;
                pos = -1;
            }
            return this.buf[++this.pos] & 0xFF;
        }
    }
}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值