package com.yy.game.test;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.channels.FileChannel.MapMode;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CoderResult;
/**
 * Checks whether files contain well-formed UTF-8, using two independent
 * implementations: a hand-written byte scanner ({@link #check(File)}) and the
 * JDK {@link CharsetDecoder} ({@link #check2(File)}).
 *
 * <p>Both methods print their elapsed time to standard output as a side effect.
 */
public class UTF8Checker {
    /** Directory scanned by {@link #main(String[])} when no argument is supplied. */
    private static final String DEFAULT_DIRECTORY = "F:\\test";

    /** Chunk size, in bytes, of the {@link StreamBuffer} used by {@link #check(File)}. */
    private static final int CHECK_BUFFER_SIZE = 1024;

    /** Size of the byte and char buffers used by {@link #check2(File)}. */
    private static final int DECODE_BUFFER_SIZE = 8192;

    /**
     * Runs both checkers over every regular file of a directory and prints one
     * line per file in the form {@code <file>: <check>, <check2>}.
     *
     * @param args optional; {@code args[0]} is the directory to scan. When absent,
     *             the built-in default directory is used.
     * @throws IOException if a file cannot be read
     */
    public static void main(String[] args) throws IOException {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_DIRECTORY;
        File dir = new File(path);
        File[] files = dir.listFiles();
        if (files == null) {
            // listFiles() returns null when the path is not a directory or cannot be listed.
            System.err.println("Not a readable directory: " + dir);
            return;
        }
        for (File file : files) {
            if (!file.isFile()) {
                continue; // sub-directories cannot be opened as a FileInputStream
            }
            System.out.format("%s: %s, %s%n", file, check(file), check2(file));
        }
    }

    /**
     * Implementation based on the JDK's built-in UTF-8 decoder.
     *
     * <p>The file is read in fixed-size chunks and pushed through a
     * {@link CharsetDecoder} whose default action is to report malformed input, so
     * memory use does not grow with the file size. A leading byte-order mark is
     * simply decoded like any other character.
     *
     * @param file the file to examine
     * @return {@code true} if the decoder accepted the whole file (an empty file is
     *         accepted), {@code false} on the first malformed or truncated sequence
     * @throws IOException if the file cannot be opened or read
     */
    public static boolean check2(File file) throws IOException {
        long start = System.nanoTime();
        FileInputStream in = null;
        try {
            in = new FileInputStream(file);
            FileChannel fc = in.getChannel();
            CharsetDecoder decoder = Charset.forName("UTF-8").newDecoder();
            ByteBuffer bytes = ByteBuffer.allocate(DECODE_BUFFER_SIZE);
            CharBuffer chars = CharBuffer.allocate(DECODE_BUFFER_SIZE);
            boolean eof = false;
            while (!eof) {
                eof = fc.read(bytes) < 0;
                bytes.flip();
                CoderResult result;
                do {
                    // The decoded text is not needed; recycle the output buffer.
                    chars.clear();
                    // Passing eof == true makes the decoder report a dangling partial sequence.
                    result = decoder.decode(bytes, chars, eof);
                } while (result.isOverflow());
                if (result.isError()) {
                    return false;
                }
                // Keep the bytes of a sequence that straddles the chunk boundary.
                bytes.compact();
            }
            return true;
        } finally {
            if (in != null) {
                in.close(); // also closes the channel obtained from the stream
            }
            long end = System.nanoTime();
            System.out.println("used(ns):" + (end - start));
        }
    }

    /**
     * Custom implementation: a structural scan of the byte stream.
     *
     * <p>Every lead byte must be followed by the right number of continuation
     * bytes of the form {@code 10xxxxxx}. A leading byte-order mark
     * ({@code EF BB BF}) is an ordinary three-byte sequence and gets no special
     * treatment; the bytes after it are validated as well.
     *
     * <p>The scan follows the original 1..6 byte UTF-8 layout and does not reject
     * overlong encodings, surrogate code points or values above U+10FFFF, so it
     * can accept input that {@link #check2(File)} rejects.
     *
     * @param file the file to examine
     * @return {@code true} if every byte sequence is structurally well formed (an
     *         empty file is accepted), {@code false} otherwise
     * @throws IOException if the file cannot be opened or read
     */
    public static boolean check(File file) throws IOException {
        long start = System.nanoTime();
        InputStream in = null;
        try {
            in = new BufferedInputStream(new FileInputStream(file));
            return isWellFormed(new StreamBuffer(in, CHECK_BUFFER_SIZE));
        } finally {
            if (in != null) {
                in.close();
            }
            long end = System.nanoTime();
            System.out.println("used(ns):" + (end - start));
        }
    }

    /** Scans the remaining bytes of {@code bytes} and verifies the lead/continuation structure. */
    private static boolean isWellFormed(StreamBuffer bytes) throws IOException {
        int lead;
        while ((lead = bytes.next()) != -1) {
            int remaining = continuationBytes(lead);
            if (remaining < 0) {
                return false;
            }
            for (; remaining > 0; remaining--) {
                int b = bytes.next();
                // End of stream inside a sequence, or anything other than 10xxxxxx, is malformed.
                if (b == -1 || (b & 0xC0) != 0x80) {
                    return false;
                }
            }
        }
        return true;
    }

    /**
     * Returns how many continuation bytes must follow the given lead byte, or
     * {@code -1} if the byte cannot start a sequence.
     */
    private static int continuationBytes(int lead) {
        // 1. U-00000000 - U-0000007F: 0xxxxxxx
        // 2. U-00000080 - U-000007FF: 110xxxxx 10xxxxxx
        // 3. U-00000800 - U-0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
        // 4. U-00010000 - U-001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        // 5. U-00200000 - U-03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
        // 6. U-04000000 - U-7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
        if (lead <= 0x7F) {
            return 0;
        } else if (lead <= 0xBF) {
            return -1; // a continuation byte where a lead byte is expected
        } else if (lead <= 0xDF) {
            return 1;
        } else if (lead <= 0xEF) {
            return 2;
        } else if (lead <= 0xF7) {
            return 3;
        } else if (lead <= 0xFB) {
            return 4;
        } else if (lead <= 0xFD) {
            return 5;
        } else {
            return -1; // 0xFE and 0xFF never appear in UTF-8
        }
    }

    /**
     * Reads an {@link InputStream} chunk by chunk and hands out one unsigned byte
     * at a time. The stream is not closed by this class.
     */
    static class StreamBuffer {
        final InputStream in;
        final byte[] buf;
        /** Index of the byte most recently returned; -1 means nothing has been returned from the current chunk yet. */
        int pos = -1;
        /** Number of valid bytes in {@link #buf}. */
        int len;
        /** Set once the underlying stream has reported end of input. */
        boolean eof;

        /**
         * @param in   the stream to read from
         * @param size chunk size in bytes; values below 3 are raised to 3
         */
        public StreamBuffer(InputStream in, int size) {
            this.in = in;
            if (size < 3) {
                size = 3;
            }
            this.buf = new byte[size];
        }

        /**
         * Rewinds to the start of the chunk currently held in the buffer, so the
         * following {@link #next()} call returns that chunk's first byte again.
         * Bytes of earlier chunks cannot be replayed.
         */
        public void redo() {
            // next() pre-increments, so -1 (not 0) addresses the first byte.
            this.pos = -1;
        }

        /**
         * Returns the next byte as a value in {@code 0..255}, or {@code -1} once
         * the stream is exhausted.
         *
         * @throws IOException if the underlying stream fails
         */
        public int next() throws IOException {
            if (pos + 1 >= len) {
                // Current chunk is used up: fetch another one unless the stream already ended.
                if (eof) {
                    return -1;
                }
                int read = in.read(buf);
                if (read <= 0) {
                    // read() signals end of stream with -1; a zero-length read is treated the same way.
                    eof = true;
                    return -1;
                }
                len = read;
                pos = -1;
            }
            return buf[++pos] & 0xFF;
        }
    }
}