/**
 * Guesses the document type of a file from its raw bytes.
 *
 * <p>The first 14 bytes are rendered as upper-case hex and compared against the
 * signatures held by {@link ContentType}. DOCX shares its signature with plain
 * ZIP archives, so a match on that signature is only reported as DOCX when the
 * ASCII text {@code word/} occurs in the last 500 bytes of the data (or in the
 * whole data when it is shorter than 500 bytes).
 *
 * @param bytes the file content; may be {@code null}
 * @return the detected type, or {@link ContentType#OTHERS} when the input is
 *         {@code null}, shorter than 14 bytes, or matches no known signature
 */
private ContentType getFileType(byte[] bytes) {
    // Too little data to hold any signature we compare against.
    if (bytes == null || bytes.length < 14) {
        return ContentType.OTHERS;
    }

    // Hex-encode the first 14 bytes; signatures are stored in upper case.
    byte[] header = new byte[14];
    System.arraycopy(bytes, 0, header, 0, header.length);
    String headerHex = getFileHexString(header).toUpperCase();

    if (headerHex.startsWith(ContentType.DOC.getFileTitle())) {
        return ContentType.DOC;
    }
    if (headerHex.startsWith(ContentType.DOCX.getFileTitle())) {
        // ZIP and DOCX share the same signature, so look for "word/" near the end.
        // Clamp the window start: inputs shorter than 500 bytes would otherwise
        // yield a negative offset and an ArrayIndexOutOfBoundsException.
        int tailStart = Math.max(0, bytes.length - 500);
        return containsWordMarker(bytes, tailStart) ? ContentType.DOCX : ContentType.OTHERS;
    }
    if (headerHex.startsWith(ContentType.PDF.getFileTitle())) {
        return ContentType.PDF;
    }
    return ContentType.OTHERS;
}

/**
 * Reports whether the ASCII bytes of {@code word/} occur in {@code data} at or after
 * index {@code from}. The comparison is byte-aligned, so it cannot match across
 * half-byte boundaries the way a search over a hex string can.
 */
private static boolean containsWordMarker(byte[] data, int from) {
    final byte[] marker = {'w', 'o', 'r', 'd', '/'};
    for (int start = from; start + marker.length <= data.length; start++) {
        int matched = 0;
        while (matched < marker.length && data[start + matched] == marker[matched]) {
            matched++;
        }
        if (matched == marker.length) {
            return true;
        }
    }
    return false;
}
/**
 * Renders a byte array as lower-case hexadecimal text, two digits per byte.
 *
 * @param b the bytes to encode
 * @return the hex text, or {@code null} when {@code b} is {@code null} or empty
 */
public final static String getFileHexString(byte[] b) {
    // Nothing to encode: callers receive null rather than an empty string.
    if (b == null || b.length == 0) {
        return null;
    }
    StringBuilder hex = new StringBuilder();
    for (byte value : b) {
        // Mask to treat the byte as unsigned (0..255).
        int unsigned = value & 0xFF;
        // Integer.toHexString drops the leading zero for values below 0x10.
        if (unsigned < 0x10) {
            hex.append('0');
        }
        hex.append(Integer.toHexString(unsigned));
    }
    return hex.toString();
}
/**
 * File types recognised by their leading bytes. Each constant carries the
 * upper-case hex text of the signature it is matched against.
 */
public enum ContentType {
    /** Note: Word and Excel files share this same header. */
    DOC("D0CF11E0"),
    /** DOCX and ZIP files share this same header. */
    DOCX("504B0304"),
    PDF("255044462D312E"),
    /** Fallback type; has an empty signature. */
    OTHERS("");

    // Immutable: enum constants are shared singletons and must not carry mutable state.
    private final String fileTitle;

    ContentType(String fileTitle) {
        this.fileTitle = fileTitle;
    }

    /**
     * @return the upper-case hex signature for this type (empty for {@link #OTHERS})
     */
    public String getFileTitle() {
        return fileTitle;
    }
}
参考文档:
纯js判断文件流格式类型:pdf,doc,docx,xls,xlsx,ppt,pptx一次搞定!_js 判断文件格式_csdnyiiran的博客-CSDN博客