最近在项目中需要判断文件类型。如果只根据文件后缀名来判断,还不够严格(后缀名有可能被手动修改),这种判断方式可能出错。想起以前在网上看到过一种做法:读取文件头部的若干字节,与各标准格式的文件头进行对比,即可较为准确地判断文件类型。这里可以读取前3个字节,或者前10个字节。
具体如下:
下面是按前10个字节进行判断的一种写法:
// Lookup table filled in by the static initializer that follows: each key is a
// 20-character hex string (the first 10 bytes of a file) and each value is the
// matching file extension, e.g. "ffd8ffe000104a464946" -> "jpg".
// NOTE(review): HashMap keys are unique and String keys are case-sensitive, so
// repeated keys and mixed-case hex keys in the initializer should be checked
// against the lookup code (not shown here).
private static final HashMap<String, String> mFileTypes = new HashMap<String, String>();
// judge file type by file header content
static {
mFileTypes.put("ffd8ffe000104a464946", "jpg"); //JPEG (jpg)
mFileTypes.put("89504e470d0a1a0a0000", "png"); //PNG (png)
mFileTypes.put("47494638396126026f01", "gif"); //GIF (gif)
mFileTypes.put("49492a00227105008037", "tif"); //TIFF (tif)
mFileTypes.put("424d228c010000000000", "bmp"); //16色位图(bmp)
mFileTypes.put("424d8240090000000000", "bmp"); //24位位图(bmp)
mFileTypes.put("424d8e1b030000000000", "bmp"); //256色位图(bmp)
mFileTypes.put("41433130313500000000", "dwg"); //CAD (dwg)
mFileTypes.put("3c21444f435459504520", "html"); //HTML (html)
mFileTypes.put("3c21646f637479706520", "htm"); //HTM (htm)
mFileTypes.put("48544d4c207b0d0a0942", "css"); //css
mFileTypes.put("696b2e71623d696b2e71", "js"); //js
mFileTypes.put("7b5c727466315c616e73", "rtf"); //Rich Text Format (rtf)
mFileTypes.put("38425053000100000000", "psd"); //Photoshop (psd)
mFileTypes.put("46726f6d3a203d3f6762", "eml"); //Email [Outlook Express 6] (eml)
mFileTypes.put("d0cf11e0a1b11ae10000", "doc"); //MS Excel 注意:word、msi 和 excel的文件头一样
mFileTypes.put("d0cf11e0a1b11ae10000", "vsd"); //Visio 绘图
mFileTypes.put("5374616E64617264204A", "mdb"); //MS Access (mdb)
mFileTypes.put("252150532D41646F6265", "ps");
mFileTypes.put("255044462d312e350d0a", "pdf");