先贴段 guessContentTypeFromStream 实现代码
感觉只判断前几个字节的话,还是有点问题的
比如我一个txt文本
以RIFF开头就会被判别成
audio/x-wav
总之有点不够严谨,但一般情况下应该是够用了
/**
 * Tries to guess the MIME content type of a stream from the first few bytes
 * ("magic number") of its content.
 *
 * <p>The stream is marked, up to 12 bytes are read, and the stream is reset
 * before any signature is evaluated. Streams that do not support mark/reset
 * are not inspected at all. Bytes past the end of a short stream are seen as
 * {@code -1} and therefore never match a signature.
 *
 * <p>This is a best-effort heuristic: only a handful of leading bytes are
 * examined, so arbitrary data that happens to begin with a known signature is
 * reported as that type.
 *
 * @param is the stream to inspect
 * @return the guessed content type, or {@code null} if the stream does not
 *         support mark/reset or no known signature matches
 * @throws IOException if reading from or resetting the stream fails
 */
public static String guessContentTypeFromStream(InputStream is) throws IOException {
    // If we can't read ahead safely, just give up on guessing.
    if (!is.markSupported()) {
        return null;
    }

    // 12 bytes are enough for the longest signature checked below
    // ("RIFF" + 4-byte chunk size + "WAVE") and stay within the mark limit.
    final int headerLength = 12;
    final int[] header = new int[headerLength];
    is.mark(headerLength);
    for (int i = 0; i < headerLength; i++) {
        header[i] = is.read(); // 0..255, or -1 once the stream is exhausted
    }
    is.reset();

    // Java class file.
    if (headerStartsWith(header, 0xCA, 0xFE, 0xBA, 0xBE)) {
        return "application/java-vm";
    }

    // Java serialization stream.
    if (headerStartsWith(header, 0xAC, 0xED)) {
        // next two bytes are version number, currently 0x00 0x05
        return "application/x-java-serialized-object";
    }

    if (header[0] == '<') {
        // Only all-lowercase or all-uppercase tag names are recognised.
        if (header[1] == '!'
                || headerHasAscii(header, 1, "html")
                || headerHasAscii(header, 1, "head")
                || headerHasAscii(header, 1, "body")
                || headerHasAscii(header, 1, "HTML")
                || headerHasAscii(header, 1, "HEAD")
                || headerHasAscii(header, 1, "BODY")) {
            return "text/html";
        }
        if (headerHasAscii(header, 1, "?xml ")) {
            return "application/xml";
        }
    }

    // Big and little endian UTF-16 encodings, with byte order mark,
    // followed by the UTF-16 encoding of "<?x".
    if (headerStartsWith(header, 0xFE, 0xFF, 0, '<', 0, '?', 0, 'x')) {
        return "application/xml";
    }
    if (headerStartsWith(header, 0xFF, 0xFE, '<', 0, '?', 0, 'x', 0)) {
        return "application/xml";
    }

    if (headerHasAscii(header, 0, "GIF8")) {
        return "image/gif";
    }

    if (headerHasAscii(header, 0, "#def")) {
        return "image/x-bitmap";
    }

    if (headerHasAscii(header, 0, "! XPM2")) {
        return "image/x-pixmap";
    }

    // PNG signature: 0x89 'P' 'N' 'G' CR LF 0x1A LF.
    if (headerStartsWith(header, 137, 80, 78, 71, 13, 10, 26, 10)) {
        return "image/png";
    }

    if (headerStartsWith(header, 0xFF, 0xD8, 0xFF)) {
        final int marker = header[3];
        if (marker == 0xE0) {
            return "image/jpeg";
        }

        /*
         * File format used by digital cameras to store images.
         * Exif Format can be read by any application supporting
         * JPEG. Exif Spec can be found at:
         * http://www.pima.net/standards/it10/PIMA15740/Exif_2-1.PDF
         */
        if (marker == 0xE1 && headerHasAscii(header, 6, "Exif") && header[10] == 0) {
            return "image/jpeg";
        }

        if (marker == 0xEE) {
            // Spelling differs from the two branches above; kept as-is so
            // existing callers comparing against this value keep working.
            return "image/jpg";
        }
    }

    if (headerStartsWith(header, 0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1)) {
        /* Above is signature of Microsoft Structured Storage.
         * Below this, could have tests for various SS entities.
         * For now, just test for FlashPix.
         */
        if (checkfpx(is)) {
            return "image/vnd.fpx";
        }
    }

    // .au format, big endian (".snd").
    if (headerStartsWith(header, 0x2E, 0x73, 0x6E, 0x64)) {
        return "audio/basic";
    }

    // .au format, little endian ("dns.").
    if (headerStartsWith(header, 0x64, 0x6E, 0x73, 0x2E)) {
        return "audio/basic";
    }

    // "RIFF" alone only identifies the generic RIFF container, which is also
    // used by other formats such as AVI and WebP. A WAV file additionally
    // carries the form type "WAVE" right after the 4-byte chunk size, so
    // require both before reporting audio.
    if (headerHasAscii(header, 0, "RIFF") && headerHasAscii(header, 8, "WAVE")) {
        return "audio/x-wav";
    }

    return null;
}

/** Returns true if the header begins with exactly the given byte values. */
private static boolean headerStartsWith(int[] header, int... signature) {
    if (signature.length > header.length) {
        return false;
    }
    for (int i = 0; i < signature.length; i++) {
        if (header[i] != signature[i]) {
            return false;
        }
    }
    return true;
}

/** Returns true if the header bytes starting at offset equal the ASCII characters of text. */
private static boolean headerHasAscii(int[] header, int offset, String text) {
    if (offset + text.length() > header.length) {
        return false;
    }
    for (int i = 0; i < text.length(); i++) {
        if (header[offset + i] != text.charAt(i)) {
            return false;
        }
    }
    return true;
}