ZC: 用 snpa_portable(SoftPerfect Network Protocol Analyzer)抓到的 HTTP 数据部分通常是经过 gzip 等方式压缩的,需要先解压缩,然后再根据网页的编码方式进行解码。
1、JavaSE (包 : httpPacketEncode)
1.1、ThttpPacketEncode
package httpPacketEncode;

import java.io.*;
import java.util.zip.*;

/**
 * Decodes an HTTP response that was captured as a hexadecimal dump (for example with
 * SoftPerfect Network Protocol Analyzer): the response body is gunzipped and then decoded
 * with the character encoding of the web page.
 *
 * Last modified by the original author: 2016-11-02 16:28.
 */
public class ThttpPacketEncode {

    /** Byte sequence that terminates one header line or one chunk-size line. */
    private static final byte[] CRLF = {0x0D, 0x0A};

    /** Byte sequence that separates the HTTP header block from the body. */
    private static final byte[] CRLF_CRLF = {0x0D, 0x0A, 0x0D, 0x0A};

    /**
     * Demo entry point: decodes one embedded sample capture and prints either the decoded
     * page or the non-zero status code.
     *
     * Site the sample packet was captured from: http://gd.joycool.net/
     */
    public static void main(String[] args) throws Exception {
        ThttpPacketHexString http = new ThttpPacketHexString();
        // Hex dump of one complete packet; the literals are joined exactly where the
        // original listing was split.
        http.Fstr = "485454502F312E3120323030204F4B0D0A5365727665723A206E67696E780D0A446174653A205468752C203237204F637420323031362030343A30383A303620474D540D0A436F6E74656E742D547970653A20746578742F68746D6C3B636861727365743D7574662D380D0A5472616E736665722D456E636F64696E673A206368756E6B65640D0A436F6E6E656374696F6E3A206B6565702D616C6976650D0A5365742D436F6F6B69653A205349443D385756625F4D3B20506174683D2F0D0A43616368652D436F6E74726F6C3A206E6F2D63616368650D0A436F6E74656E742D456E636F64696E673A20677A69700D0A0D0A6162300D0A1F8B0800000000000003A5586D4F1B5716FEDE5FE1B5B4D56E453CE3F17B8B4DBBB4AB454AB69136EDAA8AF2C1D8831962CF783D9300AB6A65420063C0C6C104884931091068308642E2375E247E4AE27B67E6D3FE853D7706EC89ED34A0952CBF8CEF39F7B9E7E539E7DCEE9E9148D8F4908D899CC07BCD560B6D36B17C4008727CC86BFEE1CE5F6FB8CD3DBEEE3F7CFB7DEF9D9F6E7F671A9460F9ED1FFE72B3AFD764FEDB9D5B37CD26F30D8AFAA7AD97A2BEBDF3AD893C32D92DB4E94ECCCF8B9C045AFD618AFAEEEF66DF67DD9A30ECC78B5EF3A02445BFA4A8E1E161CBB0CD22C44294D5E3F15023648DD9D73DC8FA83BEEE082BF94DBC3FC27ACD0F3976382AC424B32920F012CB4B5EF330179406BD8C9DEEE278D8C91FBE2106FC61D66BB5309EAE07221BD37EFBFBE1116DA63ED0764BE8E7C2ECF751898B70FF668306A58CCD49D64A9C14667DF5CABC3A51C2E5324EA4E58D53345BEDA6F47FBAC31C7FDF34186307BC662A1A1346462D0151EC79E865CCA6181BF69A456934CC8A832C0B88A5D1289C406247240A1611F5947EBE7E2138EAEB1603312E2A99C458A0A16B4857E5FBAA9BD2FFF57507B987267F980B819702707C3606A0C37E114C09FF58C164036017304E588879CDA36C382C0C9B2F0E70099DAC80BD61BDAEAE29CF807B3EEBE62221D8022C2B982FD0C4D82835248C060421FC4D30C2F11407D6A7AC76BBDDC5D8EC34ED72382D216E801CA93F066FFE4B93F40AE10711DE12147A00117CEB0B7A3D769BDBECC34FF7F1F44C37E5F7FDDC5C1D168787448AE383EC8865488C9A7DA878AC247E6D59455C4AC5D8"
                + "1027C2E9F575F8701B4DCE765A1716421CAF2F92576AE864B165D1506040883D8850DA3B59D7A37D039C604B7C5443C96D22A11DEB6E136987735919BBDD03A62E2FA82F9689CCBDDF5FEE7231763861EE04552795E423B4B1032F88B47A790E8DAD3636FD8412BBD3053EBFBAC75C0CCD80D3DC36BBC732140D6941D87E3C715088523C3B2CEA960354F2AB78CB99B435065FF5C929025F392AD42BFBF5EAA40194323F8B571368226B8CA7F008C5DA6D7687";
        http.Fstr += "CB6ABD8C9D0E86D67791D88806A5870B4266F9D4B1053451FA049E778BFBEFB28977934FD07E021DECBE5B9EC46FC650E5E8FDF84403239A4FE0C43CC4225A18BB214F5750E22D7EB32767D7F43FD0DC7AC30F06E7B763B27DEE8F44BF122520181F08C9D35A2C1A02A0D558A87258AF95E45C0615D7EAD559942E35316DBC42C559E5E0D71FFB6E83AE3FD9202CFEDC01869E2C2160C5BE66C2C82719A0A916CB740A568795B14180BFD85236CF40A20B67DFAAB949F9B753F934A59D60AAC39694967A020FA4C7DEF2F3FE909E7F8D94B1391C669F52D88243A1033D5ED4D5382AEFA1E44E97522CE1E9D366385F4597BCB72ECF4FA28C964F5A0E7A9B0A2E53D710833A512B7B55F43C47F09F9794FD84BC9B415B2B4AB1E06D21A74B05EDB9EF70D1609DBB6A2589D737EF69966850DAA514A944BD7A0DFA803788AC160E17050AA8C4EEB0B93D0ED0582F8F7F1E92BE42E9229ADF427B159CC8A074A65E4ED6CBF177F1AD86C9DB0FD98ED1E30086BAABEE1EA383190DA2A95DEA631841B60D225002E12F5DA107F2B55E3B0170A8FC54393DAD57ABD7C5E774117CCAD111CA9D5ED38420DA0A8F01CAB28242E0A1FF1E3F43C77174368E57A7EB277978073376301DE12F6331B90B79AE2ED5AE88C5E03D27ED767920B22135F06206A78F944405957E539FE795B38C3A5E40A5E7A83C090F09EB5D7258BB33DA5D08547617554ED4DAF215415DD626A6D53C369793B64259558A47F2B3C768F3048DA7C136F5E367EFE38FE0D5D9799D88C16AF50089DD459B8738FEAA059666D1CBB240B8D8EEA45D2E97534323116E0651B9F61C152BF5B3652555AAD76AF59345D0850A3BA8300FD094E42178AE612503AB0606FD1235E80F87B56C8A0942043207F429634960402261E0D38F2DAE57B3B8F214E572F84D093FDF94736565EC5963372F2E95D0EAB6CE12E725BD1F820A8FE36357E706AB9B06BF19AA9BB1A8756A925C2E487FDACED036B7B1E4B6114A7B7CE83474EDAD1CB4CDF17BD57D20C6B17C901259F627E1011FBA093DBE5EE795B314E4488BA9359EFE11C8EE07E8BBFAF801817473E42121369BD3E960C01C385FC6C0FBCF5E12E1AE66EC7F5A18729A1A1189184AE6D1C4A6EE1C8DA"
                + "9BFF89EE35122876A5592580D7B0D0A1196226F8C16290D280E8F83261DD5DE5B942B347CFE059A5B94AB45B4F0049DBD06E5F2D4E235743A9D8C1562B055A7BA7C8826A6EBE5198868A0EE7AF957B49FC6CB6BF0BA8E72BBC7D60ED89012BAA3FC4198CB28FDFB37E4BBA1DEA3D331949E69F1D847C46EB1A208255B331AE4AAC3E571324E38DB52F13F7A8EA1C56605F2CAE305F95105A7B695B9F9F312DA3E00DBE913504BAA5C4C70FEA0E522F82D3C2B51FF7AC08A64EA3312B0BAB42717B238B5A99EAE68FEB97ADB6AB55A698FDB6173";
        http.Fstr += "BAEC97CDE2653C74DE7F48E8A7FC3C3428460068AA8AF7B372E149233A1A51D5990AEDE07BA82F68F3294E2EC833C75D683DA3C6F3B8BCA36E6DEB1429EF54D5A5A376855AFBDCE8A12FC8D2EDB2E9D44DC8D20379834A9BF2AB8C9C2DA18544FD2447EADAEC0430A5BA5045E925BCFB0252F28ABAC924D6246237005F5C2125A03C0755001D3FC62B8B686B1202565E3D96AB6768E3153E5C6CE86E469D280981FB8CD16CCAA375547E4496DED3B31364E5DCB2D5E1A02D2E0F44494B5FD5C1961E978774036862176592F7E4AD315C7821A7F6D5A94CBDBA81971794956778664D9EDE8146BC81A9E11C2AE017A50F109DCDCB850A2A9750E571179A3C54C657D15A0AE5D77162B95E9D6ED710E0A451A3029CAD81004EE5D1EA7E97B21E2772E57118C6D497A97A35D3AE800AF9634116C2590CFB790875329D7EAD647795F42AAEAD03E17C0DA681DAA2D61EC3A1D0E42A5AAD36B4782FCC363F077DD479A9792E09C62C43FB0A79A1ACA4F09636FDE969D6F40B451647049E1DBD23447500FA9C70B1DE501D3FC1BA307579A04E6FCFFC113DD905B40D9C2D9B45EFDF84195BDF4A79F30248F0DA5B310E3769DA8A7B50C249FBB45204EBAA2F7FC1D3BB1FDBB53FEC0F1836C6636FC168D7DBD8E6B1396D5638A36E77D8583E4DC2EB7D3C89B24520ECF7718D33F589425FA4BC798CABC074BA1B5162B25E7DDD99E96C214B68C0CF5B02428484F4DE349A5BB0E8D40815413939812B00B822D27BA70FAE4386C9B4465A1C2A24F4033B357D6F32812AFC720676C74B7979A764917349BC528551142E213AA8A23A24195CAC9024D3CF43DA9AE3D71632CE41FB953B45078B5754035CABB580BA1E3C3E211F2F5BC0F9EAE2993A352BAF159A7A0CD39816739F180D758D6440D4A74368C1B2874A715ADE3BD04D6DA561EE605C78699322D574750D3A39CACA7C49BB3F4D3180DA037721F8E9145E4EA1E42F300AFFACFF80F98AFCA8974F7172135A0C94D6DA144D65AF101D8D71A141E93C7FBE739E6768ABF33C0F2E50B3EB38352B3F3F031CC0D038190716848908AA81BA324F782C5D3ECF7F130E9FE7357111"
                + "3E59688A1EB2418BA6186D4CE1F5040CF65F2A33E9BEDEDBF0DB6AA7A113B4BBE1E187C7315C82F6B31C845648786809F014B9E8846BD3CBBBAE7F8CC28D5784B4603D313620C482703FCB7A6D70A3039A199AA6190FC4FC75DA45DA06FD8DDBCD581D9628DFB80C6AA9ACE46EF6FF87055600AB811F48C26C4C993E844D4C02F545330BDC40EA7793947E2F4A91BB60B897FC1FE5330C11A41600000D0A300D0A0D0A";

        int iRtn = HtmlPacketParse(http, "ISO_8859-1", "utf-8");
        if (iRtn != 0) {
            System.out.println("iRtn : " + iRtn);
        } else {
            System.out.println(http.Fstr);
        }
    }

    /**
     * Decodes a captured HTTP response in place.
     *
     * The hex dump in {@code _http.Fstr} is converted to bytes, split into header and body at
     * the first empty line, the body is un-chunked when the header declares
     * {@code Transfer-Encoding: chunked}, gunzipped, and finally decoded as text. On success
     * {@code _http.Fstr} is replaced by the decoded page text; on any non-zero return it is
     * left untouched.
     *
     * Working on bytes instead of on the hex text means the CRLF search is always
     * byte-aligned and does not depend on the letter case of the hex digits.
     *
     * @param _http               holder whose {@code Fstr} contains the complete "Data" part of
     *                            the captured packet as hexadecimal text
     * @param _strStringEncoding  charset name used to decode the HTTP header bytes
     * @param _strWebpageEncoding charset name used to decode the decompressed page
     * @return 0 on success; -1 when no header/body separator (CRLF CRLF) exists or there is no
     *         input; -2 when there is no Content-Encoding header; -3 when the content encoding
     *         is not gzip; -4 when the line preceding the gzip data cannot be located or parsed
     * @throws Exception on invalid hex digits, unsupported charset names or corrupt gzip data
     */
    public static int HtmlPacketParse(ThttpPacketHexString _http, String _strStringEncoding, String _strWebpageEncoding) throws Exception {
        if (_http == null || _http.Fstr == null) {
            return -1;
        }
        byte[] packet = Hex2String(_http.Fstr);

        int headerEnd = indexOf(packet, CRLF_CRLF, 0);
        if (headerEnd == -1) {
            return -1;
        }
        // The header text keeps its terminating empty line; the body starts right after it.
        int bodyStart = headerEnd + CRLF_CRLF.length;
        String strHttpHeader = new String(packet, 0, bodyStart, _strStringEncoding);

        String strContentEncoding = HttpHeaderAttrValue(false, strHttpHeader, "Content-Encoding");
        if (strContentEncoding == null) {
            return -2;
        }
        if (!strContentEncoding.equalsIgnoreCase("gzip")) {
            return -3;
        }

        byte[] gzipBytes;
        String strTransferEncoding = HttpHeaderAttrValue(false, strHttpHeader, "Transfer-Encoding");
        if (endsWithIgnoreCase(strTransferEncoding, "chunked")) {
            // The "number" in front of the gzip data is the hexadecimal chunk size of the
            // chunked transfer coding; every chunk carries one, so all of them are removed.
            gzipBytes = decodeChunkedBody(packet, bodyStart);
            if (gzipBytes == null) {
                return -4;
            }
        } else if (matchesAt(packet, GZIP_MAGIC, bodyStart)) {
            // Not chunked and the body already starts with the gzip signature: use it as is.
            gzipBytes = copyFrom(packet, bodyStart);
        } else {
            // Legacy behaviour: skip everything up to and including the first CRLF.
            int lineEnd = indexOf(packet, CRLF, bodyStart);
            if (lineEnd == -1) {
                return -4;
            }
            gzipBytes = copyFrom(packet, lineEnd + CRLF.length);
        }

        _http.Fstr = gunzipToString(gzipBytes, _strWebpageEncoding);
        return 0;
    }

    /** First two bytes of every gzip stream. */
    private static final byte[] GZIP_MAGIC = {0x1F, (byte) 0x8B};

    /**
     * Converts hexadecimal text to the bytes it denotes.
     *
     * Upper- and lower-case digits are accepted and whitespace anywhere in the text is
     * skipped, so dumps pasted with line breaks can be decoded. As in the previous
     * implementation, a trailing unpaired digit is ignored.
     *
     * @param _strHex hexadecimal text, two digits per byte
     * @return the decoded bytes (empty when the text contains no digit pair)
     * @throws NumberFormatException when a character is neither a hex digit nor whitespace
     */
    public static byte[] Hex2String(String _strHex) throws Exception {
        ByteArrayOutputStream out = new ByteArrayOutputStream(_strHex.length() / 2);
        int highNibble = -1;
        for (int i = 0; i < _strHex.length(); i++) {
            char c = _strHex.charAt(i);
            if (Character.isWhitespace(c)) {
                continue;
            }
            int digit = Character.digit(c, 16);
            if (digit < 0) {
                throw new NumberFormatException("Invalid hex character '" + c + "' at index " + i);
            }
            if (highNibble < 0) {
                highNibble = digit;
            } else {
                out.write((highNibble << 4) | digit);
                highNibble = -1;
            }
        }
        return out.toByteArray();
    }

    /**
     * Converts hexadecimal text to a string.
     *
     * @param _strHex            hexadecimal text, two digits per byte
     * @param _strStringEncoding charset name used to decode the bytes
     * @return the decoded string
     * @throws Exception on invalid hex digits or an unsupported charset name
     */
    public static String Hex2String(String _strHex, String _strStringEncoding) throws Exception {
        return new String(Hex2String(_strHex), _strStringEncoding);
    }

    /**
     * Looks up the value of one HTTP header field.
     *
     * The name is compared against the text in front of the first ':' of each CRLF-separated
     * line, so it can no longer match inside another field's name or value. An exact-case
     * match is tried first; a case-insensitive match is tried afterwards unless
     * {@code _bNest} is true.
     *
     * @param _bNest                 true to perform only the exact-case search (the flag used
     *                               to mark the nested, already lower-cased second pass)
     * @param _strHttpHeader         the HTTP header block as text
     * @param _strHttpHeaderAttrName header field name to look for; a trailing ':' is tolerated
     * @return the trimmed field value in its original case, or null when the field is absent
     */
    public static String HttpHeaderAttrValue(boolean _bNest, String _strHttpHeader, String _strHttpHeaderAttrName) {
        if (_strHttpHeader == null || _strHttpHeaderAttrName == null) {
            return null;
        }
        String name = _strHttpHeaderAttrName.trim();
        if (name.endsWith(":")) {
            name = name.substring(0, name.length() - 1).trim();
        }
        if (name.length() == 0) {
            return null;
        }

        String value = findHeaderValue(_strHttpHeader, name, false);
        if (value == null && !_bNest) {
            value = findHeaderValue(_strHttpHeader, name, true);
        }
        return value;
    }

    /**
     * Decompresses gzip data given as hexadecimal text and decodes it as text.
     *
     * @param _strGzipData        gzip stream as hexadecimal text
     * @param _strWebpageEncoding charset name used to decode the decompressed bytes
     * @return the decoded text; an empty string when there is no data to decompress
     * @throws Exception on invalid hex digits, corrupt gzip data or an unsupported charset name
     */
    public static String HtmlGzipUncompress(String _strGzipData, String _strWebpageEncoding) throws Exception {
        return gunzipToString(Hex2String(_strGzipData), _strWebpageEncoding);
    }

    /**
     * Compresses bytes with gzip.
     *
     * @param _bytes data to compress
     * @return the gzip stream, or null when {@code _bytes} is null or empty
     * @throws Exception when compression fails
     */
    public static byte[] Compress(byte[] _bytes) throws Exception {
        if (_bytes == null || _bytes.length == 0) {
            return null;
        }
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        // Closing the gzip stream writes the trailer and releases the compressor even when
        // write() fails.
        try (GZIPOutputStream gzip = new GZIPOutputStream(out)) {
            gzip.write(_bytes);
        }
        return out.toByteArray();
    }

    /**
     * Decompresses a gzip stream.
     *
     * @param _bytes gzip stream
     * @return the decompressed bytes, or null when {@code _bytes} is null or empty
     * @throws Exception when the data is not valid gzip or ends prematurely
     */
    public static byte[] UnCompress(byte[] _bytes) throws Exception {
        if (_bytes == null || _bytes.length == 0) {
            return null;
        }
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        try (GZIPInputStream gunzip = new GZIPInputStream(new ByteArrayInputStream(_bytes))) {
            byte[] buffer = new byte[4096];
            int n;
            while ((n = gunzip.read(buffer)) >= 0) {
                out.write(buffer, 0, n);
            }
        }
        return out.toByteArray();
    }

    /** Gunzips the bytes and decodes them; yields "" when there is nothing to decompress. */
    private static String gunzipToString(byte[] gzipBytes, String charsetName) throws Exception {
        byte[] plain = UnCompress(gzipBytes);
        if (plain == null) {
            return "";
        }
        return new String(plain, charsetName);
    }

    /** Scans the CRLF-separated lines of a header block for "name: value" and returns value. */
    private static String findHeaderValue(String header, String name, boolean ignoreCase) {
        int lineStart = 0;
        while (lineStart < header.length()) {
            int lineEnd = header.indexOf("\r\n", lineStart);
            if (lineEnd == -1) {
                lineEnd = header.length();
            }
            int colon = header.indexOf(':', lineStart);
            if (colon != -1 && colon < lineEnd) {
                String candidate = header.substring(lineStart, colon).trim();
                boolean matches = ignoreCase ? candidate.equalsIgnoreCase(name) : candidate.equals(name);
                if (matches) {
                    return header.substring(colon + 1, lineEnd).trim();
                }
            }
            lineStart = lineEnd + 2;
        }
        return null;
    }

    /**
     * Removes the chunked transfer coding framing starting at {@code offset}.
     *
     * Returns null when the first chunk-size line is missing or not hexadecimal. A capture
     * that ends in the middle of a later chunk yields the bytes collected so far.
     */
    private static byte[] decodeChunkedBody(byte[] data, int offset) {
        ByteArrayOutputStream out = new ByteArrayOutputStream(Math.max(data.length - offset, 0));
        boolean sawChunk = false;
        int pos = offset;
        while (pos < data.length) {
            int lineEnd = indexOf(data, CRLF, pos);
            long size = (lineEnd == -1) ? -1 : parseChunkSize(data, pos, lineEnd);
            if (size < 0) {
                break;
            }
            sawChunk = true;
            if (size == 0) {
                break; // last-chunk marker; any trailer fields are ignored
            }
            int dataStart = lineEnd + CRLF.length;
            int take = (int) Math.min(size, (long) (data.length - dataStart));
            out.write(data, dataStart, take);
            pos = dataStart + take;
            if (take < size) {
                break; // capture ends inside this chunk
            }
            if (matchesAt(data, CRLF, pos)) {
                pos += CRLF.length; // CRLF that closes the chunk data
            }
        }
        return sawChunk ? out.toByteArray() : null;
    }

    /** Parses the leading hex digits of data[from, to); -1 when there are none or too many. */
    private static long parseChunkSize(byte[] data, int from, int to) {
        long size = 0;
        int digits = 0;
        for (int i = from; i < to; i++) {
            int digit = Character.digit((char) (data[i] & 0xFF), 16);
            if (digit < 0) {
                break; // chunk extension or padding follows the size
            }
            if (++digits > 15) {
                return -1; // would overflow a long
            }
            size = (size << 4) | digit;
        }
        return digits == 0 ? -1 : size;
    }

    /** Returns the index of the first occurrence of pattern in data at or after from, or -1. */
    private static int indexOf(byte[] data, byte[] pattern, int from) {
        for (int i = Math.max(from, 0); i <= data.length - pattern.length; i++) {
            if (matchesAt(data, pattern, i)) {
                return i;
            }
        }
        return -1;
    }

    /** Tells whether pattern occurs in data exactly at position pos. */
    private static boolean matchesAt(byte[] data, byte[] pattern, int pos) {
        if (pos < 0 || pos + pattern.length > data.length) {
            return false;
        }
        for (int j = 0; j < pattern.length; j++) {
            if (data[pos + j] != pattern[j]) {
                return false;
            }
        }
        return true;
    }

    /** Copies data[from, data.length) into a new array. */
    private static byte[] copyFrom(byte[] data, int from) {
        int start = Math.min(Math.max(from, 0), data.length);
        byte[] copy = new byte[data.length - start];
        System.arraycopy(data, start, copy, 0, copy.length);
        return copy;
    }

    /** Case-insensitive String.endsWith that treats a null text as "no match". */
    private static boolean endsWithIgnoreCase(String text, String suffix) {
        if (text == null) {
            return false;
        }
        String trimmed = text.trim();
        int start = trimmed.length() - suffix.length();
        return start >= 0 && trimmed.regionMatches(true, start, suffix, 0, suffix.length());
    }
}
1.2、ThttpPacketHexString
package httpPacketEncode; public class ThttpPacketHexString { public String Fstr; }
2、JavaWeb
2.1、httpPkt.jsp
<%@ page language="java" contentType="text/html; charset=UTF-8" pageEncoding="UTF-8"%>
<%@page import="httpPacketEncode.*"%>
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html>
<!-- 修改于 “20161102 16:31” -->
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<title>Insert title here</title>
<script type="text/javascript">
    // Empties the textarea with the given id.
    function ClickClearTextarea(_strTextareaId)
    {
        var textarea = document.getElementById(_strTextareaId);
        textarea.value = "";
    }
</script>
</head>
<body>
<%!
    // Escapes the characters that are significant in HTML text and attribute values, so that
    // submitted or decoded content cannot close the surrounding <textarea>/attribute and
    // inject markup into this page. A null text yields "".
    private static String escapeHtml(String text)
    {
        if (text == null)
            return "";
        StringBuilder sb = new StringBuilder(text.length() + 16);
        for (int i = 0; i < text.length(); i++)
        {
            char c = text.charAt(i);
            switch (c)
            {
                case '&':  sb.append("&amp;");  break;
                case '<':  sb.append("&lt;");   break;
                case '>':  sb.append("&gt;");   break;
                case '"':  sb.append("&quot;"); break;
                case '\'': sb.append("&#39;");  break;
                default:   sb.append(c);
            }
        }
        return sb.toString();
    }
%>
<%
    // Must be set before the first getParameter() call to affect how the POST body is decoded.
    request.setCharacterEncoding("UTF-8");

    String strHex = request.getParameter("hexString");
    String strStringEncoding = request.getParameter("stringEncoding");
    String strWebpageEncoding = request.getParameter("webpageEncoding");

    int iRtn = -100;            // -100 : nothing has been submitted / parsed yet
    String strError = null;     // text of an exception raised while parsing, if any
    ThttpPacketHexString pkt = null;
    if ( (strHex != null)&&(strHex.length()>0) && (strStringEncoding != null) && (strWebpageEncoding != null) )
    {
        strHex = strHex.trim();
        pkt = new ThttpPacketHexString();
        pkt.Fstr = strHex;
        try
        {
            iRtn = ThttpPacketEncode.HtmlPacketParse(pkt, strStringEncoding.trim(), strWebpageEncoding.trim());
        }
        catch (Exception e)
        {
            // Bad hex digits, unknown charset names or corrupt gzip data are reported in the
            // output box instead of failing the whole page.
            strError = e.toString();
        }
    }

    // Keep whatever the user submitted in the two encoding fields; fall back to the defaults.
    String strStringEncodingShown = (strStringEncoding != null) ? strStringEncoding : "ISO-8859-1";
    String strWebpageEncodingShown = (strWebpageEncoding != null) ? strWebpageEncoding : "UTF-8";
%>
<form action="httpPkt.jsp" method="post">
    <textarea rows="20" cols="100" name="hexString" id="textareaInput"><%
        if (strHex != null)
            out.print(escapeHtml(strHex));
    %></textarea>
    <br/>
    <input type="text" name="stringEncoding" value="<%= escapeHtml(strStringEncodingShown) %>" />
    <input type="text" name="webpageEncoding" value="<%= escapeHtml(strWebpageEncodingShown) %>" />
    <input type="submit" value="提交" />
    <input type="button" value="清空输入框" onclick="ClickClearTextarea('textareaInput')"/>
    <input type="button" value="清空输出框" onclick="ClickClearTextarea('textareaOutput')"/>
    <br/>
    <textarea rows="20" cols="100" id="textareaOutput" style="margin-top:3px;"><%
        if (strError != null)
            out.print(escapeHtml("error : " + strError));
        else if (iRtn == 0)
            out.print(escapeHtml(pkt.Fstr));
        else if (iRtn != -100)
            out.print("iRtn : "+iRtn);
    %></textarea>
</form>
</body>
</html>
3、