在这次抓取海大教务系统课表的过程中,我发现验证码几乎起不了作用,照样能抓到课表。因此在这里共享一部分代码。每个学校都有各自的抓取方式,我这里只是先获取返回的 HTML 代码,再自己解析出课表。由于核心代码在团队中是保密的,所以核心代码就不在这里展示了。
/**
 * Logs in to the school's academic-affairs web system over HTTP and fetches
 * the course-table page; all of the work is triggered from the constructor.
 */
public class httpConn {
// Cookies read from the first client's cookie store after the login POST.
List<Cookie> cookies;
// Value of the last cookie in the login response; later sent as JSESSIONID.
String Sessionid;
// NOTE(review): not assigned by the constructor in this part of the file — confirm whether it is used elsewhere.
HttpResponse httpResponse2;
// Receives the result messages (what=1 / what=2) posted by the constructor.
Handler mHandler;
// NOTE(review): not touched by the constructor; purpose not visible from here.
User userlogin;
/**
 * Logs in to the academic-affairs site and downloads the course-table page.
 *
 * <p>The constructor performs three blocking HTTP requests in order:
 * <ol>
 *   <li>POST {@code userId}/{@code userPass} to {@code xsxt.jsp}; on HTTP 200 the
 *       value of the last cookie returned is stored in {@code Sessionid}.</li>
 *   <li>GET {@code bks_login2.uniteLogin} with the student id and the result of
 *       {@code CourseUtils.generatePassword(password)}; if this request yields no
 *       cookie, a message with {@code what == 1} is sent and the constructor
 *       returns.</li>
 *   <li>GET {@code xk.CourseView} with both cookie values in a {@code Cookie}
 *       header; on HTTP 200 the body is decoded as gb2312, handed to
 *       {@code filterHtml(String, dao)}, and a message with {@code what == 2} is
 *       sent.</li>
 * </ol>
 *
 * <p>NOTE(review): if an exception is thrown, or the last request does not return
 * HTTP 200, no message is sent to the handler at all — confirm the caller copes
 * with never being notified.
 *
 * <p>NOTE(review): this does blocking network I/O, so it presumably must be
 * invoked off the UI thread — verify against the caller.
 *
 * @param user     student id; sent as {@code userId} and as {@code stuid}
 * @param password plain password; sent as {@code userPass} and, after
 *                 {@code CourseUtils.generatePassword}, as {@code pwd}
 * @param dao      passed through to {@code filterHtml}
 * @param mHandler stored in the field of the same name; receives the result message
 */
public httpConn(String user,String password, CoursedbManager dao, Handler mHandler)
{
    this.mHandler = mHandler;
    final String loginUri = "http://210.38.137.101:9080/xsxt/xsxt.jsp";
    // Declared outside the try block so both clients can be shut down in finally.
    HttpClient client = null;
    HttpClient clientget = null;
    try {
        Message msg = new Message();

        // Step 1: form login, used only to obtain the session cookie.
        client = new DefaultHttpClient();
        HttpPost loginPost = new HttpPost(loginUri);
        List<NameValuePair> params = new ArrayList<NameValuePair>();
        params.add(new BasicNameValuePair("userId", user));
        params.add(new BasicNameValuePair("userPass", password));
        loginPost.setEntity(new UrlEncodedFormEntity(params, HTTP.UTF_8));
        HttpResponse loginResponse = client.execute(loginPost);
        cookies = ((AbstractHttpClient) client).getCookieStore().getCookies();
        // NOTE(review): the cookie is chosen by position (the last one), not by
        // name — confirm the server only ever sets the session cookie here.
        if (loginResponse.getStatusLine().getStatusCode() == 200
                && cookies != null && !cookies.isEmpty()) {
            Sessionid = cookies.get(cookies.size() - 1).getValue();
        }

        // Step 2: second login endpoint, which issues the ACCOUNT cookie.
        String pwd = CourseUtils.generatePassword(password);
        // NOTE(review): user and pwd are concatenated without URL-encoding;
        // whether generatePassword already returns a URL-safe value is not
        // visible here — confirm before adding encoding.
        String uniteLoginUri = "http://210.38.137.101/pls/wwwbks/bks_login2.uniteLogin?stuid="+user+"&pwd="+pwd;
        HttpGet uniteLoginGet = new HttpGet(uniteLoginUri);
        clientget = new DefaultHttpClient();
        HttpResponse uniteLoginResponse = clientget.execute(uniteLoginGet);
        // Drain the unused body so the connection is released before the same
        // client is reused for the next request.
        if (uniteLoginResponse.getEntity() != null) {
            uniteLoginResponse.getEntity().consumeContent();
        }
        List<Cookie> cookies2 = ((AbstractHttpClient) clientget).getCookieStore().getCookies();
        if (cookies2.isEmpty())
        {
            // No cookie came back from the second login: report what=1 and stop.
            msg.what = 1;
            mHandler.sendMessage(msg);
            return;
        }
        String sessionid2 = cookies2.get(0).getValue();

        // Step 3: fetch the course-table page with both cookie values.
        HttpGet courseGet = new HttpGet("http://210.38.137.101/pls/wwwbks/xk.CourseView");
        courseGet.addHeader("Cookie","ACCOUNT="+sessionid2+"; JSESSIONID="+Sessionid);
        // A distinct local name avoids shadowing the httpResponse2 field.
        HttpResponse courseResponse = clientget.execute(courseGet);
        if (courseResponse.getStatusLine().getStatusCode() == 200) {
            // The previously disabled dao.DeleteAllLessen()/dao.DeleteUser()
            // calls remain disabled.
            StringBuilder sb = new StringBuilder();
            InputStream is = courseResponse.getEntity().getContent();
            try {
                BufferedReader br = new BufferedReader(new InputStreamReader(is, "gb2312"));
                String data;
                // Lines are appended without separators, exactly as before.
                while ((data = br.readLine()) != null)
                {
                    sb.append(data);
                }
            } finally {
                // Always release the response stream, even if reading failed.
                try {
                    is.close();
                } catch (java.io.IOException ignored) {
                    // Nothing useful can be done if close fails after reading.
                }
            }
            // Parsing of the returned HTML starts here.
            filterHtml(sb.toString(), dao);
            msg.what = 2;
            mHandler.sendMessage(msg);
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // Both clients are local to this constructor, so release their
        // connections once the work is done.
        if (client != null) {
            client.getConnectionManager().shutdown();
        }
        if (clientget != null) {
            clientget.getConnectionManager().shutdown();
        }
    }
}