【Java爬虫】爬取南通大学教务系统成绩计算绩点

  以前写过一个Python版的,但是想做一个JSP网页版的,就又用Java写了一下。

  具体地址的分析过程在这里,这里简单说一下HttpClient的Get,Post方法的使用

           1.Get请求方法

//创建一个浏览器客户端
CloseableHttpClient httpClient = HttpClients.createDefault();
//要Get的地址
String url1="http://www.baidu.com";				
//创建一个Get请求
HttpGet baidu=new HttpGet(url1);
//用上面创建的浏览器客户端执行该请求
CloseableHttpResponse res=httpClient.execute(baidu);
//用响应创建一个http实体并获得输入流
HttpEntity he=res.getEntity();
InputStream in=he.getContent();
//将获得的流写到本地磁盘
FileOutputStream out=new FileOutputStream("baidu.html'");
byte[] buffer=new byte[1024];
int count=-1;
while((count=in.read(buffer))!=-1)
{
	out.write(buffer, 0, count);
}
in.close();
out.close();

   2.Post请求方法

CloseableHttpClient httpClient = HttpClients.createDefault();
String url="http://××××.××××.com?#";	
//要提交的参数username,password				
List<NameValuePair> list = new ArrayList<NameValuePair>();
list.add(new BasicNameValuePair("Username","Name"));
list.add(new BasicNameValuePair("Password","××××××"));
//转换编码
UrlEncodedFormEntity entity = new UrlEncodedFormEntity(list,"utf-8"); 
//创建Post请求
HttpPost httpPost=new HttpPost(url);
//为请求设置参数
httpPost.setEntity(entity);
//获得响应,输入流并写入本地磁盘
CloseableHttpResponse res=httpClient.execute(httpPost);
HttpEntity he=res.getEntity();
InputStream in=he.getContent();
FileOutputStream out=new FileOutputStream("××××.×××");
byte[] buffer=new byte[1024];
int count=-1;
while((count=in.read(buffer))!=-1)
{
	out.write(buffer, 0, count);
}
in.close();
out.close();

爬虫的完整代码:

import java.io.ByteArrayOutputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.List;
import java.util.Scanner;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.http.HttpEntity;
import org.apache.http.NameValuePair;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.*;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.message.BasicNameValuePair;


/**
 * Console driver for the crawler.
 *
 * <p>Downloads the captcha image, reads one line from standard input, and then
 * processes one command per input line until {@code #} is entered or the input ends:
 * <ul>
 *   <li>{@code n} - print every personal-information field matched in the page
 *       returned by {@code doon.getname()};</li>
 *   <li>{@code s} - print each counted course followed by the credit total, the
 *       credit-weighted grade-point total and their quotient.</li>
 * </ul>
 */
public class spider02 {
	/**
	 * Runs the interactive loop described on the class.
	 *
	 * @param args unused
	 * @throws ClientProtocolException if an HTTP request violates the protocol
	 * @throws IOException if the captcha or the score data cannot be fetched or saved
	 */
	public static void main(String[] args) throws ClientProtocolException, IOException {
		// The scanner wraps System.in, which is deliberately left open; hence the
		// suppressed resource-leak warning.
		@SuppressWarnings("resource")
		Scanner cin = new Scanner(System.in);
		doon asd = new doon();
		asd.getyzm();

		// The first input line is the captcha text typed by the user.
		// NOTE(review): the value is consumed but never used and login() is never
		// called from here (the original line was marked as test code) - confirm
		// whether a login step is meant to happen before the queries below.
		if (!cin.hasNextLine()) {
			return;
		}
		cin.nextLine();

		// hasNextLine() ends the loop cleanly when the input is exhausted instead of
		// letting nextLine() throw NoSuchElementException.
		while (cin.hasNextLine()) {
			String stop = cin.nextLine();
			System.out.println(stop);
			if (stop.equals("n")) {
				Matcher name = asd.patternname(asd.getname());
				while (name.find()) {
					System.out.println(name.group(1));
				}
			}
			if (stop.equals("s")) {
				Matcher score = asd.patternscore(asd.getscore());
				List<lession> les = asd.workjidian(score);
				double jdsum = 0;
				double xfsum = 0;
				for (lession course : les) {
					jdsum += course.getKcxfjd();
					xfsum += Double.valueOf(course.getXf()).doubleValue();
					System.out.println(course.getKcmc() + "\t" + course.getZpcj() + "\t"
							+ course.getXf() + "\t" + course.getKcxfjd());
				}
				// With no counted credits the quotient would be 0/0 and print "NaN".
				double average = xfsum > 0 ? jdsum / xfsum : 0.0;
				System.out.println("所修课程学分:" + xfsum);
				System.out.println("所修课程学分绩点:" + jdsum);
				System.out.println("平均学分绩点:" + average);
			}
			if (stop.equals("#")) {
				break;
			}
		}
	}
}
/**
 * HTTP client for the grade-query pages under {@code http://jwgl.ntu.edu.cn/cjcx/}.
 *
 * <p>It downloads the captcha image, submits the login form, fetches the
 * personal-information and score pages, extracts fields from them with regular
 * expressions and computes credit-weighted grade points.
 *
 * <p>All requests go through one shared {@link CloseableHttpClient} instance,
 * presumably so that the session established by the captcha/login requests is
 * reused by the later queries - confirm against the server's behaviour.
 */
class doon {
	/**
	 * Matches one course record in the score response. Groups 1-12 capture the values
	 * of the listed keys in order; {@link #workjidian} reads group 5 ({@code xf}) as
	 * the credit value and group 6 ({@code zpcj}) as the grade.
	 */
	private static final Pattern SCORE_PATTERN = Pattern.compile("\"kcmc\":\"(.*?)\",\"jsxm\":\"(.*?)\",\"xq\":\"(.*?)\",\"xs\":\"(.*?)\",\"xf\":\"(.*?)\",\"zpcj\":\"(.*?)\",\"pscj\":\"(.*?)\",\"qmcj\":\"(.*?)\",\"kcsx\":\"(.*?)\",\"cjid\":\"(.*?)\",\"ksfsm\":\"(.*?)\",\"pxcj\":\"(.*?)\"}");

	/** Matches the text of each {@code <b>} element in the personal-information page. */
	private static final Pattern NAME_PATTERN = Pattern.compile("<b>(.*?)</b>");

	/** Sentinel returned by {@link #gradePoint} for grades that earn no grade points. */
	private static final double NOT_COUNTED = Double.NaN;

	private final CloseableHttpClient httpClient = HttpClients.createDefault();

	/**
	 * Logs in and then fetches the score data (which also saves it to
	 * {@code score.html}). I/O failures are printed to standard error, not rethrown.
	 *
	 * @param xh   value of the {@code xh} form field (presumably the student number - confirm)
	 * @param sfzh value of the {@code sfzh} form field (presumably the ID-card number - confirm)
	 * @param kl   value of the {@code kl} form field (presumably the password - confirm)
	 * @param yzm  value of the {@code yzm} form field, the captcha text
	 */
	public void done(String xh, String sfzh, String kl, String yzm) {
		try {
			login(xh, sfzh, kl, yzm);
			getscore();
		} catch (IOException e) {
			// ClientProtocolException is an IOException, so one handler covers both.
			e.printStackTrace();
		}
	}

	/**
	 * Posts {@code xq=2013-2014-1} to the personal-information page and returns the
	 * response body as text.
	 *
	 * @return the response body, or an empty string when the request fails (the
	 *         failure is printed to standard error)
	 */
	public String getname() {
		List<NameValuePair> form = new ArrayList<NameValuePair>();
		form.add(new BasicNameValuePair("xq", "2013-2014-1"));
		try {
			return decode(postForm("http://jwgl.ntu.edu.cn/cjcx/QueryAll.aspx", form));
		} catch (IOException e) {
			e.printStackTrace();
			return "";
		}
	}

	/**
	 * Downloads the captcha image with a GET request and saves it as {@code yzm.gif}
	 * in the working directory.
	 *
	 * @throws IOException if the request or the file write fails
	 */
	public void getyzm() throws IOException {
		byte[] image = fetchBody(new HttpGet("http://jwgl.ntu.edu.cn/cjcx/checkImage.aspx"));
		saveToFile("yzm.gif", image);
	}

	/**
	 * Submits the login form and saves the server's reply as {@code ans.html}.
	 *
	 * @param xh   value of the {@code xh} form field (presumably the student number - confirm)
	 * @param sfzh value of the {@code sfzh} form field (presumably the ID-card number - confirm)
	 * @param kl   value of the {@code kl} form field (presumably the password - confirm)
	 * @param yzm  value of the {@code yzm} form field, the captcha text
	 * @throws ClientProtocolException if the HTTP exchange violates the protocol
	 * @throws IOException if the request or the file write fails
	 */
	public void login(String xh, String sfzh, String kl, String yzm) throws ClientProtocolException, IOException {
		List<NameValuePair> form = new ArrayList<NameValuePair>();
		// NOTE(review): the two ASP.NET state fields are hard-coded; presumably they were
		// captured from the login page and must match what the server expects - verify.
		form.add(new BasicNameValuePair("__VIEWSTATE", "/wEPDwUJODExMDE5NzY5ZGRgtUdRucUbXsT8g55XmVsTwV6PMw=="));
		form.add(new BasicNameValuePair("__VIEWSTATEGENERATOR", "6C0FF253"));
		form.add(new BasicNameValuePair("xh", xh));
		form.add(new BasicNameValuePair("sfzh", sfzh));
		form.add(new BasicNameValuePair("kl", kl));
		form.add(new BasicNameValuePair("yzm", yzm));
		saveToFile("ans.html", postForm("http://jwgl.ntu.edu.cn/cjcx/Default.aspx", form));
	}

	/**
	 * Requests the first 80 score records, saves the raw reply as {@code score.html}
	 * and returns it as text.
	 *
	 * @return the response body
	 * @throws ClientProtocolException if the HTTP exchange violates the protocol
	 * @throws IOException if the request or the file write fails
	 */
	public String getscore() throws ClientProtocolException, IOException {
		List<NameValuePair> form = new ArrayList<NameValuePair>();
		form.add(new BasicNameValuePair("start", "0"));
		form.add(new BasicNameValuePair("pageSize", "80"));
		byte[] body = postForm("http://jwgl.ntu.edu.cn/cjcx/Data/ScoreAllData.aspx", form);
		saveToFile("score.html", body);
		return decode(body);
	}

	/**
	 * Creates a matcher that finds course records in the score response.
	 *
	 * @param score text returned by {@link #getscore()}
	 * @return a matcher over {@code score}; each match exposes twelve groups
	 */
	public Matcher patternscore(String score) {
		return SCORE_PATTERN.matcher(score);
	}

	/**
	 * Creates a matcher that finds personal-information fields.
	 *
	 * @param name text returned by {@link #getname()}
	 * @return a matcher over {@code name}; group 1 of each match is the text
	 *         between {@code <b>} and {@code </b>}
	 */
	public Matcher patternname(String name) {
		return NAME_PATTERN.matcher(name);
	}

	/**
	 * Computes the credit-weighted grade points of every course the matcher finds.
	 *
	 * <p>Courses whose grade earns no grade points (deferred, failed, below 60, or
	 * not recognised) are left out of the result.
	 *
	 * @param score matcher created by {@link #patternscore(String)}; it is advanced
	 *              to the end of its input
	 * @return one {@code lession} per counted course, in match order
	 * @throws NumberFormatException if the credit value of a counted course is not a number
	 */
	public List<lession> workjidian(Matcher score) {
		List<lession> les = new ArrayList<lession>();
		while (score.find()) {
			double point = gradePoint(score.group(6));
			if (Double.isNaN(point)) {
				continue;
			}
			double xf = point * Double.parseDouble(score.group(5));
			les.add(new lession(score.group(1), score.group(2), score.group(3), score.group(4),
					score.group(5), score.group(6), score.group(7), score.group(8),
					score.group(9), score.group(10), score.group(11), score.group(12), xf));
		}
		return les;
	}

	/**
	 * Converts a grade to grade points.
	 *
	 * <p>Five-level grades map to fixed values. Percentage marks of 60 and above map to
	 * {@code (mark - bandFloor) / 10 + bandBase}, with bands starting at 90, 80, 70
	 * and 60. The bands are contiguous, so a fractional mark such as 89.5 falls into
	 * the 80 band instead of matching no band at all.
	 *
	 * @return the grade points, or {@link #NOT_COUNTED} when the grade earns none
	 */
	private static double gradePoint(String grade) {
		// Five-level grading
		if (grade.equals("优")) {
			return 4.5;
		}
		if (grade.equals("良")) {
			return 3.5;
		}
		if (grade.equals("中")) {
			return 2.5;
		}
		if (grade.equals("及格")) {
			return 1.5;
		}
		if (grade.equals("缓考") || grade.equals("不及格")) {
			return NOT_COUNTED;
		}

		// Percentage grading
		double mark;
		try {
			mark = Double.parseDouble(grade);
		} catch (NumberFormatException ignored) {
			// A grade that is neither a known level nor a number cannot be converted;
			// skipping the course keeps one odd record from aborting the whole run.
			return NOT_COUNTED;
		}
		if (mark >= 90) {
			return (mark - 90) / 10 + 4.0;
		}
		if (mark >= 80) {
			return (mark - 80) / 10 + 3.0;
		}
		if (mark >= 70) {
			return (mark - 70) / 10 + 2.0;
		}
		if (mark >= 60) {
			return (mark - 60) / 10 + 1.0;
		}
		return NOT_COUNTED;
	}

	/** Sends {@code form} to {@code url} as a UTF-8 URL-encoded POST and returns the response body. */
	private byte[] postForm(String url, List<NameValuePair> form) throws IOException {
		HttpPost post = new HttpPost(url);
		post.setEntity(new UrlEncodedFormEntity(form, "utf-8"));
		return fetchBody(post);
	}

	/**
	 * Executes {@code request} and returns the complete response body. The response
	 * and its content stream are closed before returning, whether or not reading
	 * succeeds. A response without an entity yields an empty array.
	 */
	private byte[] fetchBody(HttpUriRequest request) throws IOException {
		try (CloseableHttpResponse response = httpClient.execute(request)) {
			HttpEntity entity = response.getEntity();
			if (entity == null) {
				return new byte[0];
			}
			try (InputStream in = entity.getContent()) {
				ByteArrayOutputStream body = new ByteArrayOutputStream();
				byte[] chunk = new byte[1024];
				int read;
				while ((read = in.read(chunk)) != -1) {
					body.write(chunk, 0, read);
				}
				return body.toByteArray();
			}
		}
	}

	/**
	 * Decodes a whole response body in one step, so a multi-byte character is never
	 * split across two reads.
	 */
	private static String decode(byte[] body) {
		// NOTE(review): this uses the platform default charset; switch to the
		// charset the server actually sends once that is confirmed.
		return new String(body);
	}

	/** Writes {@code data} to the file at {@code path}, replacing any existing content. */
	private static void saveToFile(String path, byte[] data) throws IOException {
		try (FileOutputStream out = new FileOutputStream(path)) {
			out.write(data);
		}
	}
}



  • 3
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值