Java 模拟登录并爬取页面信息:使用 HttpClient 发送请求,使用 Jsoup 解析 HTML

Jsoup 下载地址:https://jsoup.org/download

 

首先在浏览器的开发者工具(F12)中找出发送登录请求的位置:通过图一、图二找到请求的 URL,通过图三找到模拟登录时账号和密码对应的参数名。

 

-------------------------------------------------------------后台代码-----------------------------------------------------------------------------------

import com.gx.soft.common.util.DateFormatUtil;
import com.gx.soft.monitor.persistence.domain.YcMonitor;
import com.gx.soft.monitor.persistence.manager.YcMonitorManager;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import org.apache.http.HttpResponse;
import org.apache.http.NameValuePair;
import org.apache.http.client.HttpClient;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.protocol.HTTP;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestMethod;
 

@Scheduled(cron = "0 0/5 * * * ? ")
    @RequestMapping(value = "yc", method = RequestMethod.GET, produces = "application/json")
    /**
     * Logs in to web.tlink.io, fetches the user index page, extracts the sensor
     * readings from its {@code <span>} elements and saves them as one
     * {@link YcMonitor} record.
     *
     * <p>Triggered by the cron expression above and also reachable via GET "yc".
     * Any failure (network error, unexpected HTTP status, missing page element)
     * is reported and no record is saved; nothing is thrown to the caller.
     */
    public void inputYc() {
        // One client for both requests so the session cookie set by the login
        // response is sent along with the following GET.
        HttpClient client = new DefaultHttpClient();
        try {
            HttpPost httpPost = new HttpPost("http://web.tlink.io/user/login.htm"); // login URL
            List<NameValuePair> nvp = new ArrayList<NameValuePair>();
            // NOTE(review): credentials are hard-coded placeholders; load them from configuration.
            nvp.add(new BasicNameValuePair("loginAccount", "************"));  // account name
            nvp.add(new BasicNameValuePair("loginPassword", "************")); // password
            httpPost.setEntity(new UrlEncodedFormEntity(nvp, HTTP.UTF_8));

            HttpResponse loginResponse = client.execute(httpPost);
            int loginStatus = loginResponse.getStatusLine().getStatusCode();
            // Consume the login body so the connection is released before the next
            // request, instead of aborting the request and discarding the connection.
            EntityUtils.consume(loginResponse.getEntity());
            if (loginStatus != 200) {
                System.out.println("失败");
                return;
            }

            // The data page has to be requested with GET after a successful login.
            HttpGet httpget = new HttpGet("http://web.tlink.io/user/userIndex.htm");
            HttpResponse pageResponse = client.execute(httpget);
            if (pageResponse.getStatusLine().getStatusCode() != 200) {
                EntityUtils.consume(pageResponse.getEntity());
                System.out.println("失败");
                return;
            }
            String html = EntityUtils.toString(pageResponse.getEntity(), "utf-8");
            Document document = Jsoup.parse(html); // build the DOM tree with Jsoup

            // Read spans s_222021 .. s_222029; list index i-1 holds span s_22202<i>.
            // Indices 4 and 8 are read but not stored below.
            List<String> list = new ArrayList<>();
            for (int i = 1; i < 10; i++) {
                list.add(requireText(document, "span[id='s_22202" + i + "']"));
            }

            YcMonitor ycMonitor = new YcMonitor();
            ycMonitor.setBdId("010802");
            ycMonitor.setTemperature(list.get(0));
            ycMonitor.setHumidity(list.get(1));
            ycMonitor.setSpeed(list.get(2));
            ycMonitor.setWind(list.get(3) + "度");
            ycMonitor.setPm25(list.get(5));
            ycMonitor.setPm10(list.get(6));
            ycMonitor.setDb(list.get(7));
            ycMonitor.setRowId(null);
            ycMonitor.setTime(requireText(document, "span[id='st_222021']"));
            ycMonitor.setLocalRecordTime(DateFormatUtil.convertUtilDateToString(new Date()));
            ycMonitorManager.save(ycMonitor);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Release the client's connections; this method runs every five minutes,
            // so an unclosed client would leak a connection manager on each run.
            client.getConnectionManager().shutdown();
        }
    }

    /**
     * Returns the text of the first element matching {@code cssQuery}.
     *
     * @param document parsed page to search
     * @param cssQuery Jsoup CSS selector
     * @return the text of the first matching element
     * @throws IllegalStateException if no element matches, e.g. because the login
     *         did not succeed or the page layout changed
     */
    private static String requireText(Document document, String cssQuery) {
        Element element = document.select(cssQuery).first();
        if (element == null) {
            throw new IllegalStateException("Element not found in page: " + cssQuery);
        }
        return element.text();
    }
  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值