package com.xgb.task;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.StandardCharsets;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Program: verify
 * Description: fetches a web page and prints company listings extracted from it.
 * Author: Mr.Lidong
 * Created: 2022-08-09 17:11
 **/
public class Test {

    /**
     * Entry point: downloads one listing page from 11467.com, parses it with
     * jsoup, and prints the company information found in each ".f_l" element.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String url = "https://lanzhou.11467.com/xigu/pn2";
        // Fetch the whole page body as a single String.
        String content = getHtml(url);
        // Parse the raw HTML into a jsoup Document.
        Document doc = Jsoup.parse(content);
        // One ".f_l" element per company listing on the page.
        Elements links = doc.select(".f_l");
        for (Element element : links) {
            System.out.println(element);
            System.out.println("==============================================");
            System.out.println("公司名称:" + element.select("a").text());
            System.out.println(element.select("div>div").eq(0).text());
            System.out.println(element.select("div>div").eq(1).text());
            System.out.println(element.select("div>span").eq(0).text());
            System.out.println(element.select("div>span").eq(1).text());
            System.out.println("==============================================");
        }
    }

    /**
     * Downloads the document at {@code url} and returns its body as a String,
     * decoded as UTF-8.
     *
     * <p>On any I/O failure the stack trace is printed and whatever was read
     * so far (possibly the empty string) is returned — best-effort semantics,
     * matching the original behavior.
     *
     * @param url the absolute URL to fetch
     * @return the response body (UTF-8 decoded), or "" if the request failed
     */
    public static String getHtml(String url) {
        StringBuilder body = new StringBuilder();
        try {
            URL net = new URL(url);
            URLConnection connection = net.openConnection();
            // try-with-resources guarantees the reader is closed on every path.
            // (The old explicit finally block threw NullPointerException when
            // openConnection()/getInputStream() failed, because `in` was null.)
            try (BufferedReader in = new BufferedReader(
                    new InputStreamReader(connection.getInputStream(), StandardCharsets.UTF_8))) {
                String line;
                while ((line = in.readLine()) != null) {
                    // readLine() strips the line terminator; re-append it so
                    // adjacent lines of the document are not glued together.
                    body.append(line).append('\n');
                }
            }
        } catch (IOException e) {
            // MalformedURLException is an IOException, so this also covers a bad URL.
            e.printStackTrace();
            System.out.println("网络超时");
        }
        return body.toString();
    }
}
/*
 * Required Maven (pom.xml) dependency:
 *
 * <dependency>
 *     <groupId>org.jsoup</groupId>
 *     <artifactId>jsoup</artifactId>
 *     <version>1.11.2</version>
 * </dependency>
 */