HttpClient + Jsoup 网页数据抓取



import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;


import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.methods.GetMethod;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;




public class baixingwang {

String url="http://shenzhen.baixing.com/";
String info="";
HttpClient client=new HttpClient();
GetMethod get=null;

public void  statPage(){
// get.addRequestHeader("Content-Type", "text/html;charset=UTF-8");
// get.addRequestHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8");
// get.addRequestHeader("", "");
// get.addRequestHeader("", "");
try {
get=new GetMethod(url);
int statcode=client.executeMethod(get);
if(statcode==200){
info=get.getResponseBodyAsString();
jiexi(info);
}else{
System.out.println("statcode is not 200!");
throw new Exception("statcode is not 200!");
}
} catch (Exception e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}

   public void jiexi(String info){
  Document document=Jsoup.parse(info);
  Elements ele=document.select("section[class=category-col category-cheliang home-col-1]")
  .select("a");
  String url1 = "http://shenzhen.baixing.com"+ele.get(1).attr("href")+"?afo=q9b";
  getErShouCheContent(url1);
   }
   public void getErShouCheContent(String h_url){
  String content="";
  GetMethod get1=new GetMethod(h_url);
try {
int statcod = client.executeMethod(get1);
if(statcod==200){
content=get1.getResponseBodyAsString();
getInfo(content);
}
} catch (HttpException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
   }
   
   public void getInfo(String content){
  Document doc=Jsoup.parse(content);
  Elements eles= doc.select("ul[class=list-ad-items]")
  .select("li");
  Elements ele1 = eles.get(0).select("div[class=media-body]");
  String str1 = ele1.select("span").text();
  String str2=ele1.select("a").get(0).text();
  String str3=ele1.select("div[class=ad-item-detail]").get(0).text();
  String str4=ele1.select("div[class=ad-item-detail]").get(1).text();
  StringBuilder sb=new StringBuilder();
  sb.append(str1);
  sb.append(str2);
  sb.append(str3);
  sb.append(str4);
  
  File file = new File("D://abc/a/");
  try {
  if(!file.exists()){
file.mkdirs();
  }
  File file_n = new File("D://abc/a/b.txt");
  FileWriter writer=new FileWriter(file_n.getAbsoluteFile(),true);
  BufferedWriter bw=new BufferedWriter(writer);
  bw.write(sb.toString());
  bw.close();
} catch (FileNotFoundException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
   } 

public static void main(String[] args) {
baixingwang bz=new baixingwang();
bz.statPage();


}


}
已标记关键词 清除标记
相关推荐
©️2020 CSDN 皮肤主题: 大白 设计师:CSDN官方博客 返回首页