使用myeclipse和Apache4.3实现网络爬虫GET方法-CSDN博客

本文链接：https://blog.csdn.net/acm2014/article/details/50699363

这篇博客介绍了如何利用 MyEclipse 开发工具和 Apache HttpClient 4.3 实现网络爬虫的 GET 请求。文章强调了 HttpClient 不同版本之间函数和类的差异，并提供了具体实现代码。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

本文的爬虫使用 MyEclipse 和 Apache HttpClient 来实现。其中，HttpClient 有多个版本，不同版本之间有很多函数和类并不相同，需要根据实际使用的版本号来选取对应的函数。这里使用的是 HttpClient 4.3。具体实现代码如下：

package test123;
//
//Date:2016.2.19
//Writer:P.C.
//Description: 使用Get方法获取网页源码

import java.io.IOError;
import java.io.IOException;

import org.apache.http.HttpEntity;
import org.apache.http.HttpRequest;
import org.apache.http.HttpResponse;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
//import org.apache.http.impl.client.DefaultHttpClient;//本类包内的内容可以使用，所以画横线，但已不推荐使用
import org.apache.http.util.EntityUtils;





//public class initial {
//	public static void main(String[] argx){
//	String path="http://www.***.com";
//	System.out.println(path);
//	try {
//		   URL pageURL=new URL("http://www.baidu.com");
//		  } catch (Exception e) {
//		   e.printStackTrace();
//		  }
//	System.out.println(path);
//	}
//	
//}
/**
 * Minimal crawler example: fetches a single page with an HTTP GET request
 * through Apache HttpClient and prints the page source (or an error marker)
 * to standard output.
 */
public class initial{
	/**
	 * Sends a GET request to the hard-coded URL and prints the outcome.
	 *
	 * <p>On status 200 the response body is printed; on any other status the
	 * text {@code ERROR} is printed; on an I/O failure the stack trace and the
	 * exception message are printed.
	 *
	 * @param argx command-line arguments (unused)
	 */
	public static void main(String[] argx){
		String result;

		// Build the GET request, similar to typing an address into a browser.
		HttpGet getmethod=new HttpGet("http://www.***.com");

		// The client plays the role of the browser. try-with-resources closes it
		// on every path so the resources it holds are released.
		try(CloseableHttpClient httpClient=HttpClients.createDefault()){
			// Executing the request is like pressing Enter after typing the address.
			HttpResponse httpResponse=httpClient.execute(getmethod);
			HttpEntity httpEntity=httpResponse.getEntity();

			if(httpResponse.getStatusLine().getStatusCode()==200){
				// Status 200 means the request succeeded. EntityUtils.toString
				// rejects a null entity, so a response without a body is treated
				// as an empty page. UTF-8 is only the fallback used when the
				// response itself does not declare a charset.
				result = (httpEntity==null) ? "" : EntityUtils.toString(httpEntity,"UTF-8");
			}else{
				// Any other status is reported as a failure; the unused body is
				// drained first so the connection is left in a clean state.
				EntityUtils.consume(httpEntity);
				result="ERROR";
			}
			// Print the result so it can be compared with the page's real source.
			System.out.println(result);
		}catch(IOException e){
			// ClientProtocolException is a subclass of IOException, so this single
			// handler also covers protocol errors.
			e.printStackTrace();
			// getMessage() may be null; String.valueOf avoids a NullPointerException.
			result=String.valueOf(e.getMessage());
			System.out.println(result);
		}
	}
}

网络爬虫之get方法