/**
 * Minimal web-page fetcher: downloads one hard-coded URL through
 * {@link java.net.URLConnection} and prints the response body to standard output.
 *
 * <p>The response bytes are decoded explicitly as UTF-8 instead of relying on the
 * platform default charset. If the printed text still looks garbled, check the
 * encoding configured in the IDE/console (Eclipse may default to GBK; switching it
 * to UTF-8 usually resolves the problem).
 *
 * @author Administrator
 */
public class Main {

    /**
     * Fetches the page, collects its content and prints it.
     *
     * <p>Any exception raised while connecting or reading is reported on standard
     * output together with its stack trace; whatever was read up to that point is
     * still printed afterwards.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        // Address of the page to fetch.
        // NOTE(review): this host ("tuiyiizx") differs from the one used by the
        // HttpClient example further down ("tuiyizx") - confirm which is intended.
        String url = "http://www.tuiyiizx.com";
        // Collects the page content; a builder avoids re-copying the whole text
        // on every line the way repeated String concatenation does.
        StringBuilder result = new StringBuilder();
        try {
            // Turn the string into a URL object and open a connection to it.
            URL realUrl = new URL(url);
            URLConnection connection = realUrl.openConnection();
            // Establish the actual connection.
            connection.connect();
            // try-with-resources closes the reader (and the underlying stream)
            // on every exit path, including when reading fails midway.
            try (BufferedReader in = new BufferedReader(
                    new InputStreamReader(
                            connection.getInputStream(),
                            java.nio.charset.StandardCharsets.UTF_8))) {
                // Holds each line as it is read.
                String line;
                while ((line = in.readLine()) != null) {
                    // readLine() strips the line terminator, so lines are joined
                    // back-to-back with no separator between them.
                    result.append(line);
                }
            }
        } catch (Exception e) {
            System.out.println("发送GET请求出现异常!" + e);
            e.printStackTrace();
        } finally {
            // Print whatever was collected, even after a failure.
            System.out.println(result.toString());
        }
    }
}
// ************************************************************
// ***************************************************************
/**
 * Fetches a web page with an HTTP client and prints the response body,
 * written as a unit-test method so it can be launched from a test runner.
 */
public class test01 {

    /**
     * Sends a GET request to a hard-coded URL and, when the response status code
     * is 200, prints the response body to standard output. Nothing is printed for
     * any other status code.
     *
     * <p>Both the client and the response are closed when the method finishes,
     * whether it completes normally or throws.
     *
     * @throws ClientProtocolException propagated from the HTTP client calls
     * @throws IOException propagated from executing the request, reading the
     *         body, or closing the client/response
     */
    @Test
    public void test01() throws ClientProtocolException, IOException {
        // 1. Define the request URL.
        String url = "http://www.tuiyizx.com/";
        // 2. Create the HTTP client; try-with-resources releases it afterwards.
        try (CloseableHttpClient client = HttpClients.createDefault()) {
            // 3. Choose the request method (GET here).
            HttpGet httpGet = new HttpGet(url);
            // 4. Execute the request; the response is closed on every exit path
            //    so the underlying connection is not leaked.
            try (CloseableHttpResponse httpResponse = client.execute(httpGet)) {
                // 5. Only handle the body when the status code is 200.
                if (httpResponse.getStatusLine().getStatusCode() == 200) {
                    // 6. Read the body as text; "UTF-8" is the fallback charset
                    //    used when the response does not declare one.
                    String result =
                            EntityUtils.toString(httpResponse.getEntity(), "UTF-8");
                    System.out.println(result);
                }
            }
        }
    }
}