package com.craw.start;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;
/**
 * Minimal single-page crawler: downloads one page over HTTP, saves the raw
 * response body to an {@code .html} file and echoes the page text to standard
 * output.
 */
public class CrawStart {

    /** Page fetched when no URL is given on the command line. */
    private static final String DEFAULT_URL = "http://www.baidu.com";

    /** Directory the page is saved into when none is given on the command line. */
    private static final String DEFAULT_OUTPUT_DIR = "E:\\craw";

    /** Size of the copy buffer, in bytes. */
    private static final int BUFFER_SIZE = 1024;

    /**
     * Fetches a page and stores it on disk.
     *
     * <p>When the server answers with HTTP 200 the body is written unchanged to
     * {@code <output dir>/<name>.html} and then printed once, decoded as UTF-8.
     * Any other status is reported on standard error and nothing is written.
     *
     * @param args optional overrides: {@code args[0]} is the URL to fetch,
     *             {@code args[1]} is the output directory; missing entries fall
     *             back to the built-in defaults
     * @throws IOException if the URL is malformed, the connection or transfer
     *                     fails, or the output directory/file cannot be created
     */
    public static void main(String[] args) throws IOException {
        boolean hasUrl = args != null && args.length > 0;
        boolean hasDir = args != null && args.length > 1;
        URL url = new URL(hasUrl ? args[0] : DEFAULT_URL);
        File outputDir = new File(hasDir ? args[1] : DEFAULT_OUTPUT_DIR);

        HttpURLConnection connection = (HttpURLConnection) url.openConnection();
        try {
            int statusCode = connection.getResponseCode();
            // Check the status BEFORE asking for the body: getInputStream()
            // throws for error responses instead of returning a stream.
            if (statusCode != HttpURLConnection.HTTP_OK) {
                System.err.println("Skipping " + url + ": HTTP status " + statusCode);
                return;
            }

            // FileOutputStream creates the file but not its parent directory.
            if (!outputDir.isDirectory() && !outputDir.mkdirs()) {
                throw new IOException("Cannot create output directory: " + outputDir);
            }
            File target = new File(outputDir, fileNameFor(url));

            // The whole body is also kept in memory so it can be decoded in one
            // go; decoding chunk by chunk would corrupt any multi-byte UTF-8
            // character that straddles a buffer boundary.
            ByteArrayOutputStream page = new ByteArrayOutputStream();
            try (InputStream in = connection.getInputStream();
                 OutputStream out = new FileOutputStream(target)) {
                byte[] buffer = new byte[BUFFER_SIZE];
                int length;
                while ((length = in.read(buffer)) != -1) {
                    // Only the first 'length' bytes are valid for this read.
                    out.write(buffer, 0, length);
                    page.write(buffer, 0, length);
                }
            }
            // NOTE(review): the body is assumed to be UTF-8, as in the original
            // code; the Content-Type charset is not consulted.
            System.out.println(new String(page.toByteArray(), StandardCharsets.UTF_8));
        } finally {
            connection.disconnect();
        }
    }

    /**
     * Derives the output file name for a URL.
     *
     * <p>The URL fragment (the part after {@code #}) is used when present;
     * otherwise the host name is used, so a URL without a fragment no longer
     * produces {@code null.html}. Characters outside {@code [A-Za-z0-9._-]} are
     * replaced with {@code _} to keep the name valid as a single path segment.
     *
     * @param url the page address
     * @return a file name ending in {@code .html}
     */
    private static String fileNameFor(URL url) {
        String base = url.getRef();
        if (base == null || base.isEmpty()) {
            base = url.getHost();
        }
        if (base == null || base.isEmpty()) {
            base = "index";
        }
        return base.replaceAll("[^A-Za-z0-9._-]", "_") + ".html";
    }
}
// Craw: a first, very simple web crawler.
// (Blog page footer: "latest recommended article published 2023-10-04 09:39:33".)