Java 爬虫
可以尝试一下 Java 爬虫,爬取豆瓣电影榜单。之后可以用上正则表达式,并结合 GUI。
最初版本
import java.net.*;
import java.util.*;
import java.nio.charset.*;
import java.io.*;
import java.math.*;
/**
 * Minimal crawler: downloads the Douban movie chart page and saves the raw
 * response bytes to a local text file, printing the running byte count.
 */
public class Test
{
    /** Page to download. */
    private static final String CHART_URL = "https://movie.douban.com/chart";

    /** Destination file for the raw page bytes. */
    private static final String OUTPUT_PATH = "E:\\java\\practice\\seven\\豆瓣电影排行榜.txt";

    /**
     * Chrome user-agent string sent with the request.
     * NOTE(review): presumably needed so the site answers as it would to a
     * browser -- confirm against the server's behavior.
     */
    private static final String USER_AGENT =
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.36";

    /** Size of the copy buffer in bytes. */
    private static final int BUFFER_SIZE = 1024;

    /**
     * Fetches {@link #CHART_URL} and writes the response body to
     * {@link #OUTPUT_PATH}, replacing any previous content of that file.
     *
     * @param args unused
     * @throws Exception if the URL is malformed, the connection fails, or the
     *                   output file cannot be written
     */
    public static void main(String[] args) throws Exception
    {
        URL url = new URL(CHART_URL);
        URLConnection conn = url.openConnection();
        conn.setRequestProperty("accept", "*/*");
        conn.setRequestProperty("connection", "Keep-Alive");
        conn.setRequestProperty("user-agent", USER_AGENT);
        conn.connect();

        long length = 0;
        // try-with-resources closes both streams even if the copy fails midway.
        // FileOutputStream truncates the target, so a shorter page never leaves
        // the tail of an older download behind.
        try (InputStream input = conn.getInputStream();
             OutputStream output = new FileOutputStream(OUTPUT_PATH))
        {
            byte[] buffer = new byte[BUFFER_SIZE];
            int hasRead;
            while ((hasRead = input.read(buffer)) != -1)
            {
                // Write only the bytes actually read; the rest of the buffer
                // still holds data from a previous iteration.
                output.write(buffer, 0, hasRead);
                length += hasRead;
                System.out.println("爬取进度:" + length);
            }
        }
        System.out.println("爬取完成!");
    }
}