一直对爬虫很感兴趣,四个月前就想有一个能够批量获取单词的发音的爬虫。前两天学校里有一个爬虫培训课,听课后就写了这么一个小小的爬虫。
功能:将指定一批单词的发音下载到本地文件系统中。
下载到指定文件夹中:
程序如下:
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * A first web crawler: downloads the pronunciation audio for a batch of words.
 *
 * <p>Reads one word per line from the word list file, requests the page at
 * {@code URL_HEAD + word}, scans the returned text with a regular expression and
 * passes the link cut out of every match to {@code DownloadUtils}, which stores it
 * as an mp3 file in the save directory.
 *
 * @author imyLove7
 */
public class Demo2 {
    /** Word list to process: one word per line, read as UTF-8. */
    private static final String WORD_LIST_PATH = "E:\\word.txt";

    /** Directory the mp3 files are written to; DownloadUtils requires it to exist. */
    private static final String SAVE_DIR = "E:\\word\\22";

    // NOTE(review): the four literals below are empty in this listing, presumably
    // stripped when the code was published. They must be filled in (page URL prefix,
    // regex that matches the audio markup, and the two markers that delimit the mp3
    // link inside a match) before the crawler can do useful work.
    private static final String URL_HEAD = "";
    private static final String MP3_REGEX = "";
    private static final String MP3_URL_START = "";
    private static final String MP3_URL_END = "";

    /** Compiled once instead of once per word. */
    private static final Pattern MP3_PATTERN = Pattern.compile(MP3_REGEX);

    /**
     * Processes every word of the word list and prints a completion message at the end,
     * whether or not the run succeeded.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // try-with-resources closes the word list even when processing fails.
        try (BufferedReader words = new BufferedReader(
                new InputStreamReader(new FileInputStream(WORD_LIST_PATH), "UTF-8"))) {
            boolean firstLine = true;
            String word;
            // Every line is processed, including the first one (it used to be read
            // and thrown away before the loop started).
            while ((word = words.readLine()) != null) {
                if (firstLine) {
                    firstLine = false;
                    // Some editors write a byte-order mark at the start of UTF-8 files;
                    // drop it so it does not end up in the URL and the file name.
                    if (!word.isEmpty() && word.charAt(0) == 0xFEFF) {
                        word = word.substring(1);
                    }
                }
                word = word.trim();
                if (word.isEmpty()) {
                    continue;
                }
                try {
                    downloadPronunciations(word);
                } catch (IOException e) {
                    // A single unreachable page must not abort the rest of the batch.
                    e.printStackTrace();
                }
            }
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            System.out.println("任务执行完毕!");
        }
    }

    /**
     * Fetches the page for one word and downloads every audio link found on it.
     *
     * @param word the word whose pronunciations are downloaded
     * @throws IOException if the page cannot be requested or read
     */
    private static void downloadPronunciations(String word) throws IOException {
        // Build the page URL for this word.
        String targetUrl = URL_HEAD + word;
        System.out.println("当前:" + word);
        String source = fetchPage(targetUrl);

        // Distinguishes the two accents: matches are labelled alternately, the first
        // one as US. The flag starts fresh for every word so that a page with an odd
        // number of matches cannot shift the labels of all following words.
        boolean flag = true;
        Matcher matcher = MP3_PATTERN.matcher(source);
        while (matcher.find()) {
            String mp3 = matcher.group();

            // Build the target file name.
            String targetName;
            if (flag) {
                System.out.println("US");
                targetName = word + "_" + "ame";
            } else {
                System.out.println("UK");
                targetName = word + "_" + "bre";
            }
            flag = !flag;

            // Cut the mp3 link out of the matched text.
            int start = mp3.indexOf(MP3_URL_START);
            int end = mp3.indexOf(MP3_URL_END);
            if (start < 0 || end < start) {
                // substring would throw an unchecked exception and end the whole run.
                System.err.println("Skipping match without a recognizable mp3 link: " + mp3);
                continue;
            }
            String mp3Url = mp3.substring(start, end);
            System.out.println(mp3Url);

            DownloadUtils downloadUtils = new DownloadUtils(mp3Url, targetName, "mp3", SAVE_DIR);
            try {
                downloadUtils.httpDownload();
            } catch (Exception e) {
                // Best effort: report the failed file and carry on with the next match.
                e.printStackTrace();
            }
        }
    }

    /**
     * Requests a URL and returns the response body as one string with the line breaks removed.
     *
     * @param targetUrl address of the page to fetch
     * @return all lines of the response, concatenated
     * @throws IOException if the URL is malformed or the request fails
     */
    private static String fetchPage(String targetUrl) throws IOException {
        URL url = new URL(targetUrl);
        HttpURLConnection urlConnection = (HttpURLConnection) url.openConnection();
        // Establish the connection to the remote server.
        urlConnection.connect();
        System.out.println("连接成功!");

        // The server delivers bytes; wrap them in a character reader to get text.
        // NOTE(review): assumes the page is UTF-8 encoded - confirm for the real site.
        StringBuilder builder = new StringBuilder();
        try (BufferedReader page = new BufferedReader(
                new InputStreamReader(urlConnection.getInputStream(), "UTF-8"))) {
            String line;
            while ((line = page.readLine()) != null) {
                builder.append(line);
            }
        }
        return builder.toString();
    }
}
import java.io.File;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
/**
 * Download helper: fetches one remote resource over HTTP and stores it as
 * {@code targetName.targetType} inside a local directory.
 *
 * @author imyLove7
 */
public class DownloadUtils {
    /** Size of the copy buffer used while downloading, in bytes. */
    private static final int BUFFER_SIZE = 8192;

    /** Address of the resource to download. */
    private String downloadPath = null;
    /** File name (without extension) the download is stored under. */
    private String targetName = null;
    /** File extension of the stored file, without the dot. */
    private String targetType = null;
    /** Directory the file is written to. */
    private File savedPath = null;

    /** Creates an unconfigured instance; use the setters before calling {@link #httpDownload()}. */
    public DownloadUtils() {
    }

    /**
     * @param downloadPath address of the resource to download
     * @param targetName file name without extension
     * @param targetType file extension without the dot
     * @param savedPath path of the directory to save into
     */
    public DownloadUtils(String downloadPath, String targetName, String targetType, String savedPath) {
        this(downloadPath, targetName, targetType, new File(savedPath));
    }

    /**
     * @param downloadPath address of the resource to download
     * @param targetName file name without extension
     * @param targetType file extension without the dot
     * @param savedPath directory to save into
     */
    public DownloadUtils(String downloadPath, String targetName, String targetType, File savedPath) {
        this.downloadPath = downloadPath;
        this.targetName = targetName;
        this.targetType = targetType;
        this.savedPath = savedPath;
    }

    public String getDownloadPath() {
        return downloadPath;
    }

    public void setDownloadPath(String downloadPath) {
        this.downloadPath = downloadPath;
    }

    public String getTargetName() {
        return targetName;
    }

    public void setTargetName(String targetName) {
        this.targetName = targetName;
    }

    public String getTargetType() {
        return targetType;
    }

    public void setTargetType(String targetType) {
        this.targetType = targetType;
    }

    public File getSavedPath() {
        return savedPath;
    }

    public void setSavedPath(File savedPath) {
        this.savedPath = savedPath;
    }

    /**
     * Downloads {@code downloadPath} into {@code savedPath/targetName.targetType}.
     *
     * <p>An existing file of the same name is replaced, so running the same download
     * twice does not append a second copy of the data to the first one.
     *
     * @throws IllegalStateException if the download path is null, or the save path is
     *     null, missing or not a directory
     * @throws Exception if the URL is malformed, the request fails or the file cannot be written
     */
    public void httpDownload() throws Exception {
        // Validate the configuration before touching the network.
        if (downloadPath == null) {
            throw new IllegalStateException("非法!下载路径为空!");
        }
        if (savedPath == null || !savedPath.exists() || !savedPath.isDirectory()) {
            throw new IllegalStateException("非法!保存路径为空或者不存在!");
        }

        HttpURLConnection urlConnection = (HttpURLConnection) new URL(downloadPath).openConnection();
        urlConnection.connect();

        File file = new File(savedPath, targetName + "." + targetType);
        // The response stream is opened first, so a failed request leaves no file behind.
        // Both streams are closed even if the copy fails half way.
        try (InputStream inputStream = urlConnection.getInputStream();
                FileOutputStream fileOutputStream = new FileOutputStream(file)) {
            // Copy in chunks; the stream is flushed once when it is closed.
            byte[] buffer = new byte[BUFFER_SIZE];
            int read;
            while ((read = inputStream.read(buffer)) != -1) {
                fileOutputStream.write(buffer, 0, read);
            }
        }
    }
}