JAVA爬取单词音标，特此记录_java用有道云接口抓取音标-CSDN博客

本文链接：https://blog.csdn.net/qq_40173438/article/details/119519776

最近想做一个英语词典。市面上的词典，要么广告太多且收费，要么不能成套、成体系地提供各阶段的单词。于是我写了个爬虫，爬取了市面上的一些单词。本文主要记录爬取单词音标（发音 mp3）的过程，以便后续查询。（ps：大家在写爬虫时尽量使用 python，总感觉 java 写的爬虫效率比较低，且非常消耗内存，怪怪的。）

1.组装url，接口地址+单词

// Youdao pronunciation API. type=0: American English, type=1: British English.
String baseUrl = "http://dict.youdao.com/dictvoice?type=1&audio=";
// Words (or multi-word phrases) whose pronunciation audio should be fetched.
List<String> wordList = lianCiZhenti5500Mapper.findVcVocabulary();
for (String sourceWord : wordList) {
    // Skip missing entries so that one bad row cannot abort the whole run.
    if (sourceWord == null || sourceWord.trim().isEmpty()) {
        continue;
    }
    try {
        // Percent-encode the whole word, not just spaces: characters such as
        // ', &, ? or # would otherwise corrupt the query string. URLEncoder
        // emits '+' for a space (a literal '+' becomes %2B), so converting
        // '+' back to %20 keeps the URL identical to before for plain phrases.
        String encodedWord = java.net.URLEncoder.encode(sourceWord, "UTF-8").replace("+", "%20");
        // The request URL is the API address followed by the encoded word.
        String wordUrl = baseUrl + encodedWord;
        // The file on disk is named after the original, unencoded word.
        DownloadUtils downloadUtils = new DownloadUtils(wordUrl, sourceWord, "mp3", "H:\\wordMp3\\words");
        downloadUtils.httpDownload();
        System.out.print("\t \t \t下载成功");
    } catch (Exception e) {
        // Best effort: report the failure and carry on with the next word.
        System.out.print("\t \t \t下载失败");
        e.printStackTrace();
    }
}

2. 使用http下载

/**
 * Downloads the resource behind a URL over HTTP (here: a word's pronunciation
 * mp3) and stores it as {@code <rootDir>/<wordString>.<targetType>}.
 *
 * <p>An existing target file is overwritten, so downloading the same word
 * twice leaves exactly one copy of the audio on disk.
 */
public class DownloadUtils {
    /** Size of the copy buffer used while streaming the response to disk. */
    private static final int BUFFER_SIZE = 8192;
    /** Maximum time to wait for the TCP connection to be established. */
    private static final int CONNECT_TIMEOUT_MS = 10_000;
    /** Maximum time to wait for data once connected. */
    private static final int READ_TIMEOUT_MS = 30_000;

    // URL of the resource to download.
    private String wordUrl;
    // The word itself; used as the base name of the saved file.
    private String wordString;
    // Extension of the saved file, without the leading dot (e.g. "mp3").
    private String targetType;
    // Directory the file is saved into.
    private File rootDir;

    /**
     * @param wordUrl    URL of the resource to download
     * @param wordString base name of the file to write
     * @param targetType file extension without the dot
     * @param rootDir    directory to save into; created on demand
     */
    public DownloadUtils(String wordUrl, String wordString, String targetType, File rootDir) {
        super();
        this.wordUrl = wordUrl;
        this.wordString = wordString;
        this.targetType = targetType;
        this.rootDir = rootDir;
    }

    /**
     * Same as the {@link File}-based constructor, taking the directory as a path string.
     *
     * @param wordUrl    URL of the resource to download
     * @param wordString base name of the file to write
     * @param targetType file extension without the dot
     * @param rootDir    path of the directory to save into; must not be {@code null}
     */
    public DownloadUtils(String wordUrl, String wordString, String targetType, String rootDir) {
        this(wordUrl, wordString, targetType, new File(rootDir));
    }

    /** Creates an unconfigured instance; {@link #httpDownload()} rejects it. */
    public DownloadUtils() {
        super();
    }

    /**
     * Downloads {@code wordUrl} and writes the response body to the target file.
     *
     * @throws IllegalStateException if the URL is empty or no target directory was given
     * @throws Exception             if the directory cannot be created, the server does
     *                               not answer with HTTP 200, or any I/O error occurs
     */
    public void httpDownload() throws Exception {
        validate();
        HttpURLConnection connection = (HttpURLConnection) new URL(wordUrl).openConnection();
        // Without timeouts a single stalled request would hang the crawler forever.
        connection.setConnectTimeout(CONNECT_TIMEOUT_MS);
        connection.setReadTimeout(READ_TIMEOUT_MS);
        try {
            int status = connection.getResponseCode();
            if (status != HttpURLConnection.HTTP_OK) {
                throw new java.io.IOException("Unexpected HTTP status " + status + " for " + wordUrl);
            }

            // mkdirs() returns false both on failure and when the directory already
            // exists, so re-check before treating it as an error.
            if (!rootDir.mkdirs() && !rootDir.isDirectory()) {
                throw new java.io.IOException("Cannot create target directory: " + rootDir);
            }
            File target = new File(rootDir, wordString + "." + targetType);

            // Open in truncate mode: appending would glue a second copy of the audio
            // onto a file left over from an earlier run. try-with-resources closes
            // both streams even if the copy fails half-way.
            try (InputStream in = connection.getInputStream();
                 FileOutputStream out = new FileOutputStream(target)) {
                byte[] buffer = new byte[BUFFER_SIZE];
                int read;
                while ((read = in.read(buffer)) != -1) {
                    out.write(buffer, 0, read);
                }
            }
        } finally {
            connection.disconnect();
        }
    }

    /**
     * Checks that the instance has the minimum configuration needed to download.
     *
     * @throws IllegalStateException if the URL is null/empty or the target directory is null
     */
    private void validate() throws Exception {
        if (wordUrl == null || wordUrl.equals("")) {
            throw new IllegalStateException("下载路径不能为空!");
        }
        if (null == rootDir) {
            throw new IllegalStateException("目标文件夹不存在!");
        }
    }

}