Java大作业

能饮一杯吴

已于 2022-10-03 21:30:12 修改

阅读量458

点赞数 1

分类专栏： java 文章标签： java tomcat jvm

于 2022-10-03 21:25:17 首次发布

本文链接：https://blog.csdn.net/weixin_55466841/article/details/127060568

版权

java 专栏收录该内容

6 篇文章 1 订阅

订阅专栏

搭建tomcat静态网站JavaFx API docs。分析首页index.html，提取javaFx所有类名及其包名，存到一个文档中，文档的每行内容：包名：类名。

搭建tomcat
在linux下搭建
从官网下载压缩包
在这里插入图片描述

tar zxvf apache-tomcat-8.5.82.tar.gz  #解压缩
mv apache-tomcat-8.5.82  tomcat8.5.82  #重命名

#vi编辑配置文件/etc/profile
#打开后，在文档最下方添加以下环境变量配置代码
export CATALINA_HOME=/usr/local/software/tomcat8.5.82
source /etc/profile #使配置生效

cd $CATALINA_HOME/bin  #即 /usr/local/software/tomcat8.5.82/bin
./startup.sh

然后把下载好的JavaFx API文档文件夹放到tomcat的webapps目录下就好了。

主要用到了Jsoup，一些常用的正则表达式。
先分析一下页面源码
在首页有三个链接
http://localhost:8080/api/overview-frame.html
在这里插入图片描述
http://localhost:8080/api/allclasses-frame.html
http://localhost:8080/api/overview-summary.html
第一个链接列出了所有的包，通过它可以进一步访问到所有的类，所以我用第一个链接爬取的。
从第一个页面爬到的URL是各个包的框架页，类的细节需要访问每个类页面的URL，所以把包框架页URL中最后一个斜杠“/”后的文件名替换为href中的文件名，例如：
http://localhost:8080/api/javafx/animation/package-frame.html
http://localhost:8080/api/javafx/animation/Animation.html

package org.example;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.*;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Crawls a locally hosted Javadoc site and dumps member names and signatures to
 * {@code API.txt}.
 *
 * <p>The crawl has three levels: the overview frame yields one link per package
 * frame, each package frame yields links to class pages, and each class page
 * contributes the {@code h4}/{@code pre} pairs found under {@code div.details}.
 */
public class App{
    /** Prefix prepended to every href found on the overview frame. */
    static String baseURL="http://localhost:8080/api/";

    /** Matches any string containing "javafx"; compiled once instead of per call. */
    private static final Pattern JAVAFX_LINK=Pattern.compile(".*javafx.*");

    /**
     * Loose http/https/ftp URL shape. The port part is {@code (:[0-9]+)?}; the
     * previous {@code (:(0-9)*)*} matched the literal text "0-9" rather than digits,
     * so URLs with a port only passed because the path class also allows digits.
     */
    private static final Pattern URL_SHAPE=Pattern.compile(
            "(ht|f)tp(s?)\\:\\/\\/[0-9a-zA-Z]([-.\\w]*[0-9a-zA-Z])*(:[0-9]+)?(\\/?)([a-zA-Z0-9\\-\\.\\?\\,\\'\\/\\\\&%\\+\\$#_=]*)?");

    /** Separator line written to the output file after every member. */
    private static final String FILE_SEPARATOR="--------------------------------------------------------------";

    /** Separator line printed to the console before every member. */
    private static final String CONSOLE_SEPARATOR="-----------------------------------------------------------";

    /**
     * Crawls the overview frame and writes the result to {@code API.txt} as UTF-8.
     *
     * @param args unused
     * @throws IOException if a page cannot be fetched or the file cannot be written
     */
    public static void main(String[] args) throws IOException {
        String url="http://localhost:8080/api/overview-frame.html";
        File file=new File("API.txt");
        // try-with-resources flushes and closes the writer even when the crawl fails midway.
        try(BufferedWriter bw=new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file),"UTF-8"))){
            parse(url,bw);
        }
    }

    /**
     * Replaces everything after the last '/' of {@code URL} with {@code href}.
     *
     * @param URL  URL of the page the href was found on
     * @param href file name (relative link) taken from that page
     * @return the URL of the linked page in the same directory
     */
    private static String siblingUrl(String URL,String href){
        int lastSlash=URL.lastIndexOf("/");
        return URL.substring(0,lastSlash)+"/"+href;
    }

    /**
     * Filters out links that are of no interest.
     *
     * @param URL candidate URL
     * @return {@code true} if the trimmed URL contains "javafx"
     */
    private static boolean check(String URL){
        return JAVAFX_LINK.matcher(URL.trim()).matches();
    }

    /**
     * Checks by regular expression that the string looks like a well-formed URL.
     *
     * @param URL candidate URL
     * @return {@code true} if the trimmed URL matches {@link #URL_SHAPE}
     */
    private static boolean checkURL(String URL){
        return URL_SHAPE.matcher(URL.trim()).matches();
    }

    /**
     * Walks overview frame -> package frames -> class pages and writes every
     * member found on the class pages to {@code bw}.
     *
     * @param url URL of the overview frame
     * @param bw  destination for the extracted members
     * @throws IOException if a page cannot be fetched or a write fails
     */
    private static void parse(String url,BufferedWriter bw) throws IOException {
        // ignoreContentType disables jsoup's content-type check; without it,
        // hitting a non-HTML resource (e.g. a js file) raises an error.
        Connection.Response overview=Jsoup.connect(url).timeout(4000).userAgent("Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9.2.15").ignoreContentType(true).execute();
        Elements hrefs=overview.parse().select("[href]");

        for(Element href:hrefs){
            String packageUrl=baseURL+href.attr("href");
            if(!checkURL(packageUrl)||!check(packageUrl)){
                continue;
            }
            System.out.println(packageUrl);
            Connection.Response packageFrame=Jsoup.connect(packageUrl).ignoreContentType(true).execute();
            Elements classLinks=packageFrame.parse().select("a[href]");

            // The first two anchors of a package frame are skipped, as in the original crawl.
            for(int i=2;i<classLinks.size();i++){
                Element classLink=classLinks.get(i);
                // Swap the package-frame file name for the class page's file name.
                String classUrl=siblingUrl(packageUrl,classLink.attr("href"));
                Connection.Response classPage=Jsoup.connect(classUrl).ignoreContentType(true).execute();
                // All member details live under div elements with class "details".
                for(Element details:classPage.parse().select("div.details")){
                    writeMembers(details,bw);
                }
                System.out.println("h"+(i+1)+":"+classLink);
            }
        }
    }

    /**
     * Writes each h4/pre pair inside one details section: h4 holds the member
     * name, pre holds its signature, and they are paired by position.
     *
     * @param details a {@code div.details} element of a class page
     * @param bw      destination writer
     * @throws IOException if writing fails (propagated instead of being logged and ignored)
     */
    private static void writeMembers(Element details,BufferedWriter bw) throws IOException {
        Elements names=details.select("h4");
        Elements signatures=details.select("pre");
        // Guard against a section with fewer pre blocks than headings.
        int count=Math.min(names.size(),signatures.size());
        for(int j=0;j<count;j++){
            String name=names.get(j).text();
            String signature=signatures.get(j).text();

            System.out.println(CONSOLE_SEPARATOR);
            System.out.println(name);
            System.out.println(signature);

            bw.write(name);
            bw.newLine();
            // Turn &nbsp; (U+00A0, non-breaking space) into a plain space.
            bw.write(signature.replace('\u00A0',' '));
            bw.newLine();
            bw.write(FILE_SEPARATOR);
            bw.newLine();
        }
    }
}