将http://www.sina.com/的HTML保存到c://shengchengdeHTML.html



Java code

import java.io.*;
import java.net.*;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;



/**
 * Downloads a page and saves its HTML to a local file, re-flowed so that
 * consecutive tags end up on separate lines.
 *
 * <p>Formatting rules applied by {@link #creatHTML(String, String)}:
 * <ul>
 *   <li>The downloaded lines are first joined into one string, separated by a
 *       single space.</li>
 *   <li>A line break is inserted between a {@code >} and the next {@code <}
 *       when nothing but whitespace lies between them; that whitespace is
 *       dropped.</li>
 *   <li>When a line starts with {@code <style}, a break is also inserted after
 *       every <code>}</code> that follows the opening tag, up to
 *       {@code </style>}.</li>
 *   <li>When a line starts with {@code <script}, a break is also inserted after
 *       every <code>{</code>, <code>}</code> and {@code ;} that follows the
 *       opening tag, up to {@code </script>}.</li>
 *   <li>Every emitted line is trimmed and blank lines are skipped.</li>
 * </ul>
 *
 * <p>The splitting is purely character based: it does not parse CSS or
 * JavaScript, so a brace or semicolon inside a string literal also causes a
 * break, and because source lines are joined with a space, content that relies
 * on the original line breaks (for example {@code //} comments in scripts) is
 * not preserved.
 */
public class CreateHTML {

    /** Page fetched when no URL is given on the command line. */
    private static final String DEFAULT_URL = "http://www.sina.com/";

    /** Output file used when no path is given on the command line. */
    private static final String DEFAULT_OUTPUT = "c://shengchengdeHTML.html";

    /**
     * Command-line entry point.
     *
     * @param args optional: {@code args[0]} is the URL to download and
     *             {@code args[1]} the output file; missing values fall back to
     *             the built-in defaults
     */
    public static void main(String[] args) {
        String webURL = (args != null && args.length > 0) ? args[0] : DEFAULT_URL;
        String local = (args != null && args.length > 1) ? args[1] : DEFAULT_OUTPUT;
        new CreateHTML().creatHTML(webURL, local);
    }

    /**
     * Downloads {@code webURL}, formats the HTML and writes it to {@code local}.
     *
     * <p>The page is read and the file is written as UTF-8. The download and
     * the formatting finish before the output file is opened, so a failed
     * download does not touch an existing file. An {@link IOException}
     * (including a malformed URL) is reported with {@code printStackTrace()}
     * and the method then returns normally.
     *
     * @param webURL address of the page to download
     * @param local  path of the file to create or overwrite
     */
    public void creatHTML(String webURL, String local) {
        try {
            String html = download(webURL);
            writeLines(formatHtml(html), local);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Reads the whole resource behind {@code webURL} as UTF-8 text and joins
     * its lines with a single space (a space also follows the last line).
     */
    private static String download(String webURL) throws IOException {
        URLConnection conn = new URL(webURL).openConnection();
        StringBuilder document = new StringBuilder();
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(conn.getInputStream(), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                document.append(line).append(' ');
            }
        }
        return document.toString();
    }

    /** Splits {@code html} into output lines according to the class-level rules. */
    private static List<String> formatHtml(String html) {
        List<String> lines = new ArrayList<String>();
        int length = html.length();
        int lineStart = skipWhitespace(html, 0);
        int pos = lineStart;
        while (pos < length) {
            if (pos == lineStart) {
                // Style and script elements are only recognised at the start of a line.
                int resume = -1;
                if (startsWithIgnoreCase(html, pos, "<style")) {
                    resume = splitElement(html, pos, "</style>", "}", lines);
                } else if (startsWithIgnoreCase(html, pos, "<script")) {
                    resume = splitElement(html, pos, "</script>", "{};", lines);
                }
                if (resume >= 0) {
                    lineStart = resume;
                    pos = resume;
                    continue;
                }
            }
            if (html.charAt(pos) == '>') {
                int next = skipWhitespace(html, pos + 1);
                // Break when the next visible character opens a tag, or when
                // only whitespace is left in the document.
                if (next >= length || html.charAt(next) == '<') {
                    emit(html, lineStart, pos + 1, lines);
                    lineStart = next;
                    pos = next;
                    continue;
                }
            }
            pos++;
        }
        // Whatever is left after the last break is kept, never discarded.
        emit(html, lineStart, length, lines);
        return lines;
    }

    /**
     * Emits the element starting at {@code start}, breaking after every
     * character contained in {@code breakChars} that follows the opening tag.
     *
     * @return the index of {@code closingTag} (everything before it has been
     *         emitted), or {@code html.length()} when the closing tag is
     *         missing and the rest of the text has been emitted
     */
    private static int splitElement(String html, int start, String closingTag,
                                    String breakChars, List<String> lines) {
        int length = html.length();
        int lineStart = start;
        // Start looking for break characters after the opening tag so that
        // attribute values do not cause breaks.
        int tagEnd = html.indexOf('>', start);
        int pos = tagEnd < 0 ? length : tagEnd + 1;
        while (pos < length) {
            if (startsWithIgnoreCase(html, pos, closingTag)) {
                emit(html, lineStart, pos, lines);
                return pos;
            }
            if (breakChars.indexOf(html.charAt(pos)) >= 0) {
                emit(html, lineStart, pos + 1, lines);
                lineStart = pos + 1;
            }
            pos++;
        }
        emit(html, lineStart, length, lines);
        return length;
    }

    /** Adds the trimmed text of {@code html[from, to)} to {@code lines} unless it is blank. */
    private static void emit(String html, int from, int to, List<String> lines) {
        String line = html.substring(from, to).trim();
        if (!line.isEmpty()) {
            lines.add(line);
        }
    }

    /** Returns the first index at or after {@code from} that is not whitespace, or the text length. */
    private static int skipWhitespace(String text, int from) {
        int pos = from;
        while (pos < text.length() && isHtmlWhitespace(text.charAt(pos))) {
            pos++;
        }
        return pos;
    }

    /** Tells whether {@code c} is one of the characters matched by the regex class {@code \s}. */
    private static boolean isHtmlWhitespace(char c) {
        return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == 0x0B;
    }

    /** Bounds-safe, case-insensitive test for {@code prefix} at {@code offset} of {@code text}. */
    private static boolean startsWithIgnoreCase(String text, int offset, String prefix) {
        return text.regionMatches(true, offset, prefix, 0, prefix.length());
    }

    /** Writes {@code lines} to the file {@code local} as UTF-8, one entry per line. */
    private void writeLines(List<String> lines, String local) throws IOException {
        try (BufferedWriter bw = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(local), StandardCharsets.UTF_8))) {
            for (String line : lines) {
                addLine(line, bw);
            }
        }
    }

    /** Writes {@code strLine} followed by the platform line separator. */
    private void addLine(String strLine, BufferedWriter bw) throws IOException {
        bw.write(strLine);
        bw.newLine();
    }

}





  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值