CSDN Blog快速备份程序-备份你自己的Blog

以下程序需要htmlparser.jar。你可以直接从
http://umn.dl.sourceforge.net/sourceforge/htmlparser/htmlparser1_5_20040728.zip
下载,http://htmlparser.sourceforge.net是htmlparser的主页。

//copy from here.

/*******************************************************************************
 * $Header$
 * $Revision$
 * $Date$
 *
 *==============================================================================
 *
 * Copyright (c) 2001-2004 XXX Technologies, Ltd.
 * All rights reserved.
 *
 * Created on 2004-12-3
 *******************************************************************************/

import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.net.MalformedURLException;
import java.net.URL;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.List;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;

import org.htmlparser.Node;
import org.htmlparser.Parser;
import org.htmlparser.lexer.Page;
import org.htmlparser.tags.Div;
import org.htmlparser.util.ParserException;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;

/**
 *
 * @author 晏斐 (mailto:mr_yanfei&yahoo.com)
 */
/*
 * 修改历史
 * $Log$
 */
public final class BlogBackupTool {
        private static final String RSS_URL = "http://blog.csdn.net/mr_yanfei/Rss.aspx";
        private static final String SAVE_PATH = "d://temp";
        private static final String CHANNEL = "channel";
        private static final String CHANNEL_ITEM = "item";
        private static final String ITEM_TITLE = "title";
        private static final String ITEM_LINK = "link";
       
        private static final boolean FILTER = true;
       
        class Blog {
                private String fTitle;
                private String fLink;
               
                public Blog(String title, String link) {
                        fTitle = title;
                        fLink = link;
                }
               
                public String getTitle() {
                        return fTitle;
                }
               
                public String getLink() {
                        return fLink;
                }
        }
       
        private Blog[] getBlogs(String rssUrl) {
        DocumentBuilderFactory factory =
            DocumentBuilderFactory.newInstance();
        factory.setNamespaceAware(true);
       
        List result = new ArrayList();
        try {
                URL url = new URL(rssUrl);
            DocumentBuilder builder = factory.newDocumentBuilder();
            Document document = builder.parse(url.openStream());
           
            Element channel = document.getDocumentElement();
           
            channel = (Element)document.getElementsByTagName(CHANNEL).item(0);
            if(CHANNEL.equals(channel.getLocalName())) {
               
                NodeList nodes = channel.getChildNodes();
                for(int i = 0; i < nodes.getLength(); i ++) {
                    org.w3c.dom.Node item = nodes.item(i);
                    if (CHANNEL_ITEM.equals(item.getLocalName())) {
                        String title = getChildNodeText(item, ITEM_TITLE);
                        String link = getChildNodeText(item, ITEM_LINK);
                        result.add(new Blog(title, link));
                    }
                }
            }
        } catch (Exception ex){
                ex.printStackTrace();
        }      
        return (Blog[])result.toArray(new Blog[result.size()]);
        }
       
       
        private String getChildNodeText(org.w3c.dom.Node item, String nodeName) {
               
                NodeList nodes = item.getChildNodes();
                for(int i = 0; i < nodes.getLength(); i++) {
                        org.w3c.dom.Node node = nodes.item(i);
                        if (nodeName.equals(node.getLocalName())) {
                                return node.getFirstChild().getNodeValue();
                        }
                }
                return null;
        }
       
        private String validFilename(String name) {
                String result = name.replace(':', '_');
                result = result.replace('/', '_');
                result = result.replace('//', '_');
                result = result.replace('?', '?');
                result = result.replace('*', '_');
                result = result.replace('<', '_');
                result = result.replace('>', '_');
                result = result.replace('|', '_');
                result = result.replace('"', '_');
                return result;
        }
       
        private void saveBlogs(Blog[] blogs) throws Exception{
               
                String title, link;
                for (int i = 0; i < blogs.length; i++) {
                        title = blogs[i].getTitle();
                        link = blogs[i].getLink();
                       
                        System.out.println("Get Blog " + title);
                        System.out.println("URL : " + link);

       
                        if (FILTER) {
                                Parser parser = null;
                                try {
                                        parser = new Parser(link);
                                } catch (ParserException ex) {
                                        continue;
                                }
                                Page page = parser.getLexer().getPage();
                                String pageUrl = page.getUrl();
                               
                                Node[] bases = parser.extractAllNodesThatAre(Div.class);
                                for (int j = 0; j < bases.length; j++) {
                                        String attr = ((Div)bases[j]).getAttribute("class");
                                       
                                        if (attr == null)
                                                attr = "";
                                       
                                        if (attr.equals("post")) {
                                                String content = ((Div)bases[j]).getChildrenHTML();
                                                saveBlogToFile(title + ".html", content);
                                                break;
                                        }
                                }
                                parser.reset();
                        }
                        else {
                                StringBuffer buffer = getHtmlFromURL(link);
                                saveBlogToFile(title + ".html", buffer.toString());
                        }
                }
        }
       

        private StringBuffer getHtmlFromURL(String url) {
        StringBuffer buffer = new StringBuffer();
                try {
            URL pageUrl = new URL(url);
       
            BufferedReader in = new BufferedReader(new InputStreamReader(pageUrl.openStream()));
            String str;
            while ((str = in.readLine()) != null) {
                buffer.append(str);
            }
            in.close();
        } catch (MalformedURLException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return buffer;
        }
       
        private void saveBlogToFile(String filename, String content) {
                try {
                        filename = validFilename(filename);
                        File file = new File(SAVE_PATH, filename);
                    OutputStream out = new FileOutputStream(file);
                OutputStreamWriter writer = new OutputStreamWriter(out);
                writer.write(content);
                writer.close();
                } catch (IOException ex) {
                       
                }
        }
       
        public static void main(String[] args) throws Exception{
                BlogBackupTool reader = new BlogBackupTool();
                Blog[] blogs = reader.getBlogs(RSS_URL);
               
                reader.saveBlogs(blogs);
                String msg = MessageFormat.format("Totle {0} blogs saved.", new String[]{Integer.toString(blogs.length)});
                System.out.println(msg);
        }
}
//end

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值