从百度空间到CSDN——博客搬家源码-CSDN博客

注意：下面的方法在csdn博客改版以后无法使用，因为现在csdn博客不支持MetaWeblog API，不知道什么时候可以支持。

1.原文链接

http://hi.baidu.com/cnjsp/blog/item/e175cf1b27bc6af6ae513335.html

2.心得

本方法我测试过，是可以用的。一来感觉思路挺新颖的，程序员自己写代码解决自己的事情；二来可以通过这个实例学习一下java，所以我贴出我修改后的java代码。

具体思路可以参见原文。

3.代码

CSDNPost.java

package cn.mingyuan.baidu2csdn.core; import java.io.FileOutputStream; import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; import java.util.Date; import java.util.HashMap; import java.util.Map; import org.apache.xmlrpc.XmlRpcException; import org.apache.xmlrpc.client.XmlRpcClient; import org.apache.xmlrpc.client.XmlRpcClientConfigImpl; /** * csdn博文 * * @author mingyuanonline@gmail.com * */ public class CSDNPost { /** * 博文创建日期 */ private Date dateCreated; /** * 博文内容 */ private String description; /** * 标题 */ private String title; /** * 博文分类 */ private String[] categories; public CSDNPost() { } public CSDNPost(String title, String description, String[] categories, Date dateCreated) { this.dateCreated = dateCreated; this.description = description; this.title = title; this.categories = categories; } public Date getDateCreated() { return dateCreated; } public void setDateCreated(Date dateCreated) { this.dateCreated = dateCreated; } public String getDescription() { return description; } public void setDescription(String description) { this.description = description; } public String getTitle() { return title; } public void setTitle(String title) { this.title = title; } public String[] getCategories() { return categories; } public void setCategories(String[] categories) { this.categories = categories; } /** * xml-rpc配置 */ private static XmlRpcClientConfigImpl config; /** * xml-rpcClient */ private static XmlRpcClient client; static { config = new XmlRpcClientConfigImpl(); try { // 此处请将telnetor替换为您的用户名 config.setServerURL(new URL( "http://blog.csdn.net/xw13106209/services/metablogapi.aspx")); } catch (MalformedURLException e) { System.out.println("请检查url"); } client = new XmlRpcClient(); client.setConfig(config); } /** * 日志记录 * * @param log * log */ private void writelog(String log) { FileOutputStream fos = null; try { fos = new FileOutputStream("post.log", true); fos.write((log + "\r\n").getBytes()); fos.flush(); fos.close(); } catch 
(IOException e) { System.out.println("写入日志错误：" + log); } } /** * 发布 */ public void publish() { Map<String, Object> struct = new HashMap<String, Object>(); struct.put("dateCreated", dateCreated); struct.put("description", description); struct.put("title", title); struct.put("categories", categories); // Object[] params = new Object[] { "your usrname", // "replace it with your username", // "replace it with your password", struct, true }; Object[] params = new Object[] { "xw13106209", "xw13106209", "password", struct, true }; String blogid = null; try { blogid = (String) client.execute("metaWeblog.newPost", params); } catch (XmlRpcException e) { writelog("导入出现错误：title=" + title); System.out.println("导入出现错误：title=" + title); } writelog(title + ">> 导入完毕,生成博文id为>>" + blogid); System.out.println(title + ">> 导入完毕,生成博文id为>>" + blogid); struct.clear(); } public static void main(String[] args) { CSDNPost post = new CSDNPost(); post.publish(); } }

BaiduHi

package cn.mingyuan.baidu2csdn.core; import java.util.Date; /** * 百度博客 * * @author mingyuanonline@gmail.com * */ public class BaiduHi { /** * 标题 */ private String title; /** * 内容 */ private String description; /** * 分类 */ private String categories; /** * 发布日期 */ private Date dateCreated; public String getTitle() { return title; } public String getDescription() { return description; } public String getCategories() { return categories; } public Date getDateCreated() { return dateCreated; } public void setTitle(String title) { this.title = title; } public void setDescription(String description) { this.description = description; } public void setCategories(String categories) { this.categories = categories; } public void setDateCreated(Date dateCreated) { this.dateCreated = dateCreated; } public BaiduHi(String title, String description, String categories, Date dateCreated) { this.title = title; this.description = description; this.categories = categories; this.dateCreated = dateCreated; } public BaiduHi() { // TODO Auto-generated constructor stub } /** * @param args */ public static void main(String[] args) { // TODO Auto-generated method stub } }

BaiduHiFetcher

package cn.mingyuan.baidu2csdn.core; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.net.MalformedURLException; import java.net.URL; import java.net.URLConnection; import java.util.ArrayList; import java.util.Date; import java.util.List; import java.util.Stack; import java.util.regex.Matcher; import java.util.regex.Pattern; /** * 百度博客数据抓取及解析 * * @author mingyuanonline@gmail.com * */ public class BaiduHiFetcher { /** * 下载页面 * * @param url * url * @return 网页源码 */ private String downloadPage(String url) { URLConnection conn; InputStream in; BufferedReader reader = null; StringBuilder sb = new StringBuilder(); String line = null; try { conn = new URL(url).openConnection(); in = conn.getInputStream(); reader = new BufferedReader(new InputStreamReader(in, "gb2312")); while ((line = reader.readLine()) != null) { sb.append(line); } in.close(); reader.close(); } catch (MalformedURLException e) { System.out.println("请检查url是否规范"); } catch (IOException e) { System.out.println("读取源码错误:" + url); } return sb.toString(); } /** * 获取页面博文链接 * * @param html * 网页源码 * @return 页面中的博文链接 */ private List<String> getPostLinks(String html) { // 分析页面内容，取得页面中的文章链接 String titleDivRegex = "<div[\\s]class=\"tit\"><a[\\s]href=[^<>]+?target=\"_blank\">.+?</div>"; Pattern titleDivPattern = Pattern.compile(titleDivRegex); Matcher titleDivMatcher = titleDivPattern.matcher(html); List<String> posts = new ArrayList<String>(); while (titleDivMatcher.find()) { String div = titleDivMatcher.group(); String titleUrl = div.substring(div.indexOf("/"), div .indexOf("\" target")); posts.add("http://hi.baidu.com" + titleUrl); } return posts; } /** * <p> * 获取博客总页数 <br> * 我的博客内容有16页，有上一页，下一页，尾页等这样的标志，如果博文少的话可能这些标志不会出现，请修改此方法 * * @param html * 源码（最好是第一页） * @return 博客总页数 */ private int getTotalPages(String html) { // 页码 // <a href="/cnjsp/blog/index/16" // mce_href="cnjsp/blog/index/16">[尾页]</a> String pageRegex = 
"<a[\\s]href=\"/cnjsp/blog/index/[\\d][\\d]\">\\[尾页\\]</a>"; Pattern pagePattern = Pattern.compile(pageRegex); Matcher pageMatcher = pagePattern.matcher(html); String totalPagesStr = null; int pages = 0; if (pageMatcher.find()) { String pagelink = pageMatcher.group(); totalPagesStr = pagelink.replaceAll( "<a[\\s]href=\"/cnjsp/blog/index/", "").replaceAll( "\">\\[尾页\\]</a>", ""); pages = Integer.parseInt(totalPagesStr); } return pages; } /** * <p> * 获取博客的所有博文的地址 <br> * 没有对url进行编码处理，如果博客地址含中文，请对url进行处理 * * @param blogUrl * 博客地址 * @return 所有博文地址，存放于栈中，使用的时候请使用pop方法取出元素，这样可以保证按照最先发表的博文最先处理 */ public Stack<String> getAllPostLink(String blogUrl) { Stack<String> posts = new Stack<String>(); // 1.下载第一页 String firstPageHtml = downloadPage(blogUrl + "/blog/index/0"); // 2.获取博文总页数 // int totalPages = getTotalPages(firstPageHtml); int totalPages = 2; // 3.下载各摘要页 posts.addAll(getPostLinks(firstPageHtml)); if (totalPages < 1) { return posts; } for (int i = 1; i <= totalPages; i++) { String page = downloadPage(blogUrl + "/blog/index/" + i); posts.addAll(getPostLinks(page)); } return posts; } /** * 解析博文，获取标题，发布时间，内容，分类等信息 * * @param postUrl * 博文地址 * @return 封装了博文信息的BaiduHi */ public BaiduHi getBaiduHi(String postUrl) { String html = downloadPage(postUrl); // /<div class="tit"> String titleDivRegex = "<div[\\s]id=\"m_blog\"[\\s]class=\"modbox\"[\\s]style=\"overflow-x:hidden;\"><div[\\s]class=\"tit\">.+?</div><div[\\s]class=\"date\">"; Pattern titleDivPattern = Pattern.compile(titleDivRegex); Matcher titleDivMatcher = titleDivPattern.matcher(html); String title = null; if (titleDivMatcher.find()) { title = titleDivMatcher .group() .replaceAll( "<div[\\s]id=\"m_blog\"[\\s]class=\"modbox\"[\\s]style=\"overflow-x:hidden;\"><div[\\s]class=\"tit\">", "") .replaceAll("</div><div[\\s]class=\"date\">", "").trim(); } String dateDivRegex = "<div[\\s]class=\"date\">.+?</div>"; Pattern dateDivPattern = Pattern.compile(dateDivRegex); Matcher dateMatcher = dateDivPattern.matcher(html); String 
dateStr = null; Date postDate = null; if (dateMatcher.find()) { dateStr = dateMatcher.group().replaceAll( "<div[\\s]class=\"date\">", "").replaceAll("</div>", "") .trim(); postDate = getDate(dateStr); } String textDivRegex = "<div[\\s]id=\"blog_text\"[\\s]class=\"cnt\"[\\s]+>.+?</div>"; Pattern textDivPattern = Pattern.compile(textDivRegex); Matcher textMatcher = textDivPattern.matcher(html); String text = null; if (textMatcher.find()) { text = textMatcher.group().replaceAll( "<div[\\s]id=\"blog_text\"[\\s]class=\"cnt\"[\\s]+>", "") .replaceAll("</div>", "").trim(); } String categoriesRegex = "title=\"查看该分类中所有文章\">类别：.+?</a>"; Pattern categoriesDivPattern = Pattern.compile(categoriesRegex); Matcher categoriesMatcher = categoriesDivPattern.matcher(html); String categories = null; if (categoriesMatcher.find()) { categories = categoriesMatcher.group().replaceAll( "title=\"查看该分类中所有文章\">类别：", "").replaceAll("</a>", "") .trim(); } BaiduHi hi = new BaiduHi(); hi.setTitle(title); hi.setDescription(text); hi.setCategories(categories); hi.setDateCreated(postDate); return hi; } /** * 解析博文中的日期格式返回Date类型 * 日期格式为：2011年07月01日星期五下午 01:05 * @param str * 博文中的日期 * @return Date类型日期 */ @SuppressWarnings("deprecation") private Date getDate(String str) { String yearStr = str.substring(0, str.indexOf("年")).trim(); String monthStr = str.substring(str.indexOf("年"), str.indexOf("月")) .replace("年", "").trim(); String dayStr = str.substring(str.indexOf("月"), str.indexOf("日")) .replace("月", "").trim(); String timeStr = str.substring(str.indexOf("午")).replace("午", "") .trim(); String hourStr = timeStr.split(":")[0]; String minutesStr = timeStr.split(":")[1]; Date date = new Date(); date.setYear(Integer.parseInt(yearStr) - 1900); date.setMonth(Integer.parseInt(monthStr) - 1); date.setDate(Integer.parseInt(dayStr)); if (str.contains("下午")) { date.setHours(Integer.parseInt(hourStr) + 12); } else { date.setHours(Integer.parseInt(hourStr)); } date.setMinutes(Integer.parseInt(minutesStr)); return 
date; } }

Transfer

package cn.mingyuan.baidu2csdn.core; import java.util.Stack; /** * 搬家 * * @author mingyuanonline@gmail.com * */ public class Transfer { /** * @param args / public static void main(String[] args) { // TODO Auto-generated method stub //String postUrl = "http://hi.baidu.com/cnjsp"; String postUrl = "http://hi.baidu.com/xwdreamer"; BaiduHiFetcher fetcher = new BaiduHiFetcher(); Stack<String> urls = null; urls = fetcher.getAllPostLink(postUrl); while (!urls.isEmpty()) { String url = urls.pop(); BaiduHi hi = null; hi = fetcher.getBaiduHi(url); CSDNPost post = new CSDNPost(); post.setTitle(hi.getTitle()); post.setDescription(hi.getDescription()); post.setCategories(new String[] { hi.getCategories() }); post.setDateCreated(hi.getDateCreated()); post.publish(); try { Thread.sleep(5 1000); } catch (InterruptedException e) { System.out.println("休眠出错"); } } } }

DeletePostById

package cn.mingyuan.baidu2csdn.core;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;

import org.apache.xmlrpc.XmlRpcException;
import org.apache.xmlrpc.client.XmlRpcClient;
import org.apache.xmlrpc.client.XmlRpcClientConfigImpl;

/**
 * Deletes CSDN posts by id through the {@code blogger.deletePost} XML-RPC
 * call, reading the ids from a copy of the import log.
 */
public class DeletePostById {

    /**
     * Texts that precede the post id in a log line. The first one is what
     * {@code CSDNPost.publish()} writes; the second one is the form this class
     * originally looked for and is still accepted.
     */
    private static final String[] ID_MARKERS = { "生成博文id为>>", "生成博文id为：" };

    private static XmlRpcClientConfigImpl config;

    private static XmlRpcClient client;

    static {
        config = new XmlRpcClientConfigImpl();
        try {
            // Replace the account name inside this URL with your own CSDN user name.
            config.setServerURL(new URL(
                    "http://blog.csdn.net/telnetor/services/metablogapi.aspx"));
        } catch (MalformedURLException e) {
            System.out.println("请检查url");
        }
        client = new XmlRpcClient();
        client.setConfig(config);
    }

    /**
     * Deletes one post. The completion message is printed only when the call
     * succeeded; a failed call prints the error message alone.
     *
     * @param appkey   application key; described by the original author as a
     *                 value the server ignores
     * @param postid   id of the post to delete
     * @param username account name
     * @param password account password
     * @param publish  whether the blog is republished after the deletion
     */
    public static void delete(String appkey, String postid, String username,
            String password, boolean publish) {
        Object[] params = new Object[] { appkey, postid, username, password,
                publish };
        try {
            client.execute("blogger.deletePost", params);
            System.out.println(postid + "删除完毕");
        } catch (XmlRpcException e) {
            System.out.println("删除出错，postid=" + postid);
        }
    }

    /**
     * Extracts the post id from one log line.
     *
     * @param line a line of the import log
     * @return the id, or {@code null} when the line carries none (for example
     *         an error line, or a line whose recorded id is {@code null})
     */
    private static String extractPostId(String line) {
        for (int i = 0; i < ID_MARKERS.length; i++) {
            // The id is the tail of the line, so search from the end in case
            // the post title itself contains the marker text.
            int pos = line.lastIndexOf(ID_MARKERS[i]);
            if (pos >= 0) {
                String id = line.substring(pos + ID_MARKERS[i].length()).trim();
                if (id.length() == 0 || "null".equals(id)) {
                    return null;
                }
                return id;
            }
        }
        return null;
    }

    /**
     * Reads the file {@code content} from the working directory and deletes
     * every post whose id it lists, pausing ten seconds after each deletion.
     * Replace the placeholder account data before running.
     *
     * @param args unused
     * @throws InterruptedException when the pause between deletions is interrupted
     */
    public static void main(String[] args) throws InterruptedException {
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new InputStreamReader(
                    new FileInputStream("content")));
            String line;
            while ((line = reader.readLine()) != null) {
                String postid = extractPostId(line);
                if (postid == null) {
                    continue;
                }
                delete("ignored", postid, "your username", "your password", true);
                Thread.sleep(1000 * 10);
            }
        } catch (FileNotFoundException e1) {
            System.out.println("文件没找到");
        } catch (IOException e) {
            System.out.println("读取文件失败");
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done when closing fails.
                }
            }
        }
    }
}