Java 提取(获取)博客信息(内容)

package com.wbg.my.service;

import java.io.*;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * @author Jack Chen
 */

// NOTE(review): in the declaration below the space between the class keyword and the
// class name is missing (the same whitespace loss shows up all over this paste); it has
// to read `class BlogUtil` before the file can compile.
public classBlogUtil {/**
 * URL_PAGE: cnblogs list-page URL; getUrls appends the page index to it
 *
 * URL_PAGE_DETAIL: URL of a post detail page (used as a regular expression)
 *
 * PAGE_COUNT: number of list pages to read
 *
 * urlLists: Set of all detail-page URLs (a Set prevents duplicates)
 *
 * p: compiled matching pattern
 *
 **/

/** Base URL of the blog's list pages; the 1-based page index is appended. */
public final static String URL_PAGE = "https://www.cnblogs.com/weibanggang/default.html?page=";

/**
 * Regular expression for a post detail URL. Group 1 captures the file name
 * (for example {@code 10154989.html}). The dots are escaped so they only match a
 * literal '.', previously {@code [0-9]+.html} also accepted things like {@code 123xhtml}.
 */
public final static String URL_PAGE_DETAIL = "https://www\\.cnblogs\\.com/weibanggang/p/([0-9]+\\.html)";

/** Number of list pages that main() walks through. */
public final static int PAGE_COUNT = 20;

/**
 * All detail-page URLs found so far. A sorted set, so duplicates collapse.
 * The element type is String: main() hands each element to createFile(String).
 */
public static Set<String> urlLists = new TreeSet<String>();

/** Compiled form of {@link #URL_PAGE_DETAIL}. */
public final static Pattern p = Pattern.compile(URL_PAGE_DETAIL);
// Output file path

public static String file="d:index.html";static String [] arr=null;static int sun=0;public static void main(String[] args) throwsException {for(int i = 1;i<=PAGE_COUNT;i++) {

getUrls(i);

}

System.out.println("开始获取内容!");

arr=newString[urlLists.size()];for(Iterator i =urlLists.iterator();i.hasNext();) {

createFile(i.next());

sun++;

}

System.out.println("获取内容完毕!");

System.out.println("开始写入文件!");

StringBuffer stringBuffer=newStringBuffer(kais());for (int i = 0; i < arr.length; i++) {

stringBuffer.append(arr[i]);

}

stringBuffer.append(jiehun());

System.out.println("写入文件完毕!");

System.out.println("开始导出文件!");

createFile(file,stringBuffer);

System.out.println("导出文件完毕!");

System.out.println("输出文件地址为:"+file);

}/** 将结果写入文件*/

private static voidcreateFile(String file, StringBuffer buffer) {try{

File newFile= newFile(file);if (newFile.exists())//存在,则删除

if (!newFile.delete())//删除成功则创建

{

System.err.println("删除文件" + newFile + "失败");

}if (newFile.createNewFile()) {//创建成功,则写入文件内容

PrintWriter p = new PrintWriter(newFileOutputStream(newFile

.getAbsolutePath()));

p.write(buffer.toString());

p.close();

}else{

System.err.println("创建文件:" + newFile + "失败");

}

}catch(Exception e) {

e.printStackTrace();

}

}//开始头部

// kais(): returns the opening part of the generated HTML page - the text that main()
// puts in front of the per-post array entries. The last literal ends with the JavaScript
// fragment `var sum=[`, so the entries appended afterwards become elements of that array.
//
// NOTE(review): this method is not compilable as pasted. Several string literals are
// unterminated or consist of a lone quote character, which suggests the HTML markup
// inside them was stripped when the page was copied. The space between `static` and
// `String` in the signature is missing too. Restore the literals from the original
// source; the code below is kept exactly as found.
public staticString kais(){return "\n" +

"\n" +

"

\n" +

" \n" +

"

weibanggang.github.io\n" +

" \n" +

" \n" +

" \n" +

"

" html,body{width:100%;height: 100%}\n" +

" table{width: 1150px;height:500px;margin: auto}\n" +

" table,td,th{border: 1px solid #e6e6e6;border-collapse:collapse; }\n" +

" body{-moz-background-size:100% 100%; background-size:100% 100%;background-image:url(\"link.jpg\");background-repeat: no-repeat} body{-moz-background-size:100% 100%; background-size:100% 100%;background-image:url(\"link.jpg\");background-repeat: no-repeat}\n" +

" * { margin: 0; padding: 0; }\n" +

" table { border-collapse: collapse; text-align: center; }\n" +

" /*关键设置 tbody出现滚动条*/\n" +

" table tbody {\n" +

" display: block;\n" +

" height: 500px;\n" +

" overflow-y: scroll;overflow-x:hidden;\n" +

" }\n" +

" table thead, tbody tr { display: table;width: 100%; table-layout: fixed; }\n" +

" table thead th { height: 40px }\n" +

" table tbody td {height: 30px }\n" +

" \n" +

"\n" +

"\n" +

"

\n" +

"

本网页仅作为参考博客、github等地址

\n" +

"

" \n" +

"

\n" +

"

序号\n" +

"

标题链接\n" +

"

时间\n" +

"

来源\n" +

"

备注\n" +

"

\n" +

"

\n" +

"

\n" +

"\n" +

"

\n" +

"

\n" +

"\n" +

"\n" +

"

" var sum=[";

}// Closing part of the page

// jiehun(): returns the closing part of the generated page - the text main() appends
// after the per-post array entries. It starts by closing the JavaScript array (`];`) and
// then contains a script loop that, per entry, builds a table row from the row number,
// a link made from the title (index 1), and the values at indexes 2, 3 and 4, and
// appends the row to `table tbody`.
//
// NOTE(review): not compilable as pasted. The loop header is cut off after `i`, and the
// selector strings passed to `$(...)` are empty or broken across lines, which suggests
// the markup inside the literals was stripped when the page was copied. The space
// between `static` and `String` in the signature is missing too. Restore the literals
// from the original source; the code below is kept exactly as found.
public staticString jiehun(){return " ];\n" +

" \n" +

" for(var i=0;i

" var tr=$(\"

\");\n" +

" //序号\n" +

" $(\"

\").html(i+1).appendTo(tr);\n" +

" //标题链接\n" +

" var a=\"\"+sum[i][1]+\"\"\n" +

" $(\"

\").html(a).appendTo(tr);\n" +

" //时间\n" +

" $(\"

\").html(sum[i][2]).appendTo(tr);\n" +

" //来源\n" +

" $(\"

\").html(sum[i][3]).appendTo(tr);\n" +

" //备注\n" +

" $(\"

\").html(sum[i][4]).appendTo(tr);\n" +

" $(\"table tbody\").append(tr);\n" +

" }\n" +

"\n" +

"";

}static String fh="";/* fh: separator that createFile(String url) writes in front of each array entry - empty for the first entry, a comma after that. */
/**
 * Fetches all content.
 *
 * @param url
 *
 * @throws
 *
 */

private static void createFile(String url) throwsException {

Matcher m=p.matcher(url);

m.find();

String fileName= m.group(1);

URL u= newURL(url);

HttpURLConnection conn=(HttpURLConnection) u.openConnection();

conn.connect();

BufferedReader br= new BufferedReader(new InputStreamReader(conn.getInputStream(), "utf-8"));

String str;

StringBuffer s=newStringBuffer();while((str = br.readLine()) != null){

s.append(str);

}

String href="https://www.cnblogs.com/weibanggang/p/"+fileName;

String title=getTitle(s);

String data=getDate(s);

arr[sun]=fh+"[\""+href+"\",\""+title+"\",\""+data+"\",\"博客\",\"正常\"]";

fh=",";

br.close();

conn.disconnect();

}//获取时间

public staticString getDate(StringBuffer sb){int first=sb.indexOf("")+"".length();

String aa=sb.substring(first);int last=aa.indexOf("");

String sa=aa.substring(0,last);returnsa;

}//获取标题

public staticString getTitle(StringBuffer sb){int first=sb.indexOf("

");int last=sb.indexOf("");

String sa=sb.substring(first+7,last);int errorindex=sa.lastIndexOf("- 韦邦杠 - 博客园");return sa.substring(0,errorindex);

}/***@paramidx

* 获取页数

*@throws

*/

private static void getUrls(int idx) throwsException{

URL u= new URL(URL_PAGE+""+idx);

HttpURLConnection conn=(HttpURLConnection) u.openConnection();

conn.connect();

BufferedReader br= new BufferedReader(new InputStreamReader(conn.getInputStream(), "utf-8"));

String str;while((str = br.readLine()) != null){if(null != str && str.contains("https://www.cnblogs.com/weibanggang/p/")) {

Matcher m=p.matcher(str);if(m.find()) {

urlLists.add(m.group());

}

}

}

br.close();

conn.disconnect();

}

}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值