代码示例
package demo0806;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Downloads a web page and scans it line by line for its {@code <title>} text and for two
 * lists of shared code snippets: the block introduced by a line containing
 * {@code NewCodeList} ("recent") and the block introduced by a line containing
 * {@code HotCodeList} ("hot").
 *
 * <p>The scanner is deliberately line-oriented and only understands the markup layout the
 * page used when this was written: inside a list, one line carries
 * {@code href="..." target=...} and a following line carries {@code title="...">}.
 * Lines that do not match that layout are ignored instead of aborting the scan.
 */
public class ScanTitleFromWebPage {

    /** Charset used when the response declares none (or an unusable one) and for the report file. */
    private static final Charset DEFAULT_CHARSET = StandardCharsets.UTF_8;

    /** Picks the charset token out of a Content-Type value, e.g. {@code text/html; charset="gbk"}. */
    private static final Pattern CHARSET_PATTERN =
            Pattern.compile("charset\\s*=\\s*[\"']?([^\\s;\"']+)", Pattern.CASE_INSENSITIVE);

    /** Network timeouts so an unresponsive server cannot block the caller forever. */
    private static final int CONNECT_TIMEOUT_MILLIS = 10000;
    private static final int READ_TIMEOUT_MILLIS = 15000;

    private static final String TITLE_OPEN = "<title>";
    private static final String TITLE_CLOSE = "</title>";
    private static final String HREF_ATTR = "href=";
    private static final String TITLE_ATTR = "title=";

    /** Which snippet list the scanner is currently inside. */
    private enum Section { NONE, RECENT, HOT }

    /** Mutable scan state carried from one line to the next. */
    private static final class ParseState {
        String title;
        Section section = Section.NONE;
        String recentHref;
        String hotHref;
    }

    private final String website;
    // Insertion-ordered so that snippets are reported in the order they appear on the page.
    private final Map<String, String> recentShareCode = new LinkedHashMap<String, String>();
    private final Map<String, String> hotShareCode = new LinkedHashMap<String, String>();

    /**
     * @param website address of the page to scan, in a form accepted by {@link URL}
     */
    public ScanTitleFromWebPage(String website) {
        this.website = website;
    }

    /**
     * Returns a read-only view of the recently shared snippets found so far.
     *
     * @return snippet title mapped to its link, in page order
     */
    public Map<String, String> getRecentShareCode() {
        return Collections.unmodifiableMap(recentShareCode);
    }

    /**
     * Returns a read-only view of the hot snippets found so far.
     *
     * @return snippet title mapped to its link, in page order
     */
    public Map<String, String> getHotShareCode() {
        return Collections.unmodifiableMap(hotShareCode);
    }

    /**
     * Fetches the page, records the recent/hot snippet lists and returns the page title.
     *
     * <p>I/O failures are reported on {@code System.err} and do not propagate; whatever was
     * parsed before the failure is kept.
     *
     * @return the text following {@code <title>} on its line (up to {@code </title>} when
     *         that is on the same line), or {@code null} if no title line was read
     */
    public String ScanWebForTitle() {
        ParseState state = new ParseState();
        try {
            URL url = new URL(website);
            URLConnection connection = url.openConnection();
            connection.setConnectTimeout(CONNECT_TIMEOUT_MILLIS);
            connection.setReadTimeout(READ_TIMEOUT_MILLIS);
            // Some sites refuse requests that identify themselves as a Java client.
            connection.setRequestProperty("User-Agent", "Mozilla/4.0 (compatible; MSIE 5.0; Windows NT; DigExt)");
            // The header may be absent or may omit the charset parameter; fall back instead of failing.
            Charset charset = resolveCharset(connection.getHeaderField("Content-Type"));
            // Closing the reader also closes the connection's input stream, on every exit path.
            try (BufferedReader reader =
                    new BufferedReader(new InputStreamReader(connection.getInputStream(), charset))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    consumeLine(line, state);
                }
            }
        } catch (IOException e) {
            System.err.println("无法下载");
            e.printStackTrace();
        }
        return state.title;
    }

    /** Determines the charset named by a Content-Type header value, defaulting to UTF-8. */
    private static Charset resolveCharset(String contentType) {
        if (contentType == null) {
            return DEFAULT_CHARSET;
        }
        Matcher matcher = CHARSET_PATTERN.matcher(contentType);
        if (!matcher.find()) {
            return DEFAULT_CHARSET;
        }
        try {
            return Charset.forName(matcher.group(1));
        } catch (IllegalArgumentException unknownCharset) {
            // Illegal or unsupported charset name: decoding with the default beats not reading at all.
            return DEFAULT_CHARSET;
        }
    }

    /** Applies one line of the page to the scan state; at most one rule fires per line. */
    private void consumeLine(String line, ParseState state) {
        int titleAt = line.indexOf(TITLE_OPEN);
        if (titleAt != -1) {
            int from = titleAt + TITLE_OPEN.length();
            int to = line.indexOf(TITLE_CLOSE, from);
            // A title whose closing tag is on a later line yields the rest of this line.
            state.title = line.substring(from, to == -1 ? line.length() : to);
        } else if (line.contains("NewCodeList")) {
            state.section = Section.RECENT;
        } else if (line.contains("HotCodeList")) {
            state.section = Section.HOT;
        } else if (line.contains("</div>")) {
            // Only the hot list is terminated by </div>; the recent list ends where the hot list starts.
            if (state.section == Section.HOT) {
                state.section = Section.NONE;
            }
        } else if (state.section != Section.NONE) {
            consumeListLine(line, state);
        }
    }

    /** Handles a line inside a snippet list: remembers an href, or pairs a title with the last href. */
    private void consumeListLine(String line, ParseState state) {
        boolean recent = state.section == Section.RECENT;
        int hrefAt = line.indexOf(HREF_ATTR);
        if (hrefAt != -1) {
            // A line carrying href= is never also inspected for title=.
            String href = quotedValue(line, hrefAt + HREF_ATTR.length(), " target");
            if (href != null) {
                if (recent) {
                    state.recentHref = href;
                } else {
                    state.hotHref = href;
                }
            }
            return;
        }
        int titleAt = line.indexOf(TITLE_ATTR);
        if (titleAt != -1) {
            String name = quotedValue(line, titleAt + TITLE_ATTR.length(), ">");
            if (name != null) {
                if (recent) {
                    recentShareCode.put(name, state.recentHref);
                } else {
                    hotShareCode.put(name, state.hotHref);
                }
            }
        }
    }

    /**
     * Extracts a quoted attribute value: skips the opening quote at {@code quoteAt} and stops
     * one character (the closing quote) before {@code terminator}.
     *
     * @return the value, or {@code null} when the line does not have that shape
     */
    private static String quotedValue(String line, int quoteAt, String terminator) {
        int valueStart = quoteAt + 1;
        if (valueStart > line.length()) {
            return null;
        }
        int terminatorAt = line.indexOf(terminator, valueStart);
        if (terminatorAt == -1) {
            return null;
        }
        int valueEnd = terminatorAt - 1;
        return valueEnd < valueStart ? null : line.substring(valueStart, valueEnd);
    }

    /** Writes one line of text to the report file as UTF-8, terminated by {@code '\n'}. */
    private static void writeLine(OutputStream out, String text) throws IOException {
        out.write(text.getBytes(DEFAULT_CHARSET));
        out.write('\n');
    }

    /** Emits a header and one "title TAB (link)" row per snippet to both the file and stdout. */
    private static void report(OutputStream out, String fileHeader, String consoleHeader,
            Map<String, String> snippets) throws IOException {
        writeLine(out, fileHeader);
        System.out.println(consoleHeader);
        for (Entry<String, String> snippet : snippets.entrySet()) {
            String row = snippet.getKey() + "\t" + "(" + snippet.getValue() + ")";
            writeLine(out, row);
            System.out.println(row);
        }
    }

    /**
     * Scans the oschina.net home page and writes the title and both snippet lists to
     * {@code OSChomepage.html} in the working directory, echoing them to stdout.
     *
     * @param args unused
     * @throws IOException if the report file cannot be created or written
     * @throws InterruptedException declared for compatibility; not thrown by this code
     */
    public static void main(String[] args) throws InterruptedException, IOException {
        String website = "http://www.oschina.net";
        ScanTitleFromWebPage scanner = new ScanTitleFromWebPage(website);
        String title = scanner.ScanWebForTitle();
        // try-with-resources: the report file is closed even if a write fails part-way.
        try (OutputStream out = new FileOutputStream(new File("OSChomepage.html"))) {
            if (title != null) {
                String str = "网站标题为:" + title;
                writeLine(out, str);
                System.out.println(str);
            }
            report(out,
                    "----------------最新分享代码有如下----------------",
                    "----------------最新分享代码有如下----------------",
                    scanner.recentShareCode);
            // The file and console headers for the hot list differ in dash count; both are kept as they were.
            report(out,
                    "----------------本周最热门代码有如下----------------",
                    "------------------本周最热门代码有如下-----------------",
                    scanner.hotShareCode);
        }
    }
}
运行结果
网站标题为:开源中国 - 找到您想要的开源项目,分享和交流
----------------最新分享代码有如下----------------
iOS 一个函数同时返回多个参数的策略 (http://www.oschina.net/code/snippet_865986_50059)
jquery插件--ajaxfileupload.js (http://www.oschina.net/code/snippet_105637_50057)
计蒜客-挑战难题-6 (http://www.oschina.net/code/snippet_587996_50055)
图片延迟加载简单原理 (http://www.oschina.net/code/snippet_1590754_50058)
我该如何书写一段能实现早睡早起的代码? (http://www.oschina.net/code/snippet_1168184_50061)
shell获取当前脚本执行绝对路径 (http://www.oschina.net/code/snippet_1988965_50056)
冒泡排序算法java实现 (http://www.oschina.net/code/snippet_587996_50052)
js脚本控制翻页控件概述。这个控件主要用来翻页的一个效果,如果有喜欢的可以那去参考,呵呵 (http://www.oschina.net/code/snippet_1862064_50060)
选择排序算方法java实现 (http://www.oschina.net/code/snippet_587996_50053)
计蒜客-挑战难题-5 (http://www.oschina.net/code/snippet_587996_50054)
------------------本周最热门代码有如下-----------------
python实现爬图,不要再爬妹子图了,太没品了 (http://www.oschina.net/code/snippet_2371155_49889)
通过银行卡号取得银行名字 (http://www.oschina.net/code/snippet_1252640_49997)
Java反射基础,构建框架(重要) (http://www.oschina.net/code/snippet_2345495_49988)
spring boot + mybatis+ spring mvc整合 (http://www.oschina.net/code/snippet_2325859_49871)
全医通 - HTML5开发,单页集成版 (http://www.oschina.net/code/snippet_2287693_50012)
微信公众号支付 (http://www.oschina.net/code/snippet_1754599_49966)
我的Eclipse代码格式化风格 (http://www.oschina.net/code/snippet_1584959_49953)
python简单爬虫 (http://www.oschina.net/code/snippet_2391943_49998)
获取情敌电脑内照片神器 (http://www.oschina.net/code/snippet_2425035_49995)
12306火车票API接口QQ 89914505 (http://www.oschina.net/code/snippet_811693_49880)