根据网址获取源码
/**
 * Downloads the document at the given URL and returns its text.
 *
 * <p>The content is decoded as UTF-8 and read line by line. Line terminators
 * are not kept, so the result is every line joined directly to the next.
 *
 * <p>Failures are not propagated: if the URL is malformed or an I/O error
 * occurs, the stack trace is printed and whatever was read up to that point
 * (possibly the empty string) is returned.
 *
 * @param htmlUrl address of the page to fetch
 * @return the text read from the URL; never {@code null}
 */
public String getHtmlSource(String htmlUrl) {
    StringBuilder sb = new StringBuilder();
    BufferedReader in = null;
    try {
        URL url = new URL(htmlUrl);
        in = new BufferedReader(new InputStreamReader(url.openStream(), "UTF-8"));
        String line;
        while ((line = in.readLine()) != null) {
            sb.append(line);
        }
    } catch (IOException e) {
        // MalformedURLException is a subclass of IOException, so a bad URL is
        // reported here as well instead of being silently ignored.
        e.printStackTrace();
    } finally {
        // Close in finally so the stream is released even when reading fails midway.
        if (in != null) {
            try {
                in.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
    return sb.toString();
}
抓取源码中包含name字段的content内容
// Isolate the text between the last "<head>" and the last "</head>".
// Either tag may be absent (for example when html is empty), in which case
// lastIndexOf returns -1; fall back to "" instead of letting the substring
// call throw StringIndexOutOfBoundsException.
int start = html.lastIndexOf("<head>");
int end = html.lastIndexOf("</head>");
String str = (start >= 0 && end > start) ? html.substring(start, end) : "";

// Every piece after the first starts right behind a "<meta" token.
String[] meta = str.split("<meta");
// NOTE(review): list only mirrors meta; kept in case code after this fragment reads it.
List<String> list = new ArrayList<String>(meta.length);
for (String ss : meta) {
    list.add(ss);
    if (!ss.contains("name")) {
        continue;
    }
    // Split on quote characters once and reuse the result. For a tag written
    // as name="x" content="y", index 3 holds the content value y.
    String[] parts = ss.split("\"|'|‘.*’");
    if (parts.length > 3) {
        Toast.makeText(getApplicationContext(), parts[3], Toast.LENGTH_LONG).show();
    }
}