public static HashMapgetDetails(String html)
{
HashMap details = new HashMap();//解析开始
Document doc =Jsoup.parse(html);//获取形如
Element divAll = doc.select("div[class^=result_info]").first();//System.out.println(divAll.text());
Element head = divAll.select("h3").first();//获取div中的h3标签
String tvname = head.select("a").attr("title").trim();//获取h3标签中a标签的title属性值
String year = "";if (head.select("em").size() > 0)//假如h3标签中存在多个标签
{
year= head.select("em").first().text().trim();//只要第一个首尾之间的文本
}
String score= "";
Element scoreSection= divAll.select("p").first();if (scoreSection != null)
{
Elements es= scoreSection.select("span");//选择span元素
int size =es.size();for (int i = 0; i < size; i++)
{
Element e=es.get(i);
String content=e.text().trim();
content= content.replace("\u00A0", "");//替换 为空格
score +=content;
}
}
HashMap lstOtherInfo = new HashMap();
Elements otherSections= divAll.select("div[class^=result_info_cont]");//获取满足class以result_info_cont开头的所有div元素
int size = otherSections.size();//获取满足条件的div元素的总数是多少
int infoCount = 0;for (int i = 0; i < size && infoCount < 3; i++)
{
String value= "";
Element item= otherSections.get(i);//获取第i个元素
boolean keyflag = true;
String key= "";for (int index = 0; index < item.children().size(); index++)//Element.children()用于获取元素的直接子元素
{
Element e= item.child(index);//获取第index个子元素
if(keyflag)
{
key=e.text().trim();if (key == "简介")break;
keyflag= false;
}else{if (e.children().size() > 0)
{for (int b = 0; b < e.children().size(); b++)
{
value+= e.child(b).text().trim() + ",";
}
}else{
String contents=e.text().trim();
value+= contents + ",";
}
}
}
value= value.replaceAll("[" + "," + "]$", "");
lstOtherInfo.put(key, value);
infoCount++;
}
details.put("tv", tvname);
details.put("year", year);
details.put("score", score);
details.put("otherInfo", lstOtherInfo);returndetails;
}