package com.demo;
import java.io.File;
import java.io.IOException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
/**
 * Extracts the text of a fixed list of elements from a file using Jsoup.
 * Elements are located with CSS selectors, not XPath.
 *
 * <p>Created 2016-11-24 16:55:20.
 *
 * @author yangluan
 */
public class TestJsoupXml {

    /** File parsed by the no-argument {@link #getText()}. */
    private static final String DEFAULT_PATH = "D:/文件/智库网专家/XML/0.xml";

    /** Charset name passed to Jsoup when reading the file. */
    private static final String CHARSET = "UTF-8";

    /** Terminator appended after every extracted value. */
    private static final String LINE_END = "\r\n";

    /** Selector prefix shared by the positional (nth-child) lookups. */
    private static final String CONTENT = "#expert > div.pub_right > div.expert_content > ";

    /**
     * Selectors in output order. The trailing comment on each entry is the
     * name of the local variable that held the value before this table existed.
     */
    private static final String[] SELECTORS = {
        CONTENT + "p.thefirstp",                   // org
        CONTENT + "p:nth-child(2)",                // orgContent
        CONTENT + "p:nth-child(3)",                // bio
        CONTENT + "p:nth-child(4)",                // bioContent
        "#yjly",                                   // study
        CONTENT + "p:nth-child(6)",                // studyContent
        // NOTE(review): same selector as "study" above; possibly a copy/paste
        // slip. The intended id is not known here, so it is kept unchanged.
        "#yjly",                                   // edu
        CONTENT + "p:nth-child(8)",                // eduContent
        "#国研报告",                                // report
        CONTENT + "ul:nth-child(10) > li > a",     // reportContent
        "#学术论文",                                // xueshu
        CONTENT + "ul:nth-child(12) > li > a",     // xueshuContent
        "#学术著作",                                // zhuzuo
        CONTENT + "ul:nth-child(14) > li > a",     // zhuzuoContent
        "#学术活动",                                // huodong
        CONTENT + "ul:nth-child(16) > li > a",     // huodongContent
        "#gnXsjz",                                 // jianzhi
        CONTENT + "p:nth-child(18)",               // jianzhiContent
        "#gnHjqk",                                 // rongyu
        CONTENT + "p:nth-child(20)"                // rongyuContent
    };

    /**
     * Extracts the text from the default file ({@code D:/文件/智库网专家/XML/0.xml}).
     *
     * @return the extracted values, each followed by {@code "\r\n"}; an empty
     *         string if the file could not be read
     */
    public static String getText() {
        return getText(new File(DEFAULT_PATH));
    }

    /**
     * Parses {@code xmlFile} as UTF-8 with Jsoup and concatenates the text
     * matched by each entry of the selector table, in table order. Every
     * value, including the last, is followed by {@code "\r\n"}.
     *
     * <p>An {@link IOException} while reading the file is not propagated: its
     * stack trace is printed and an empty string is returned.
     *
     * @param xmlFile the file to parse; must not be {@code null}
     * @return the extracted text, or an empty string if reading failed
     * @throws NullPointerException if {@code xmlFile} is {@code null}
     */
    public static String getText(File xmlFile) {
        if (xmlFile == null) {
            throw new NullPointerException("xmlFile");
        }
        try {
            Document doc = Jsoup.parse(xmlFile, CHARSET);
            StringBuilder sb = new StringBuilder();
            for (String selector : SELECTORS) {
                sb.append(doc.select(selector).text()).append(LINE_END);
            }
            return sb.toString();
        } catch (IOException e) {
            // Best-effort contract kept for existing callers: report and return empty.
            e.printStackTrace();
            return "";
        }
    }
}
// Reposted from: https://www.cnblogs.com/xgwtzg/p/6141767.html