今天做数据抽取时一直在用 select,但发现有些数据抽取不出来;后来改用 XPath,发现更方便快捷。
首先给pom.xml加入依赖
<dependency>
<groupId>cn.wanghaomiao</groupId>
<artifactId>JsoupXpath</artifactId>
<version>2.2</version>
</dependency>
要抽取数据的 XPath 不用手写,可以在浏览器开发者工具中对目标节点使用 Copy XPath 直接复制得到:
/**
 * Extracts the text that follows the "地址:" (address) label from an HTML page.
 *
 * <p>A fixed XPath is evaluated against {@code html}; the string form of the last
 * matched node is used as the candidate text. The candidate is only processed when
 * it contains the marker "一、采购人:". In that case the method returns the segment
 * that follows the first "地址:" and precedes the next "联系方式:".
 *
 * @param html raw HTML of the page to inspect; {@code null} yields an empty string
 * @return the extracted segment, or {@code ""} when the XPath matches nothing, the
 *     marker is absent, or the expected labels are missing
 */
public static String rules7(String html) {
    if (html == null) {
        return "";
    }

    // XPath obtained via the browser's "Copy XPath"; it is tied to one specific page layout.
    String xpath = "//*[@id=\"detail\"]/div[2]/div/div[2]/div/div[3]/div/text()[2]";
    JXDocument jxDocument = new JXDocument(html);
    List<Object> rs = jxDocument.sel(xpath);
    if (rs == null || rs.isEmpty()) {
        return "";
    }

    // When several nodes match, the last one wins (same outcome as overwriting in a loop).
    String text = String.valueOf(rs.get(rs.size() - 1));
    if (!text.contains("一、采购人:")) {
        return "";
    }

    // Explicit length checks replace the previous catch-all exception handling:
    // split() drops trailing empty strings, so either array may be shorter than expected.
    String[] aroundAddress = text.split("地址:");
    if (aroundAddress.length < 2) {
        return "";
    }
    String[] beforeContact = aroundAddress[1].split("联系方式:");
    return beforeContact.length == 0 ? "" : beforeContact[0];
}