// JSOUP解析页面的方法 (How to parse a page with jsoup)
import java.io.IOException;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Looks up products on amazon.cn by script code using jsoup and exports the
 * collected matches through {@code MatchDtoToExcel}.
 */
public class MatchAmazon {

    /** Search URL prefix; the URL-encoded keyword is appended to it. */
    private static final String SEARCH_URL =
            "http://www.amazon.cn/s/ref=nb_sb_noss?__mk_zh_CN=%E4%BA%9A%E9%A9%AC%E9%80%8A%E7%BD%91%E7%AB%99&url=search-alias%3Daps&field-keywords=";

    /** User agent sent with every search request. */
    private static final String USER_AGENT =
            "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9.2.19) Gecko/20110707 Firefox/3.6.19";

    /** CSS selector of the container holding the search results. */
    private static final String RESULT_LIST_SELECTOR = "div#atfResults.list";

    /** CSS selector of the title of the first search result. */
    private static final String FIRST_TITLE_SELECTOR = "div#result_0.result div.data h3.title";

    /**
     * Searches amazon.cn for the script code of the given product and returns
     * a {@code MatchInfo} describing the first search result.
     *
     * <p>The returned object always carries the id, product id, product name,
     * script code and product code copied from {@code productInfo}. The
     * opponent product code and name are set only when the search page could
     * be fetched and a result could be extracted from it; otherwise they are
     * left untouched.
     *
     * @param productInfo the product to look up; must not be {@code null}
     * @return the match information, never {@code null}
     */
    public static MatchInfo getProductCode(ProductInfo productInfo) {
        String scriptCode = productInfo.getScriptCode();

        // Fields copied from the source product are the same whether or not
        // a match is found, so fill them in once up front.
        MatchInfo opponInfo = new MatchInfo();
        opponInfo.setId(productInfo.getId());
        opponInfo.setProductId(productInfo.getProductId());
        opponInfo.setProductName(productInfo.getProductName());
        opponInfo.setScriptCode(scriptCode);
        opponInfo.setProductCode(productInfo.getProductCode());

        Document doc = fetchSearchPage(scriptCode);
        if (doc == null) {
            return opponInfo;
        }

        Elements resultList = doc.select(RESULT_LIST_SELECTOR);
        String productCode = extractNameAttribute(resultList.html());
        if (productCode == null) {
            // No result list, or no name attribute inside it: no match.
            return opponInfo;
        }

        // first() returns null when nothing matches the selector.
        Element title = doc.select(FIRST_TITLE_SELECTOR).first();
        String productName = (title == null) ? null : title.text();
        System.out.println("productCode:" + productCode + "\tproductName:" + productName);

        opponInfo.setOpponProductCode(productCode);
        if (productName != null) {
            opponInfo.setOpponProductName(productName);
        }
        return opponInfo;
    }

    /**
     * Reads up to {@code length} products from the CSV file starting at
     * {@code startIndex}, looks each one up via
     * {@link #getProductCode(ProductInfo)} and hands the results to
     * {@code MatchDtoToExcel.MatchInfo2Excel}.
     *
     * <p>A product whose first lookup yields no opponent product code is
     * looked up a second time; every product contributes exactly one entry
     * to the exported list.
     *
     * @param filePath   path of the CSV file passed to {@code CsvImporter}
     * @param startIndex first row to read, passed to {@code nextRows}
     * @param length     number of rows to read, passed to {@code nextRows}
     * @param fileName   name passed to {@code MatchInfo2Excel}
     * @throws Exception if reading the CSV input fails
     */
    public static void exportAmazonToExcel(String filePath, Integer startIndex, Integer length, String fileName) throws Exception {
        CsvImporter csvImporter = new CsvImporter(filePath);
        List<ProductInfo> dataList = csvImporter.nextRows(startIndex, length);
        List<MatchInfo> result = new ArrayList<MatchInfo>();
        for (ProductInfo productInfo : dataList) {
            MatchInfo matchResult = getProductCode(productInfo);
            if (isUnmatched(matchResult)) {
                // Retry once; the first attempt may have failed transiently.
                matchResult = getProductCode(productInfo);
            }
            result.add(matchResult);
        }
        System.out.println(result.size());
        try {
            MatchDtoToExcel.MatchInfo2Excel(result, fileName);
        } catch (Exception e) {
            // Export failures are reported on stderr and not rethrown,
            // matching the previous behavior callers rely on.
            e.printStackTrace();
        }
    }

    /**
     * Fetches the search result page for the given keyword.
     *
     * @return the parsed page, or {@code null} when the keyword is missing
     *         or the request fails
     */
    private static Document fetchSearchPage(String scriptCode) {
        if (scriptCode == null || scriptCode.length() == 0) {
            return null;
        }
        try {
            // Encode the keyword so characters such as '&', '#' or spaces
            // cannot corrupt the query string.
            String keyword = URLEncoder.encode(scriptCode, "UTF-8");
            return Jsoup.connect(SEARCH_URL + keyword).userAgent(USER_AGENT).get();
        } catch (IOException e) {
            // Best effort: a failed request is treated as "no match".
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Extracts the value of the first {@code name="..."} attribute found in
     * the given HTML.
     *
     * @return the attribute value, or {@code null} when the HTML is empty,
     *         has no such attribute, or the value is empty
     */
    private static String extractNameAttribute(String html) {
        if (html == null) {
            return null;
        }
        int marker = html.indexOf("name=");
        if (marker < 0) {
            return null;
        }
        // Skip over name= and the opening quote.
        int start = marker + 6;
        // Look for the closing quote after the value starts, so an earlier
        // quote in the markup cannot produce an inverted range.
        int end = html.indexOf('"', start);
        if (end < 0) {
            return null;
        }
        String value = html.substring(start, end);
        return value.length() == 0 ? null : value;
    }

    /** Returns whether the lookup produced no opponent product code. */
    private static boolean isUnmatched(MatchInfo matchInfo) {
        String code = matchInfo.getOpponProductCode();
        return code == null || code.length() == 0;
    }
}