采集亚马逊(ymx)商品信息:使用 Jsoup 抓取 amazon.cn 搜索结果页中的商品名称、链接和价格

/**
 * Demo entry point: fetches the first amazon.cn search-result page for a keyword and prints
 * the name, link and price of every listed product that has a non-empty title.
 *
 * <p>The search URL is built from the URL-encoded keyword, so changing {@code keyword} changes
 * the query. For the default keyword the resulting URL is expected to be identical to the one
 * that used to be hard-coded here.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    // Exact class-attribute values of the title link and the price element on a result item.
    final String titleLinkClass =
            "a-link-normal s-access-detail-page s-color-twister-title-link a-text-normal";
    final String priceClass = "a-size-base a-color-price s-price a-text-bold";

    String keyword = "鼠标";
    keyword = UrlEncoded.encodeString(keyword);
    System.out.println(keyword);

    System.out.println(System.currentTimeMillis());
    // The keyword appears twice in the query: inside the encoded "rh" filter ("i:aps,k:<kw>")
    // and as the plain "keywords" parameter.
    String urlStr = "https://www.amazon.cn/s/ref=sr_pg_1?rh=i%3Aaps%2Ck%3A" + keyword
            + "&page=1&keywords=" + keyword + "&ie=UTF8&qid=1535523044";
    try {
        String html = getHttpHeaders(urlStr);
        Document doc = Jsoup.parse(html);
        Elements items = doc.select("div.s-item-container");
        for (Element item : items) {
            // Look the title link up once; it carries both the product name and its URL.
            Elements titleLink = item.getElementsByAttributeValue("class", titleLinkClass);
            String goodName = titleLink.attr("title");
            String goodUrl = titleLink.attr("href");
            // Strip the currency sign so only the numeric part of the price is printed.
            String goodPrice = item.getElementsByAttributeValue("class", priceClass)
                    .text()
                    .replace("¥", "");
            // Containers without a title link are not product entries; skip them.
            if (goodName.length() > 0) {
                System.out.println("goodName=" + goodName);
                System.out.println("goodUrl=" + goodUrl);
                System.out.println("goodPrice=" + goodPrice);
            }
        }
    } catch (Exception e) {
        // Top-level boundary of the demo: report the failure and exit normally.
        e.printStackTrace();
    }
}

/**
 * Installs JVM-wide HTTPS defaults that accept every server certificate and every hostname.
 *
 * <p><b>Security warning:</b> this disables TLS certificate and hostname verification for all
 * {@link HttpsURLConnection}s created afterwards that rely on the defaults, which makes them
 * vulnerable to man-in-the-middle attacks. Only use it for throw-away experiments.
 *
 * <p>The call is best-effort: if the TLS context cannot be created, nothing is installed, the
 * failure is reported on {@code System.err} and the method returns normally.
 */
public static void trustEveryone() {
    try {
        // Trust manager that performs no checks at all on client or server certificates.
        X509TrustManager acceptAll = new X509TrustManager() {
            @Override
            public void checkClientTrusted(X509Certificate[] chain,
                    String authType) throws CertificateException {
            }

            @Override
            public void checkServerTrusted(X509Certificate[] chain,
                    String authType) throws CertificateException {
            }

            @Override
            public X509Certificate[] getAcceptedIssuers() {
                return new X509Certificate[0];
            }
        };

        // Build the context first so that a failure here leaves the JVM defaults untouched
        // instead of half-applied.
        SSLContext context = SSLContext.getInstance("TLS");
        context.init(null, new X509TrustManager[] { acceptAll }, new SecureRandom());

        HttpsURLConnection.setDefaultSSLSocketFactory(context.getSocketFactory());
        HttpsURLConnection.setDefaultHostnameVerifier(new HostnameVerifier() {
            @Override
            public boolean verify(String hostname, SSLSession session) {
                return true;
            }
        });
    } catch (Exception e) {
        // Deliberately non-fatal, but no longer silent.
        System.err.println("trustEveryone: could not install trust-all TLS defaults: " + e);
    }
}

/**
 * Downloads a page with browser-like request headers and returns its HTML.
 *
 * <p>The request is sent exactly once, via {@code conn.get()}; the previous extra
 * {@code conn.execute()} call issued a second, discarded request.
 *
 * @param urlStr absolute URL of the page to fetch
 * @return the HTML of the parsed document, or the fixed message
 *         {@code "请求的页面不存在或链接错误 !!!"} if anything goes wrong (the exception is
 *         printed to stderr); callers cannot tell that message apart from real content
 *         except by comparing the text
 */
public static String getHttpHeaders(String urlStr) {
    try {
        URL url = new URL(urlStr);
        // WARNING: disables TLS certificate/hostname verification for the whole JVM.
        trustEveryone();
        Connection conn = HttpConnection.connect(url);
        conn.timeout(10000);
        // GET is the default request method.
        conn.header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8");
        // Only advertise gzip: the JDK has no brotli decoder, so a "br" response could not be read.
        conn.header("Accept-Encoding", "gzip");
        conn.header("Accept-Language", "zh-CN,zh;q=0.9");
        conn.header("Cache-Control", "max-age=0");
        conn.header("Connection", "keep-alive");
        // NOTE(review): hard-coded session cookie captured from a browser; it is a credential
        // and will expire. It should be supplied via configuration instead of source code.
        conn.header("Cookie", "session-id=459-7685866-4114631; ubid-acbcn=462-7605214-6740260; x-wl-uid=1/8WbnRRWqUmuHEwnk0vv/5QUwLPz7ExJ8sDDXwii/B7q7Q7du4lfbGND+N96grUw6aQQqTgRWjU=; session-token=\"61BROo0R3lWTferrCdSW7uhrLZsEr2nenTtEcaRYw5gdaQSPQTdVCZcBY4Z0m5aG3dYjFa9ig743cQQN1d/OFW215SCO+iS21Y8D1Gtf5HEPvdlzUxTlXuDJS1+MHRB7+DYx9NNXQagHg+lb6Rsz3ZIuTyd+HfSEmuoJMTuVUnxNbFcW/5CoeB+bWzkX9UmOa7StU6t/s+ak5O6uulcWZ+MTPx7lsF0KJfc+9CY3d6g/esAYdddvWA==\"; session-id-time=2082729601l; csm-hit=tb:XHNG7XF4GSF6CRHXT26C+sa-Q9YXGCPZY9WTMWCPRN2W-5JG052ATG7KJSS01Y89P|"+System.currentTimeMillis()+"&adb:adblk_yes");
        conn.header("Host", "www.amazon.cn");
        conn.header("Upgrade-Insecure-Requests", "1");
        conn.header("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36");
        conn.ignoreContentType(true);
        // get() executes the request and parses the response.
        return conn.get().html();
    } catch (Exception e) {
        // Report the failure and fall through to the fixed error text below.
        e.printStackTrace();
    }

    return "请求的页面不存在或链接错误 !!!";
}
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值