最近想做一个自己的应用市场,但苦于没有应用数据,因此用 jsoup 来抓取腾讯应用市场(应用宝)上各款 APP 的数据。
效果挺不错的,哈哈,想要的数据基本都能抓到。
下面直接看代码。
我把抓取逻辑简单封装了一下。
大家直接传入腾讯应用宝上具体软件的页面地址,即可进行解析。
- package com.ferris.event.app;
- import java.io.IOException;
- import java.util.ArrayList;
- import java.util.List;
- import org.jsoup.Jsoup;
- import org.jsoup.helper.StringUtil;
- import org.jsoup.nodes.Document;
- import org.jsoup.nodes.Element;
- import org.jsoup.select.Elements;
- import com.ferris.entity.AppDetail;
- import com.ferris.entity.AppDownloadInfo;
- import com.ferris.utils.StringUtils;
- public class AppGetService {
- public static final String urlhead = "http://android.myapp.com/myapp/";
- private static final Object lock = new Object();
- public AppGetService() {
- // TODO Auto-generated constructor stub
- }
- public static List<AppDownloadInfo> getNetApp(String app) {
- synchronized (lock) {
- List<AppDownloadInfo> liAppDownloadInfos = new ArrayList<AppDownloadInfo>();
- Document doc;
- try {
- doc = Jsoup.connect(app).get();
- Elements ListDiv = doc.getElementsByAttributeValue("class",
- "app-info-desc");
- for (Element element : ListDiv) {
- AppDownloadInfo appDownloadInfo = new AppDownloadInfo();
- appDownloadInfo.setAppname(element.select("a").attr(
- "appname"));
- appDownloadInfo.setAppsize(element.getElementsByClass(
- "size").text());
- appDownloadInfo.setTypename(element.getElementsByClass(
- "download").text());
- appDownloadInfo.setAppmd5(urlhead
- + element.select("a").attr("href"));
- appDownloadInfo.setIconurl(element.select("a").attr(
- "appicon"));
- appDownloadInfo.setPackagename(element.select("a").attr(
- "apk"));
- // http://dd.myapp.com/16891/548EBCD7DD5F97E652615EECBD352905.apk?fsname=com%2Etencent%2Emobileqq%5F5%2E2%2E1%5F182.apk&asr=8eff
- String downloadurl=element.select("a").attr(
- "ex_url");
- if(!StringUtils.isEmpty(downloadurl)){
- appDownloadInfo.setDownloadUrl(downloadurl.substring(0, downloadurl.indexOf("?")));
- }
- liAppDownloadInfos.add(appDownloadInfo);
- appDownloadInfo = null;
- }
- } catch (IOException e) {
- // TODO Auto-generated catch block
- e.printStackTrace();
- return null;
- }
- return liAppDownloadInfos;
- }
- }
- public static AppDetail getNetAppDetail(String appdetail) {
- synchronized (lock) {
- AppDetail appDetail = new AppDetail();
- Elements listDiv = null;
- Document doc;
- try {
- doc = Jsoup.connect(appdetail).get();
- if (doc == null) {
- return null;
- }
- List<String> tupian = new ArrayList<String>();
- listDiv = doc.getElementsByAttributeValue("class",
- "pic-img-box");
- if (listDiv != null && listDiv.size() > 0) {
- for (Element element : listDiv) {
- tupian.add(element.select("img").attr("data-src"));
- }
- appDetail.setPreviewurl(tupian);
- listDiv = null;
- }
- listDiv = doc.getElementsByAttributeValue("class",
- "det-othinfo-data");
- if (listDiv != null && listDiv.size() > 0) {
- final List<String> string = new ArrayList<String>();// 获取到
- // //
- // 版本号,以及开发商
- int z = 1;
- for (Element element : listDiv) {
- if (element.childNodeSize() > 0) {
- if (z == 1) {
- appDetail.setAppversion(element.text());
- ++z;
- }
- if (z == 2) {
- appDetail.setKaifashang(element.text());
- }
- }
- }
- listDiv = null;
- }
- listDiv = doc.getElementsByAttributeValue("class",
- "det-app-data-info");
- if (listDiv != null && listDiv.size() > 0) {
- for (Element element : listDiv) {
- if (element.childNodeSize() > 0) {
- appDetail.setDescription(element.text());
- }
- }
- listDiv = null;
- }
- } catch (IOException e) {
- // TODO Auto-generated catch block
- e.printStackTrace();
- }
- return appDetail;
- }
- }
- }