使用 HttpClient、Jsoup 的爬虫获取指定网页内容并下载图片

最新推荐文章于 2024-04-19 20:10:27 发布

initialise

最新推荐文章于 2024-04-19 20:10:27 发布

阅读量943

点赞数

分类专栏： Android 文章标签： android 爬虫

本文链接：https://blog.csdn.net/printlnout/article/details/82496069

版权

Android 专栏收录该内容

15 篇文章 0 订阅

订阅专栏

HttpClient：读取指定URL网页内容；

Jsoup：解析所要的页面数据；

/**
 * Fetches the Douban celebrity page for {@code id} and extracts a link from its headline.
 *
 * <p>Requests {@code https://movie.douban.com/celebrity/<id>/} with a desktop-browser
 * User-Agent, parses the body with Jsoup and returns the {@code href} attribute of the first
 * {@code <a>} found inside {@code #wrapper -> #content -> #headline}. Despite the local name
 * {@code imgSrc}, the value read is an anchor {@code href}, not an {@code <img src>}.
 *
 * <p>The call blocks for 4 seconds before issuing the request (presumably to throttle
 * crawling; confirm with the caller) and performs network I/O, so it must not run on a UI
 * thread.
 *
 * @param id the celebrity id appended to the URL path; it is inserted verbatim, not encoded
 * @return the extracted {@code href} (an empty string when the headline has no anchor or the
 *         anchor has no {@code href}); {@code null} when {@code id} is null or empty, the
 *         response status is not 200, the response has no body, one of the expected elements
 *         is missing, the thread is interrupted, or any other error occurs
 */
public static String getHtmlByUrl(String id) {
		if (id == null || id.equals("")) {
			return null;
		}
		String userAgent = "Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.87 Safari/537.36";
		String url = "https://movie.douban.com/celebrity/" + id + "/";
		HttpClient httpClient = new DefaultHttpClient();
		try {
			// Fixed delay before every request.
			Thread.sleep(4000);
			HttpGet httpget = new HttpGet(url);
			httpget.setHeader("User-Agent", userAgent);
			HttpResponse response = httpClient.execute(httpget);
			if (response.getStatusLine().getStatusCode() != HttpStatus.SC_OK) {
				return null;
			}
			HttpEntity entity = response.getEntity();
			if (entity == null) {
				return null;
			}
			// Use UTF-8 when the server does not declare a charset; the one-argument
			// overload would fall back to ISO-8859-1 and garble non-Latin text.
			String html = EntityUtils.toString(entity, "UTF-8");
			Document document = Jsoup.parse(html);
			// Walk down the expected structure, tolerating a missing element at any level
			// instead of relying on a caught NullPointerException.
			Element wrapper = document.getElementById("wrapper");
			Element content = (wrapper == null) ? null : wrapper.getElementById("content");
			Element headline = (content == null) ? null : content.getElementById("headline");
			if (headline == null) {
				return null;
			}
			String imgSrc = headline.select("a").attr("href");
			Log.d(TAG, "getHtmlByUrl: "+imgSrc);
			return imgSrc;
		} catch (InterruptedException e) {
			// Preserve the interrupt so the caller's thread can observe the cancellation.
			Thread.currentThread().interrupt();
		} catch (Exception e) {
			Log.e(TAG, "getHtmlByUrl: request or parsing failed for " + url, e);
		} finally {
			// Releases the connection (and any unconsumed response) on every exit path.
			httpClient.getConnectionManager().shutdown();
		}
		return null;
	}

解析豆瓣网页，获取电影海报，下载并保存到指定目录

/**
 * Downloads the image at {@code path}, decodes it to a {@link Bitmap} and stores it via
 * {@code saveImageDish}.
 *
 * <p>An HTTP 429 response is retried after a 1 second pause, up to a fixed number of
 * attempts; the result of a successful retry is returned to the caller. Any other non-200
 * status is treated as a failure. Connect and read timeouts are both 10 seconds. The call
 * performs blocking network I/O.
 *
 * @param path      the image URL as a string
 * @param imageName the name passed on to {@code saveImageDish} for the stored file
 * @return the value returned by {@code saveImageDish} on success; an empty string when the
 *         URL is malformed, the server never answers 200, the body cannot be decoded as an
 *         image, the thread is interrupted, or any other error occurs
 */
public String setImage(String path, String imageName) {
		// Upper bound on attempts so a server that keeps answering 429 cannot make us
		// retry forever.
		final int maxAttempts = 5;
		try {
			URL url = new URL(path);
			for (int attempt = 1; attempt <= maxAttempts; attempt++) {
				HttpURLConnection connection = (HttpURLConnection) url.openConnection();
				try {
					connection.setRequestMethod("GET");
					connection.setConnectTimeout(10000);
					// Without a read timeout a stalled server would block this call indefinitely.
					connection.setReadTimeout(10000);
					int status = connection.getResponseCode();
					if (status == HttpURLConnection.HTTP_OK) {
						InputStream inputStream = connection.getInputStream();
						try {
							Bitmap bitmap = BitmapFactory.decodeStream(inputStream);
							if (bitmap == null) {
								// The response body was not a decodable image.
								return "";
							}
							return saveImageDish(bitmap, imageName);
						} finally {
							inputStream.close();
						}
					}
					// 429 Too Many Requests is the only status worth retrying.
					if (status != 429) {
						return "";
					}
				} finally {
					connection.disconnect();
				}
				if (attempt < maxAttempts) {
					Thread.sleep(1000);
				}
			}
		} catch (InterruptedException e) {
			// Preserve the interrupt so the caller's thread can observe the cancellation.
			Thread.currentThread().interrupt();
		} catch (Exception e) {
			e.printStackTrace();
		}
		return "";
	}

// 将图片保存到本地
	/**
	 * Writes {@code bitmap} as a JPEG (quality 100) named {@code imageName + ".jpg"} into the
	 * directory given by {@code SystemState.MovieImagePath}, creating the directory (including
	 * missing parents) when necessary.
	 *
	 * @param bitmap    the image to store; {@code null} is treated as a failure
	 * @param imageName the file name without extension
	 * @return {@code StringUtil.MOVIE_IMAGE} followed by the stored file's name, or an empty
	 *         string when the bitmap is null, the directory cannot be created, encoding fails
	 *         or an I/O error occurs
	 */
	private String saveImageDish(Bitmap bitmap, String imageName) {
		if (bitmap == null) {
			return "";
		}
		try {
			File dir = new File(SystemState.MovieImagePath);
			// The final isDirectory() check covers the directory being created concurrently
			// between the first check and mkdirs().
			if (!dir.isDirectory() && !dir.mkdirs() && !dir.isDirectory()) {
				return "";
			}
			File file = new File(dir, imageName + ".jpg");
			boolean written;
			FileOutputStream out = new FileOutputStream(file);
			try {
				written = bitmap.compress(Bitmap.CompressFormat.JPEG, 100, out);
			} finally {
				// Close even when compress throws, so the file descriptor is not leaked.
				out.close();
			}
			if (!written) {
				// Best-effort cleanup of the partial file; a path to it must not be reported.
				file.delete();
				return "";
			}
			return StringUtil.MOVIE_IMAGE + file.getName();
		} catch (Exception e) {
			e.printStackTrace();
		}
		return "";
	}