因为要用到天气信息,在网上找了一些免费的 API,但不是有调用次数限制,就是返回的结果不满意。看了一下百度搜索结果中的天气信息比较合适,于是决定用爬虫把它抓下来。
在百度上搜索"北京天气",将浏览器的地址复制,作为请求地址
添加 jsoup 依赖(Maven):
<!-- jsoup: library used to fetch the page and select elements by CSS class -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.10.3</version>
</dependency>
抓取程序
package com.orange.utils;
import java.io.IOException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
public class TestWeather {
private static String weatherUrl = "http://www.baidu.com/baidu?tn=monline_3_dg&ie=utf-8&wd=%E5%8C%97%E4%BA%AC%E5%A4%A9%E6%B0%94";
public static void getWeather() {
String userAgent = UserAgentUtil.getUserAgents();
try {
Document doc = Jsoup.connect(weatherUrl).userAgent(userAgent).timeout(5000).get();
Elements a = doc.getElementsByClass("op_weather4_twoicon").get(0).getElementsByTag("a");
for (Element element : a) {
String quality = "";
String current = "";
String today = "";
//只有当天才有实时温度
if (!element.getElementsByClass("op_weather4_twoicon_shishi_title").isEmpty()) {
current = element.getElementsByClass("op_weather4_twoicon_shishi_title").text();
}
//空气质量
if (!element.getElementsByClass("op_weather4_twoicon_aqi_text_today").isEmpty()) {
quality = element.getElementsByClass("op_weather4_twoicon_aqi_text_today").text();
}else {
quality = element.getElementsByClass("op_weather4_twoicon_aqi_text").text();
}
//日期
if (!element.getElementsByClass("op_weather4_twoicon_date").isEmpty()) {
today = element.getElementsByClass("op_weather4_twoicon_date").text();
}else {
today = element.getElementsByClass("op_weather4_twoicon_date_day").text();
}
//风
String wind = element.getElementsByClass("op_weather4_twoicon_wind").text();
//天气
String weath = element.getElementsByClass("op_weather4_twoicon_weath").text();
//气温
String temp = element.getElementsByClass("op_weather4_twoicon_temp").text();
System.out.println(quality);
System.out.println(current);
System.out.println(today);
System.out.println(wind);
System.out.println(weath);
System.out.println(temp);
System.out.println("=============================");
}
} catch (IOException e) {
e.printStackTrace();
}
}
public static void main(String[] args) {
getWeather();
}
}、
UserAgentUtil 类(作用:提供 User-Agent 请求头。每次抓取时随机取一个浏览器的 User-Agent,模拟不同浏览器访问,不过可能并没有什么用)
package com.orange.utils;
import java.util.Random;
/**
 * Supplies User-Agent header strings from a fixed pool, so that successive requests can
 * present themselves as different browsers.
 */
public class UserAgentUtil {

    /** Pool of User-Agent strings to choose from. */
    private static final String[] USER_AGENTS = {
        "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; AcooBrowser; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
        "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Acoo Browser; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)",
        "Mozilla/4.0 (compatible; MSIE 7.0; AOL 9.5; AOLBuild 4337.35; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
        "Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US)",
        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 2.0.50727; Media Center PC 6.0)",
        "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 1.0.3705; .NET CLR 1.1.4322)",
        "Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 5.2; .NET CLR 1.1.4322; .NET CLR 2.0.50727; InfoPath.2; .NET CLR 3.0.04506.30)",
        "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN) AppleWebKit/523.15 (KHTML, like Gecko, Safari/419.3) Arora/0.3 (Change: 287 c9dfb30)",
        "Mozilla/5.0 (X11; U; Linux; en-US) AppleWebKit/527+ (KHTML, like Gecko, Safari/419.3) Arora/0.6",
        "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.2pre) Gecko/20070215 K-Ninja/2.1.1",
        "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9) Gecko/20080705 Firefox/3.0 Kapiko/3.0",
        "Mozilla/5.0 (X11; Linux i686; U;) Gecko/20070322 Kazehakase/0.4.5",
        "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.8) Gecko Fedora/1.9.0.8-1.fc10 Kazehakase/0.5.6",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1036.7 Safari/535.20",
        "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52"
    };

    /** Single generator reused across calls instead of allocating one per request. */
    private static final Random RANDOM = new Random();

    /**
     * Returns a User-Agent string picked uniformly at random from the pool.
     *
     * @return one of the pooled User-Agent strings, never {@code null}
     */
    public static String getUserAgents() {
        return USER_AGENTS[RANDOM.nextInt(USER_AGENTS.length)];
    }

    /**
     * Returns the User-Agent string at {@code index}, or a random one when the index is
     * outside the pool.
     *
     * @param index zero-based position in the pool
     * @return the entry at {@code index} if {@code 0 <= index < pool size}; otherwise a
     *         randomly chosen entry
     */
    public static String getUserAgents(int index) {
        // Upper bound is exclusive: index == length is already out of range.
        if (index < 0 || index >= USER_AGENTS.length) {
            return getUserAgents();
        }
        return USER_AGENTS[index];
    }
}
运行程序:
=============================
轻度
35
06月05日 周二 农历四月廿二
西南风4-5级
多云
25 ~ 38℃
=============================
中度
周三
北风3级
多云
25 ~ 36℃
=============================
良
周四
南风3级
多云
23 ~ 34℃
=============================
良
周五
北风3级
多云转雷阵雨
20 ~ 33℃
=============================
良
周六
北风3级
雷阵雨转小雨
17 ~ 26℃