微博获取原图时,请求会被重定向到图片的 URL,所以获取到的内容是乱码(图片的二进制数据被当作文本处理)。
jsoup 默认会自动跟随重定向。
/**
 * Fetches {@code url} with browser-like request headers and returns the page
 * parsed as a jsoup {@link Document}.
 *
 * <p>Redirects are followed and the final URL of the response is printed to
 * standard output. Because the content type check is disabled, the request
 * also succeeds when it ends on a non-HTML resource (for example an image);
 * in that case no document is built, since parsing binary data as HTML only
 * yields garbled text.
 *
 * @param url the address to request
 * @return the parsed document, or {@code null} if the request failed or the
 *         response body is not a textual/XML content type
 */
public static Document getDoc1(String url) {
    Document doc = null;
    try {
        Connection con = Jsoup.connect(url); // build the request
        con.header("Accept-Encoding", "gzip, deflate");
        con.header("Connection", "keep-alive");
        // MIME types the (emulated) browser accepts.
        con.header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
        con.header("Accept-Language", "zh-cn,zh;q=0.8,en-us;q=0.5,en;q=0.3");
        con.header("Cache-Control", "max-age=0");
        con.header("Cookie", cookie);
        con.header("Host", "weibo.cn");
        con.header("User-Agent", " Mozilla/5.0 (Windows NT 6.1; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0");
        // Do not fail on non-HTML responses, so redirects to images can still be inspected.
        con.ignoreContentType(true);
        con.followRedirects(true);

        Response r = con.execute();
        // For a redirected request this is the final destination URL.
        System.out.println(r.url());

        // Only parse bodies that can sensibly become a Document; a missing
        // Content-Type header is treated as parseable.
        String contentType = r.contentType();
        boolean parseable = true;
        if (contentType != null) {
            String normalized = contentType.toLowerCase(java.util.Locale.ROOT);
            parseable = normalized.startsWith("text/") || normalized.contains("xml");
        }
        if (parseable) {
            doc = r.parse();
        }
    } catch (Exception e) {
        // Best effort: covers I/O failures as well as runtime errors such as a
        // malformed URL; the caller receives null.
        e.printStackTrace();
    }
    return doc;
}