package com.sxit;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Scanner;
import org.apache.commons.codec.binary.Base64;
import org.apache.http.HttpResponse;
import org.apache.http.NameValuePair;
import org.apache.http.client.HttpClient;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.util.EntityUtils;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
/**
 * Single-threaded tool that downloads the pictures of a Sina Weibo photo album.
 *
 * <p>{@link #main} reads the account, password, target user and picture count from
 * standard input, {@link #login} signs in and collects the picture URLs, and
 * {@link #upload} stores every collected picture under {@code E:/tmp/<targetname>}.
 *
 * @author smile
 * @since 2013-02-03 22:56:33
 * @version 1.0
 */
public class MoPic {
    /** Root folder; one sub-folder per target user is created below it. */
    private static final String SAVE_ROOT = "E:/tmp";
    /** Connect and read timeout used for picture downloads, in milliseconds. */
    private static final int TIMEOUT_MILLIS = 100 * 1000;

    /** One client is used for every request (presumably so the session obtained at login carries over). */
    private final static HttpClient client = new DefaultHttpClient();
    /** Picture URLs; appended to by {@link #login} and read by {@link #upload}. */
    private static List<String> picList = new ArrayList<String>();

    /**
     * Prompts for the account, password, target user and picture count, then logs in
     * and downloads the pictures. Any failure is reported with a stack trace.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        try {
            // Not closed on purpose: closing the Scanner would also close System.in.
            Scanner scan = new Scanner(System.in);
            System.out.println("请输入你的用户名:");
            String username = scan.nextLine();
            System.out.println("请输入你的密码:");
            String password = scan.nextLine();
            System.out.println("请输入目标用户的用户名:");
            String targetname = scan.nextLine();
            System.out.println("请输入需要下载的相片数量:");
            // trim() so that stray spaces around the number do not fail the parse.
            int count = Integer.parseInt(scan.nextLine().trim());
            // log in and collect the picture URLs
            login(username, password, targetname, count);
            // download them
            upload(targetname);
        } catch (Exception e) {
            // Covers IOException, JSONException and a malformed picture count alike.
            e.printStackTrace();
        }
    }

    /**
     * Downloads every picture collected by {@link #login} into
     * {@code E:/tmp/<targetname>}. Despite its name this method downloads; the name is
     * kept for compatibility with existing callers.
     *
     * <p>Pictures are processed from the last list entry to the first and each file is
     * named {@code <list index><suffix>}. A picture that fails to download is reported
     * and skipped; the remaining pictures are still attempted.
     *
     * @param targetname name of the target user, used as the sub-folder name
     * @throws Exception if the target folder cannot be created
     */
    public static void upload(String targetname) throws Exception {
        if (picList == null || picList.isEmpty()) {
            System.out.println("无相片信息!");
            return;
        }
        // mkdirs() also creates the root folder when it is missing.
        File dir = new File(SAVE_ROOT, targetname);
        if (!dir.exists() && !dir.mkdirs()) {
            throw new IOException("Cannot create directory " + dir.getAbsolutePath());
        }
        for (int i = picList.size() - 1; i >= 0; i--) {
            String p_url = picList.get(i);
            if (p_url == null || "".equals(p_url)) {
                continue;
            }
            try {
                int index = p_url.lastIndexOf(".");
                System.out.println("索引位:" + index);
                String pos = extensionOf(p_url);
                System.out.println("后缀为:" + pos);
                downloadPicture(p_url, new File(dir, i + pos));
                System.out.println("下载完第" + i + "张图");
            } catch (Exception e) {
                // Report the failure instead of dropping it, then go on with the next picture.
                System.out.println("第" + i + "张图片下载失败,地址为:" + p_url);
                e.printStackTrace();
            }
        }
    }

    /**
     * Returns the file suffix of a picture URL including the leading dot, or an empty
     * string when the last path segment has no dot.
     */
    private static String extensionOf(String pictureUrl) {
        int dot = pictureUrl.lastIndexOf('.');
        // A dot located before the last '/' belongs to the host or a folder, not the file name.
        if (dot < 0 || dot < pictureUrl.lastIndexOf('/')) {
            return "";
        }
        return pictureUrl.substring(dot);
    }

    /** Copies the content behind {@code pictureUrl} into {@code target}, always closing both streams. */
    private static void downloadPicture(String pictureUrl, File target) throws IOException {
        HttpURLConnection con = (HttpURLConnection) new URL(pictureUrl).openConnection();
        con.setConnectTimeout(TIMEOUT_MILLIS);
        con.setReadTimeout(TIMEOUT_MILLIS);
        InputStream is = null;
        OutputStream os = null;
        try {
            is = new BufferedInputStream(con.getInputStream());
            os = new BufferedOutputStream(new FileOutputStream(target));
            byte[] buffer = new byte[1024];
            int length;
            while ((length = is.read(buffer)) != -1) {
                os.write(buffer, 0, length);
            }
            // Flush explicitly so a write error surfaces here rather than in the quiet close below.
            os.flush();
        } finally {
            closeQuietly(is);
            closeQuietly(os);
        }
    }

    /** Closes {@code resource} if it is non-null, ignoring any error raised by the close itself. */
    private static void closeQuietly(java.io.Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException ignored) {
            // Nothing useful can be done when closing fails.
        }
    }

    /**
     * Logs in to Sina Weibo, resolves the target user's album and appends the URL of
     * every picture returned by the album listing to the shared picture list.
     *
     * <p>The values are cut out of the returned pages with fixed markers and offsets,
     * so the parsing depends on the page layout the code was written against.
     *
     * @param username   login account
     * @param password   login password
     * @param targetName Weibo name of the user whose album is read
     * @param pCount     number of pictures requested from the album listing
     * @throws IOException   if a request fails or a response lacks an expected marker
     * @throws JSONException if the album listing is not the expected JSON
     */
    public static void login(String username, String password, String targetName, int pCount) throws IOException, JSONException {
        HttpPost post = new HttpPost("http://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.4.5)");
        post.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:18.0) Gecko/20100101 Firefox/18.0");
        post.setHeader("Referer", "http://weibo.com/");
        post.setHeader("Content-Type", "application/x-www-form-urlencoded");
        String servertime = getServerTime();
        String nonce = makeNonce(6);
        // login form fields
        List<NameValuePair> qparams = new ArrayList<NameValuePair>();
        qparams.add(new BasicNameValuePair("entry", "weibo"));
        qparams.add(new BasicNameValuePair("gateway", "1"));
        qparams.add(new BasicNameValuePair("from", ""));
        qparams.add(new BasicNameValuePair("savestate", "0"));
        qparams.add(new BasicNameValuePair("useticket", "1"));
        qparams.add(new BasicNameValuePair("pagerefer", ""));
        qparams.add(new BasicNameValuePair("service", "miniblog"));
        qparams.add(new BasicNameValuePair("servertime", servertime));
        qparams.add(new BasicNameValuePair("nonce", nonce));
        qparams.add(new BasicNameValuePair("pwencode", "wsse"));
        qparams.add(new BasicNameValuePair("encoding", "UTF-8"));
        qparams.add(new BasicNameValuePair("url", "http://weibo.com/ajaxlogin.php?framelogin=1&callback=parent.sinaSSOController.feedBackUrlCallBack"));
        qparams.add(new BasicNameValuePair("returntype", "META"));
        // encoded account name and password
        qparams.add(new BasicNameValuePair("su", encodeAccount(username)));
        qparams.add(new BasicNameValuePair("sp", new SinaSSOEncoder().encode(password, servertime, nonce)));
        post.setEntity(new UrlEncodedFormEntity(qparams, "utf-8"));
        HttpResponse response = client.execute(post);
        String entity = EntityUtils.toString(response.getEntity());
        System.out.println("entity为:" + entity);

        // The response embeds the URL-encoded follow-up address, ending in "code=0" (6 characters).
        int urlStart = locate(entity, "http%3A%2F%2Fweibo.com%2Fajaxlogin.php");
        int urlEnd = locate(entity, "code=0") + 6;
        // Explicit charset: the one-argument decode() is deprecated and platform dependent.
        String url = URLDecoder.decode(slice(entity, urlStart, urlEnd), "UTF-8");
        System.out.println("真实地址为:" + url);

        // follow the real address
        entity = fetch(url);
        System.out.println("----->>>" + entity);
        // +13 skips the 10-character marker "userdomain" plus the 3 characters after it.
        entity = slice(entity, locate(entity, "userdomain") + 13, locateLast(entity, "\""));
        System.out.println("......." + entity);

        // id of the logged-in user
        String uid = extractOid(fetch("http://weibo.com/" + entity));
        System.out.println(uid);

        // id of the target user, e.g. target name "bearsun"
        String pid = extractOid(fetch("http://weibo.com/" + targetName));
        System.out.println(pid);

        // Only the "pictures posted with weibos" album is read, e.g.
        // http://photo.weibo.com/1511804135/talbum/index?from=profile_wb
        String albumPage = fetch("http://photo.weibo.com/" + pid + "/talbum/index?from=profile_wb");
        // +9 skips "album_id" plus one character; the value ends 36 characters before "album_info".
        String albumId = slice(albumPage, locate(albumPage, "album_id") + 9, locate(albumPage, "album_info") - 36);
        System.out.println(albumId);

        // Picture example: http://ww3.sinaimg.cn/mw690/6fb242fdjw1dzke8vygnwj.jpg
        // Listing example: http://photo.weibo.com/photos/get_all?uid=1511804135&album_id=14503807&count=32&page=1&type=3
        entity = fetch("http://photo.weibo.com/photos/get_all?uid=" + pid + "&album_id=" + albumId + "&count=" + pCount + "&page=1&type=3");
        JSONObject listing = new JSONObject(entity);
        String data = listing.get("data").toString();
        System.out.println(data);
        JSONArray list = new JSONObject(data).getJSONArray("photo_list");
        for (int i = 0; i < list.length(); i++) {
            String pic_name = "http://ww3.sinaimg.cn/mw690/" + list.getJSONObject(i).getString("pic_name");
            System.out.println(pic_name);
            picList.add(pic_name);
        }
    }

    /** Performs a GET request with the shared client and returns the response body as text. */
    private static String fetch(String url) throws IOException {
        HttpResponse response = client.execute(new HttpGet(url));
        return EntityUtils.toString(response.getEntity());
    }

    /**
     * Cuts the user id out of a profile page: it starts 9 characters after the first
     * "oid" and ends 3 characters before the last "$CONFIG['onick']".
     */
    private static String extractOid(String page) throws IOException {
        return slice(page, locate(page, "oid") + 9, locateLast(page, "$CONFIG['onick']") - 3);
    }

    /** Returns the index of the first occurrence of {@code marker}, failing with a clear message when absent. */
    private static int locate(String text, String marker) throws IOException {
        int idx = text.indexOf(marker);
        if (idx < 0) {
            throw new IOException("Unexpected response: marker \"" + marker + "\" not found");
        }
        return idx;
    }

    /** Returns the index of the last occurrence of {@code marker}, failing with a clear message when absent. */
    private static int locateLast(String text, String marker) throws IOException {
        int idx = text.lastIndexOf(marker);
        if (idx < 0) {
            throw new IOException("Unexpected response: marker \"" + marker + "\" not found");
        }
        return idx;
    }

    /** Range-checked {@code substring} that reports an inconsistent range as an {@link IOException}. */
    private static String slice(String text, int from, int to) throws IOException {
        if (from < 0 || to > text.length() || from > to) {
            throw new IOException("Unexpected response layout: cannot cut [" + from + ", " + to
                    + ") out of " + text.length() + " characters");
        }
        return text.substring(from, to);
    }

    /**
     * Encodes the login account for the "su" form field: URL-encode as UTF-8, then Base64.
     * Returns an empty string if UTF-8 were unsupported.
     */
    private static String encodeAccount(String account) {
        String userName = "";
        try {
            // Explicit charset so the result does not depend on the platform default.
            userName = Base64.encodeBase64String(URLEncoder.encode(account, "UTF-8").getBytes("UTF-8"));
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
        }
        return userName;
    }

    /** Builds a random string of {@code len} characters drawn from A-Z and 0-9. */
    private static String makeNonce(int len) {
        String alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
        StringBuilder nonce = new StringBuilder();
        for (int i = 0; i < len; i++) {
            nonce.append(alphabet.charAt((int) (Math.random() * alphabet.length())));
        }
        return nonce.toString();
    }

    /** Returns the current local time in whole seconds since the epoch; sent as the "servertime" field. */
    private static String getServerTime() {
        return String.valueOf(System.currentTimeMillis() / 1000);
    }
}
package com.sxit;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Scanner;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import org.apache.commons.codec.binary.Base64;
import org.apache.http.HttpResponse;
import org.apache.http.NameValuePair;
import org.apache.http.client.HttpClient;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.util.EntityUtils;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
/**
 * Multi-threaded tool that downloads the pictures of a Sina Weibo photo album.
 *
 * <p>{@link #main} reads the account, password, target user and picture count from
 * standard input, {@link #login} signs in and collects the picture URLs, and
 * {@link #upload} splits the list into segments that are downloaded by a thread pool
 * into {@code E:/tmp/<targetname>}.
 *
 * @author smile
 * @since 2013-02-03 22:56:33
 * @version 1.0
 */
public class MoPicThread {
    /** Root folder; one sub-folder per target user is created below it. */
    private static final String SAVE_ROOT = "E:/tmp";
    /** Connect and read timeout used for picture downloads, in milliseconds. */
    private static final int TIMEOUT_MILLIS = 100 * 1000;
    /** Number of worker threads in the download pool. */
    private static final int POOL_SIZE = 10;
    /** Number of pictures handed to one download task. */
    private static final int SEGMENT_SIZE = 20;

    /** One client is used for every request (presumably so the session obtained at login carries over). */
    private final static HttpClient client = new DefaultHttpClient();
    /** Picture URLs; appended to by {@link #login} before the download tasks read them. */
    private static List<String> picList = new ArrayList<String>();

    /**
     * Prompts for the account, password, target user and picture count, then logs in
     * and starts the downloads. Any failure is reported with a stack trace.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        try {
            // Not closed on purpose: closing the Scanner would also close System.in.
            Scanner scan = new Scanner(System.in);
            System.out.println("请输入你的用户名:");
            String username = scan.nextLine();
            System.out.println("请输入你的密码:");
            String password = scan.nextLine();
            System.out.println("请输入目标用户的用户名:");
            String targetname = scan.nextLine();
            System.out.println("请输入需要下载的相片数量:");
            // trim() so that stray spaces around the number do not fail the parse.
            int count = Integer.parseInt(scan.nextLine().trim());
            // log in and collect the picture URLs
            login(username, password, targetname, count);
            // download them
            upload(targetname);
        } catch (Exception e) {
            // Covers IOException, JSONException and a malformed picture count alike.
            e.printStackTrace();
        }
    }

    /**
     * Splits the collected picture list into segments of {@value #SEGMENT_SIZE} and
     * submits one download task per segment to a pool of {@value #POOL_SIZE} threads.
     * Despite its name this method downloads; the name is kept for compatibility.
     *
     * <p>Every list index is covered exactly once, including a final segment shorter
     * than {@value #SEGMENT_SIZE}. The method returns after the tasks are submitted and
     * the pool is told to shut down; it does not wait for the downloads to finish.
     *
     * @param targetname name of the target user, used as the sub-folder name
     * @throws Exception if the target folder cannot be created
     */
    public static void upload(String targetname) throws Exception {
        if (picList == null || picList.isEmpty()) {
            System.out.println("无相片信息!");
            return;
        }
        // mkdirs() also creates the root folder when it is missing.
        File dir = new File(SAVE_ROOT, targetname);
        if (!dir.exists() && !dir.mkdirs()) {
            throw new IOException("Cannot create directory " + dir.getAbsolutePath());
        }
        ExecutorService executor = Executors.newFixedThreadPool(POOL_SIZE);
        try {
            int total = picList.size();
            for (int start = 0; start < total; start += SEGMENT_SIZE) {
                // Inclusive end index, clamped so the last segment stops at the final picture.
                int end = Math.min(start + SEGMENT_SIZE, total) - 1;
                executor.submit(new ImagThread(start, end, targetname));
            }
        } finally {
            // Always release the pool; already-submitted tasks still run to completion.
            executor.shutdown();
        }
    }

    /**
     * Download task for one contiguous segment of the picture list.
     */
    static class ImagThread implements Runnable {
        /** First list index handled by this task (inclusive). */
        private final int start;
        /** Last list index handled by this task (inclusive). */
        private final int end;
        /** Target user name, used as the sub-folder name. */
        private final String targetname;

        public ImagThread(int start, int end, String targetname) {
            this.start = start;
            this.end = end;
            this.targetname = targetname;
        }

        /** Downloads each picture of the segment; a failed picture is reported and skipped. */
        public void run() {
            for (int i = start; i <= end; i++) {
                String address = null;
                try {
                    address = picList.get(i);
                    uploadImag(i, targetname, address);
                } catch (Exception e) {
                    System.out.println("第" + i + "张图片下载失败,地址为:" + address);
                    // Keep the cause visible; the original message alone hid why it failed.
                    e.printStackTrace();
                    continue;
                }
                System.out.println("线程" + Thread.currentThread().getName() + "下载完第" + i + "张图片");
            }
        }
    }

    /**
     * Downloads a single picture to {@code E:/tmp/<targetname>/<i><suffix>}.
     * The target folder is expected to exist already ({@link #upload} creates it).
     *
     * @param i          list index of the picture, used as the file name
     * @param targetname name of the target user, used as the sub-folder name
     * @param p_url      address of the picture
     * @throws Exception if the address is invalid or the transfer fails
     */
    public static void uploadImag(int i, String targetname, String p_url) throws Exception {
        URL url = new URL(p_url);
        File target = new File(new File(SAVE_ROOT, targetname), i + extensionOf(p_url));
        HttpURLConnection con = (HttpURLConnection) url.openConnection();
        con.setConnectTimeout(TIMEOUT_MILLIS);
        con.setReadTimeout(TIMEOUT_MILLIS);
        InputStream is = null;
        OutputStream os = null;
        try {
            is = new BufferedInputStream(con.getInputStream());
            os = new BufferedOutputStream(new FileOutputStream(target));
            byte[] buffer = new byte[1024];
            int length;
            while ((length = is.read(buffer)) != -1) {
                os.write(buffer, 0, length);
            }
            // Flush explicitly so a write error surfaces here rather than in the quiet close below.
            os.flush();
        } finally {
            // Null-safe and independent closes: a failed connection must not turn into an
            // NullPointerException that hides the real error, and one failing close must not skip the other.
            closeQuietly(is);
            closeQuietly(os);
        }
    }

    /**
     * Returns the file suffix of a picture URL including the leading dot, or an empty
     * string when the last path segment has no dot.
     */
    private static String extensionOf(String pictureUrl) {
        int dot = pictureUrl.lastIndexOf('.');
        // A dot located before the last '/' belongs to the host or a folder, not the file name.
        if (dot < 0 || dot < pictureUrl.lastIndexOf('/')) {
            return "";
        }
        return pictureUrl.substring(dot);
    }

    /** Closes {@code resource} if it is non-null, ignoring any error raised by the close itself. */
    private static void closeQuietly(java.io.Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException ignored) {
            // Nothing useful can be done when closing fails.
        }
    }

    /**
     * Logs in to Sina Weibo, resolves the target user's album and appends the URL of
     * every picture returned by the album listing to the shared picture list.
     *
     * <p>The values are cut out of the returned pages with fixed markers and offsets,
     * so the parsing depends on the page layout the code was written against.
     *
     * @param username   login account
     * @param password   login password
     * @param targetName Weibo name of the user whose album is read
     * @param pCount     number of pictures requested from the album listing
     * @throws IOException   if a request fails or a response lacks an expected marker
     * @throws JSONException if the album listing is not the expected JSON
     */
    public static void login(String username, String password, String targetName, int pCount) throws IOException, JSONException {
        HttpPost post = new HttpPost("http://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.4.5)");
        post.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:18.0) Gecko/20100101 Firefox/18.0");
        post.setHeader("Referer", "http://weibo.com/");
        post.setHeader("Content-Type", "application/x-www-form-urlencoded");
        String servertime = getServerTime();
        String nonce = makeNonce(6);
        // login form fields
        List<NameValuePair> qparams = new ArrayList<NameValuePair>();
        qparams.add(new BasicNameValuePair("entry", "weibo"));
        qparams.add(new BasicNameValuePair("gateway", "1"));
        qparams.add(new BasicNameValuePair("from", ""));
        qparams.add(new BasicNameValuePair("savestate", "0"));
        qparams.add(new BasicNameValuePair("useticket", "1"));
        qparams.add(new BasicNameValuePair("pagerefer", ""));
        qparams.add(new BasicNameValuePair("service", "miniblog"));
        qparams.add(new BasicNameValuePair("servertime", servertime));
        qparams.add(new BasicNameValuePair("nonce", nonce));
        qparams.add(new BasicNameValuePair("pwencode", "wsse"));
        qparams.add(new BasicNameValuePair("encoding", "UTF-8"));
        qparams.add(new BasicNameValuePair("url", "http://weibo.com/ajaxlogin.php?framelogin=1&callback=parent.sinaSSOController.feedBackUrlCallBack"));
        qparams.add(new BasicNameValuePair("returntype", "META"));
        // encoded account name and password
        qparams.add(new BasicNameValuePair("su", encodeAccount(username)));
        qparams.add(new BasicNameValuePair("sp", new SinaSSOEncoder().encode(password, servertime, nonce)));
        post.setEntity(new UrlEncodedFormEntity(qparams, "utf-8"));
        HttpResponse response = client.execute(post);
        String entity = EntityUtils.toString(response.getEntity());
        System.out.println("entity为:" + entity);

        // The response embeds the URL-encoded follow-up address, ending in "code=0" (6 characters).
        int urlStart = locate(entity, "http%3A%2F%2Fweibo.com%2Fajaxlogin.php");
        int urlEnd = locate(entity, "code=0") + 6;
        // Explicit charset: the one-argument decode() is deprecated and platform dependent.
        String url = URLDecoder.decode(slice(entity, urlStart, urlEnd), "UTF-8");
        System.out.println("真实地址为:" + url);

        // follow the real address
        entity = fetch(url);
        System.out.println("----->>>" + entity);
        // +13 skips the 10-character marker "userdomain" plus the 3 characters after it.
        entity = slice(entity, locate(entity, "userdomain") + 13, locateLast(entity, "\""));
        System.out.println("......." + entity);

        // id of the logged-in user
        String uid = extractOid(fetch("http://weibo.com/" + entity));
        System.out.println(uid);

        // id of the target user, e.g. target name "bearsun"
        String pid = extractOid(fetch("http://weibo.com/" + targetName));
        System.out.println(pid);

        // Only the "pictures posted with weibos" album is read, e.g.
        // http://photo.weibo.com/1511804135/talbum/index?from=profile_wb
        String albumPage = fetch("http://photo.weibo.com/" + pid + "/talbum/index?from=profile_wb");
        // +9 skips "album_id" plus one character; the value ends 36 characters before "album_info".
        String albumId = slice(albumPage, locate(albumPage, "album_id") + 9, locate(albumPage, "album_info") - 36);
        System.out.println(albumId);

        // Picture example: http://ww3.sinaimg.cn/mw690/6fb242fdjw1dzke8vygnwj.jpg
        // Listing example: http://photo.weibo.com/photos/get_all?uid=1511804135&album_id=14503807&count=32&page=1&type=3
        entity = fetch("http://photo.weibo.com/photos/get_all?uid=" + pid + "&album_id=" + albumId + "&count=" + pCount + "&page=1&type=3");
        JSONObject listing = new JSONObject(entity);
        String data = listing.get("data").toString();
        System.out.println(data);
        JSONArray list = new JSONObject(data).getJSONArray("photo_list");
        for (int i = 0; i < list.length(); i++) {
            String pic_name = "http://ww3.sinaimg.cn/mw690/" + list.getJSONObject(i).getString("pic_name");
            System.out.println(pic_name);
            picList.add(pic_name);
        }
    }

    /** Performs a GET request with the shared client and returns the response body as text. */
    private static String fetch(String url) throws IOException {
        HttpResponse response = client.execute(new HttpGet(url));
        return EntityUtils.toString(response.getEntity());
    }

    /**
     * Cuts the user id out of a profile page: it starts 9 characters after the first
     * "oid" and ends 3 characters before the last "$CONFIG['onick']".
     */
    private static String extractOid(String page) throws IOException {
        return slice(page, locate(page, "oid") + 9, locateLast(page, "$CONFIG['onick']") - 3);
    }

    /** Returns the index of the first occurrence of {@code marker}, failing with a clear message when absent. */
    private static int locate(String text, String marker) throws IOException {
        int idx = text.indexOf(marker);
        if (idx < 0) {
            throw new IOException("Unexpected response: marker \"" + marker + "\" not found");
        }
        return idx;
    }

    /** Returns the index of the last occurrence of {@code marker}, failing with a clear message when absent. */
    private static int locateLast(String text, String marker) throws IOException {
        int idx = text.lastIndexOf(marker);
        if (idx < 0) {
            throw new IOException("Unexpected response: marker \"" + marker + "\" not found");
        }
        return idx;
    }

    /** Range-checked {@code substring} that reports an inconsistent range as an {@link IOException}. */
    private static String slice(String text, int from, int to) throws IOException {
        if (from < 0 || to > text.length() || from > to) {
            throw new IOException("Unexpected response layout: cannot cut [" + from + ", " + to
                    + ") out of " + text.length() + " characters");
        }
        return text.substring(from, to);
    }

    /**
     * Encodes the login account for the "su" form field: URL-encode as UTF-8, then Base64.
     * Returns an empty string if UTF-8 were unsupported.
     */
    private static String encodeAccount(String account) {
        String userName = "";
        try {
            // Explicit charset so the result does not depend on the platform default.
            userName = Base64.encodeBase64String(URLEncoder.encode(account, "UTF-8").getBytes("UTF-8"));
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
        }
        return userName;
    }

    /** Builds a random string of {@code len} characters drawn from A-Z and 0-9. */
    private static String makeNonce(int len) {
        String alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
        StringBuilder nonce = new StringBuilder();
        for (int i = 0; i < len; i++) {
            nonce.append(alphabet.charAt((int) (Math.random() * alphabet.length())));
        }
        return nonce.toString();
    }

    /** Returns the current local time in whole seconds since the epoch; sent as the "servertime" field. */
    private static String getServerTime() {
        return String.valueOf(System.currentTimeMillis() / 1000);
    }
}