package com.thief.parser.impl;
import java.io.IOException;
import java.net.URISyntaxException;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.http.HttpException;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.protocol.HTTP;
import org.apache.log4j.Logger;
import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.AndFilter;
import org.htmlparser.filters.HasAttributeFilter;
import org.htmlparser.filters.NodeClassFilter;
import org.htmlparser.tags.TableColumn;
import org.htmlparser.tags.TableTag;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;
import com.thief.parser.IMail163Parser;
import com.thief.po.Contact;
import com.thief.util.HttpUtil;
import com.thief.util.StringUtil;
public class Mail163ParserImpl implements IMail163Parser{
/** URL the login form is POSTed to by {@code login}; assigned through {@code setLoginUrl}. */
private String loginUrl;
/** Charset name passed to the HTTP helpers when posting the login form and when reading the follow-up response. */
private static final String charCode = HTTP.UTF_8;
/**
 * Logs in with the given credentials and returns the contacts parsed from the account.
 *
 * <p>A fresh {@link DefaultHttpClient} is created for each call, and its connection manager is
 * always shut down before this method returns.
 *
 * <p>Every exception raised by the login or parse step is caught here, so the checked
 * exceptions in the signature are not actually propagated by this implementation. A failure is
 * logged and reported to the caller as a {@code null} return.
 *
 * @param email    account name used to log in; also included in the failure log message
 * @param password account password; never logged
 * @return the contact list produced by {@code parser(HttpClient, String, String)}, or
 *         {@code null} if login or parsing failed
 */
public List parser(String email, String password) throws HttpException,
        IOException, InterruptedException, URISyntaxException {
    DefaultHttpClient client = new DefaultHttpClient();
    try {
        String loginRes = login(email, password, client);
        return parser(client, loginRes, email);
    } catch (InterruptedException e) {
        // Keep the interruption visible to the calling thread instead of losing it.
        Thread.currentThread().interrupt();
        log.error("Interrupted while fetching contacts for " + email, e);
    } catch (Exception e) {
        // Previously swallowed silently; the null return is kept for existing callers,
        // but the cause is now recorded.
        log.error("Failed to fetch contacts for " + email, e);
    } finally {
        client.getConnectionManager().shutdown();
    }
    return null;
}
/**
 * Posts the login form to the configured login URL and returns the resulting page body.
 *
 * <p>If the response contains the marker {@code 跳转提示}, the mail entry page is fetched with
 * a follow-up GET and that body is returned instead. If the response contains
 * {@code errorType}, the login is treated as rejected. Any other response is returned as is.
 *
 * @param email    account name, sent as the {@code username} form field and in the follow-up URL
 * @param password account password, sent as the {@code password} form field
 * @param client   HTTP client used for both requests; it is not shut down by this method
 * @return the body of the login response, or of the follow-up entry page
 * @throws IllegalStateException if no login URL has been set
 * @throws RuntimeException      if the response contains {@code errorType}
 * @throws IOException           if a request fails or the email cannot be URL-encoded
 */
public String login(String email, String password, HttpClient client)
        throws IllegalStateException, URISyntaxException, IOException, HttpException, InterruptedException {
    if (loginUrl == null) {
        // Fail with a clear message instead of an opaque error inside the HTTP helper.
        throw new IllegalStateException("loginUrl has not been set");
    }
    // Raw Map is kept on purpose: the parameter type of HttpUtil.doPost is not visible here.
    Map form = new HashMap();
    form.put(".verifycookie", "1");
    form.put("style", "35");
    form.put("product", "mail163");
    form.put("username", email);
    form.put("password", password);
    // NOTE(review): the trailing '=' in this key looks like a typo for "selType" - confirm
    // against the login form before changing it; left as is to keep the request unchanged.
    form.put("selType=", "jy");
    form.put("remUser", "on");
    form.put("secure", "on");
    String res = HttpUtil.doPost(client, loginUrl, form, charCode);
    if (res.indexOf("跳转提示") != -1) {
        // The email is caller-supplied, so escape it before placing it in the query string;
        // characters such as '+', '&' or spaces would otherwise corrupt the URL.
        String encodedEmail = URLEncoder.encode(email, charCode);
        HttpGet get = new HttpGet(
                "http://entry.mail.163.com/coremail/fcg/ntesdoor2?username=" + encodedEmail
                        + "&lightweight=1&verifycookie=1&language=-1&style=-1");
        res = StringUtil.readInputStream(client.execute(get)
                .getEntity().getContent(), charCode);
    } else if (res.indexOf("errorType") != -1) {
        throw new RuntimeException("帐号或密码错误");
    }
    return res;
}
/**
 * URL prefix of the addrprint.jsp page; the sid extracted from the login response is appended to it.
 * NOTE(review): the host name is hard-coded - presumably tied to one mail server node; confirm.
 */
String getUsers="http://tg4a84.mail.163.com/jy3/address/addrprint.jsp?sid=";
/** Regular expression whose group 1 captures the sid value of the index.jsp iframe in the post-login page. */
private static String regex = "iframe src=\"index.jsp\\?sid=([^\"]+)";
/**
 * Extracts the session id from the post-login page, downloads the address page for that
 * session and collects one Contact per regex match in the downloaded page.
 *
 * Steps, in order: the sid is read from content with the class-level sid regex (group 1);
 * the request URL is built as getUsers + sid + a dd parameter holding the current time in
 * milliseconds; the page is fetched with a GET; the contact pattern is compiled with
 * DOTALL and applied repeatedly, each search starting where the previous match ended;
 * group 1 of every match is used as the contact name and group 2 as the contact email.
 * Both the request URL (before the dd parameter is added) and the full response body
 * are written to the log at info level.
 *
 * The email parameter and the local variables map and groupNum are not used for the result.
 *
 * @param client  HTTP client used for the GET request
 * @param content body of the page returned by login, expected to contain the sid iframe
 * @param email   unused by this method
 * @return list of Contact objects, empty when the pattern never matches
 * @throws RuntimeException if no non-blank sid can be extracted from content
 */
public List parser(HttpClient client, String content, String email) throws IllegalStateException, URISyntaxException, IOException, HttpException, InterruptedException {
// Example of the fragment the sid regex is meant to match:
//iframe src="index.jsp?sid=zBObqxwciWMxDZiIlwccEFhCuYOLgipm"
String id = StringUtil.getByRegex(regex, 1, content);
// A missing or blank sid means the address page cannot be requested at all.
if(id == null || "".equals(id.trim())){
throw new RuntimeException("没能获取到关键ID");
}
String userJson = getUsers+id;
Map map = new HashMap();
// Earlier POST-based variant, kept for reference:
//String res = HttpUtil.doPost(client, userJson,map , charCode);
log.info(userJson);
// NOTE(review): the dd timestamp presumably acts as a cache buster - confirm.
userJson = userJson+"&dd=" + System.currentTimeMillis();
String res = HttpUtil.doGet(client, userJson, null);
log.info(res);
List contactList = new ArrayList();
// NOTE(review): the string literal assigned to aa below is damaged. It appears to have
// contained HTML table markup that was stripped from this copy of the file, leaving an
// unterminated literal with only one of the two capture groups the loop reads.
// The original pattern must be restored from version control before this compiles.
/*try {
parseByHtmlParser(res);
} catch (ParserException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
if(1==1 )return contactList;
*/String aa = "
邮件地址: | (.*?) |
---|
Pattern p = Pattern.compile(aa,Pattern.DOTALL);
Matcher m = p.matcher(res);
int groupNum = 0;
int firstIndex = 0;
while(m.find(firstIndex))
{
String name = m.group(1);
String email1 = m.group(2);
contactList.add(new Contact(name,email1));
firstIndex = m.end();
groupNum++;
}
return contactList;
}
/** Node filter that accepts table tags whose class attribute is gTable; used by parseByHtmlParser. */
NodeFilter filter = new AndFilter(new NodeClassFilter(TableTag.class), new HasAttributeFilter("class", "gTable"));//new HasAttributeFilter("class","gTable");
private List parseByHtmlParser(String content) throws ParserException{
List contactList = new ArrayList();
Parser p = new Parser();
p.setInputHTML(content);
NodeList nodeList = p.extractAllNodesThatMatch(filter);
if(nodeList != null && nodeList.size()!=0){
for(int i=0;i
Node node = nodeList.elementAt(i);
System.out.println(node.toHtml());
}
}
return contactList;
}
/**
 * Sets the URL that {@code login} posts the login form to.
 *
 * @param loginUrl login form target URL
 */
public void setLoginUrl(String loginUrl) {
this.loginUrl = loginUrl;
}
public static void main(String[] args) {
String content = "
网易电子邮箱 - 极速4.0";String res = StringUtil.getByRegex(regex, 1, content);
System.out.println(res);
}
/**
 * log4j logger for this class.
 * NOTE(review): declared as a package-visible instance field; private static final is the usual form - confirm nothing else in the package reads it before changing.
 */
Logger log = Logger.getLogger(Mail163ParserImpl.class);
}