近来今日头条特别的火,就拿来研究研究!
头条的新闻内容和新闻评论是开放的,无需登录就可以获取到!
但是发表新闻评论需要先登录,于是开始模拟头条的用户登录:
1.模拟登录的操作能够获取到cookie值,定为initCookie,但是该cookie值不能直接用来作为对新闻进行评论的请求参数。
2.需要找到一个新闻url,例如:http://toutiao.com/a4583986550/,将上一步获取的cookie值作为参数,模拟请求,获取到新的cookie,定为lastCookie。
3.将initCookie和lastCookie进行拼接就可获取到最终的可用于新闻评论的cookie
代码如下:
private String doLoginTouTiao2(String loginName, String password) { String url = "https://mp.toutiao.com/auth/login_post/"; Map<String, String> headers = new HashMap<String, String>(); Map<String, String> params = new HashMap<String, String>(); headers.put("Host", "mp.toutiao.com"); headers.put("Connection", "keep-alive"); headers.put("Origin", "https://mp.toutiao.com"); headers.put( "User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.118 Safari/537.36"); headers.put("Content-Type", "application/x-www-form-urlencoded; charset=UTF-8"); headers.put("Accept", "application/json, text/javascript, */*; q=0.01"); headers.put("X-Requested-With", "XMLHttpRequest"); headers.put("X-CSRFToken", "b3ebed559047fa1cacaccc6ccfb66851"); headers.put("Referer", "https://mp.toutiao.com/login/"); // headers.put("Accept-Encoding", "gzip, deflate"); headers.put("Accept-Language", "zh-CN,zh;q=0.8"); // String cookie = "uuid=\"w:0256c3403d82490abcc2c7819d48865f\"; tt_webid=3932107506; utm_source=toutiao; __utma=24953151.381297160.1435021810.1435023762.1435106245.3; __utmc=24953151; __utmz=24953151.1435106245.3.2.utmcsr=baidu|utmccn=(organic)|utmcmd=organic|utmctr=%E4%BB%8A%E6%97%A5%E5%A4%B4%E6%9D%A1; _gat=1; sessionid=249d2efbe15e67a4ff000a9ffe489c4e; csrftoken=b3ebed559047fa1cacaccc6ccfb66851; _ga=GA1.2.381297160.1435021810"; // headers.put("Cookie", cookie); params.put("name_or_email", loginName); params.put("password", password); HttpResponse response = HttpUtils.doPost(url, headers, params); List<Cookie> cookies = HttpUtils.getResponseCookies(response); String cookieStr = HttpUtils.setCookie2String(cookies); System.out.println("cookieStr:" + cookieStr); String responseText = HttpUtils.getStringFromResponse2(response, "utf-8"); System.out.println(responseText); return HttpUtils.setCookie2String(cookies); }
获取lastCookie值(由main方法中调用的fetchCsrfToken2完成),随后用拼接后的cookie发表评论:
public void publicNewsComment(String cookieStr,String csrfToken) { String url="http://toutiao.com/group/4560598775/post_comment/"; Map<String,String> headers=new HashMap<String,String>(); Map<String, String> params = new HashMap<String, String>(); headers.put("Accept", "application/json, text/javascript, */*; q=0.01"); headers.put("Accept-Language", "zh-CN,zh;q=0.8"); headers.put("Connection", "keep-alive"); headers.put("Content-Type", "application/x-www-form-urlencoded; charset=UTF-8"); headers.put("Host", "toutiao.com"); headers.put("Origin", "http://toutiao.com"); headers.put("Referer", "http://toutiao.com/a4560598775/"); headers.put("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.118 Safari/537.36"); headers.put("X-CSRFToken", csrfToken); headers.put("X-Requested-With", "XMLHttpRequest"); // cookieStr="uuid=\"w:0256c3403d82490abcc2c7819d48865f\"; tt_webid=3932107506; sessionid=249d2efbe15e67a4ff000a9ffe489c4e; _ga=GA1.2.381297160.1435021810; __utmt=1; utm_source=toutiao; csrftoken=382a7061c9b0b4abe3c4558ad3847df4; __utma=24953151.381297160.1435021810.1435117044.1435124287.5; __utmb=24953151.6.8.1435124301975; __utmc=24953151; __utmz=24953151.1435124287.5.3.utmcsr=baidu|utmccn=(organic)|utmcmd=organic|utmctr=%E4%BB%8A%E6%97%A5%E5%A4%B4%E6%9D%A1"; // cookieStr="uuid=\"w:0256c3403d82490abcc2c7819d48865f\"; tt_webid=3932107506; sessionid=249d2efbe15e67a4ff000a9ffe489c4e; _ga=GA1.2.381297160.1435021810; __utmt=1; utm_source=toutiao; csrftoken=382a7061c9b0b4abe3c4558ad3847df4; __utma=24953151.381297160.1435021810.1435117044.1435124287.5; __utmb=24953151.6.8.1435124301975; __utmc=24953151; __utmz=24953151.1435124287.5.3.utmcsr=baidu|utmccn=(organic)|utmcmd=organic|utmctr=%E4%BB%8A%E6%97%A5%E5%A4%B4%E6%9D%A1"; // cookieStr=cookieStr+"csrftoken=382a7061c9b0b4abe3c4558ad3847df4;"; headers.put("Cookie", cookieStr); params.put("status", "啊啊啊啊啊!你这么做让释永信大僧脸面何在啊!"); HttpResponse response = HttpUtils.doPost(url, 
headers, params); System.out.println(response); String responseText =HttpUtils.getStringFromResponse2(response,"utf-8"); System.out.println(responseText); }
运行main方法:
/**
 * Demo entry point: logs in, fetches the CSRF cookie, appends it to the login
 * cookies, and posts a comment using the combined cookie string.
 *
 * @param args unused
 * @throws IllegalStateException if no CSRF cookie is returned, or if its first
 *         {@code name=value} pair is missing or has an empty value
 */
public static void main(String[] args) {
    LoginTouTiao loginTT = new LoginTouTiao();

    // NOTE(review): credentials are hard-coded for this demo; do not commit real ones.
    String cookieStr = loginTT.doLoginTouTiao2("763197812@qq.com", "123456");
    System.out.println(cookieStr);

    String csrfToken = loginTT.fetchCsrfToken2(cookieStr);
    if (csrfToken == null) {
        // Fail before the literal text "null" is concatenated into the cookie string.
        throw new IllegalStateException("fetchCsrfToken2 returned no CSRF cookie");
    }
    cookieStr = cookieStr + ";" + csrfToken;

    // The X-CSRFToken header needs only the value of the first name=value pair.
    String token = firstCookieValue(csrfToken);
    System.out.println(token);

    SpiderTouTiao2 spider = new SpiderTouTiao2();
    spider.publicNewsComment(cookieStr, token);
}

/**
 * Returns the value of the first {@code name=value} pair in a {@code ;}-separated cookie string.
 *
 * <p>Only the first {@code =} separates name from value, so a value that itself
 * contains {@code =} is returned whole.
 *
 * @param cookieHeader cookie string such as {@code "csrftoken=abc; other=1"}; must not be null
 * @return the non-empty value of the first pair
 * @throws IllegalStateException if the first pair has no {@code =} or an empty value
 */
private static String firstCookieValue(String cookieHeader) {
    // indexOf is used instead of split: split(";") yields an empty array for
    // input made only of separators, which would make [0] throw.
    int pairEnd = cookieHeader.indexOf(';');
    String firstPair = pairEnd < 0 ? cookieHeader : cookieHeader.substring(0, pairEnd);

    int equalsAt = firstPair.indexOf('=');
    if (equalsAt < 0 || equalsAt == firstPair.length() - 1) {
        throw new IllegalStateException("CSRF cookie is not a non-empty name=value pair");
    }
    return firstPair.substring(equalsAt + 1);
}