Android小软件看知乎收藏夹

最新推荐文章于 2024-09-26 23:53:33 发布

poklau123

最新推荐文章于 2024-09-26 23:53:33 发布

阅读量1.1k

点赞数

分类专栏： Curl android PHP 文章标签： PHP android json

本文链接：https://blog.csdn.net/poklau/article/details/46815783

版权

PHP 同时被 3 个专栏收录

4 篇文章 0 订阅

订阅专栏

android

3 篇文章 0 订阅

订阅专栏

Curl

2 篇文章 0 订阅

订阅专栏

（可爱的刘看山用来做图标还是不错的！）

有没有发现知乎的Android平台客户端不支持查看自己关注的收藏夹？每次都用电脑看感觉好麻烦，所以写了这个小软件：用PHP（暂时不会Python……）爬取知乎网页来获取收藏夹里面的内容，并将结果以JSON数据的形式返回：

{"URL":["http:\/\/www.zhihu.com\/collection\/19906419","http:\/\/www.zhihu.com\/collection\/29347192","http:\/\/www.zhihu.com\/collection\/34260000","http:\/\/www.zhihu.com\/collection\/21104418","http:\/\/www.zhihu.com\/collection\/30984146","http:\/\/www.zhihu.com\/collection\/42109172","http:\/\/www.zhihu.com\/collection\/19633165","http:\/\/www.zhihu.com\/collection\/26815754"],"TITLE":["\u62cd\u6848\u53eb\u7edd\u6216\u5927\u7b11","\u5982\u679c\u8fd8\u6709\u4ec0\u4e48\u80fd\u591f\u957f\u59ff\u52bf","\u4e00\u672c\u6b63\u7ecf\u5730\u626f\u6de1","\u7231\u60c5\u3002\u4ee5\u7231\u4e4b\u540d\u3002","\u8fd9\u624d\u662f\u771f\u6b63\u725b\u903c\u7684\u6545\u4e8b","\u5410\u69fd\u5410\u5f97\u6211\u90fd\u5c04\u4e86\uff01\u96c5\u881b\u8776\uff01","\u6253\u51fb\u9898\u4e3b","\u77e5\u5973\u4e4e"],"NUM":[6,0,0,0,0,0,0,0],"SMALLTITLE":["\u7531 \u4e8e\u6e90 \u521b\u5efa \u2022 1041 \u4e2a\u7b54\u6848","\u7531 \u6768\u6d0b \u521b\u5efa \u2022 255 \u4e2a\u7b54\u6848","\u7531 \u8c22\u676d \u521b\u5efa \u2022 678 \u4e2a\u7b54\u6848","\u7531 \u4e01\u5e7f\u6770 \u521b\u5efa \u2022 782 \u4e2a\u7b54\u6848","\u7531 \u6587\u897f \u521b\u5efa \u2022 41 \u4e2a\u7b54\u6848","\u7531 \u5218\u5947\u5947 \u521b\u5efa \u2022 368 \u4e2a\u7b54\u6848","\u7531 \u738b\u7d0d\u7c73 \u521b\u5efa \u2022 2982 \u4e2a\u7b54\u6848","\u7531 \u7ae5\u540d \u521b\u5efa \u2022 315 \u4e2a\u7b54\u6848"],"CREATOR_URL":["http:\/\/www.zhihu.com\/people\/yu_yuan","http:\/\/www.zhihu.com\/people\/yang-yang-32-76","http:\/\/www.zhihu.com\/people\/Tse_","http:\/\/www.zhihu.com\/people\/ding-yan-jie-92","http:\/\/www.zhihu.com\/people\/VinceJ","http:\/\/www.zhihu.com\/people\/6hikki","http:\/\/www.zhihu.com\/people\/nano.wang","http:\/\/www.zhihu.com\/people\/hmybz"],"ICON":"http:\/\/pic3.zhimg.com\/4b1255a7e_l.jpg"}

客户端通过HttpClient拿到JSON数据并解析后，就可以把这个软件的主界面做好了（Android客户端怎么做不表，我基本也是第一次写，有点挫）。

点击后就可以用WebView来访问手机网页版知乎了，这点确实比较偷懒，不过也是因为没有自己的服务器，所有都写爬虫的话我的SAE豆子估计很快就没了。

<?php
/**
 * Thin cURL wrapper for issuing a request with custom headers and a cookie
 * string (used here to fetch pages as a logged-in user).
 *
 * @param  string|null        $url        URL to request; left unset on the handle when empty.
 * @param  string             $post       Request method; "post" in any letter case enables CURLOPT_POST.
 * @param  array|null         $header     List of raw "Name: value" header lines.
 * @param  string|null        $cookie     Value for the Cookie request header.
 * @param  array|string|null  $data       Form fields (key => value), or an already-encoded body string.
 * @param  boolean            $retHeader  Whether to include the response headers in the returned
 *                                        string (needed e.g. to read Set-Cookie).
 * @return string|false                   The value returned by curl_exec(): the response text, or
 *                                        false when the transfer fails.
 */
function myCurl($url, $post, $header, $cookie, $data, $retHeader = false){
	$ch = curl_init();
	if($ch === false)
		return false;

	if($url != null)
		curl_setopt($ch, CURLOPT_URL, $url);
	if($header != null)
		curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
	if($cookie != null)
		curl_setopt($ch, CURLOPT_COOKIE, $cookie);

	// Return the response instead of printing it, and follow redirects.
	curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
	curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
	curl_setopt($ch, CURLOPT_HEADER, $retHeader);

	if(is_string($post) && strcasecmp($post, 'post') === 0)
		curl_setopt($ch, CURLOPT_POST, true);

	if($data != null){
		// Fields must be URL-encoded and joined with "&"; the previous
		// implode('=') produced a malformed body for more than one field.
		// NOTE: setting CURLOPT_POSTFIELDS makes cURL send a POST even when
		// $post is not "post".
		$postFields = is_array($data) ? http_build_query($data, '', '&') : (string)$data;
		curl_setopt($ch, CURLOPT_POSTFIELDS, $postFields);
	}

	$ret = curl_exec($ch);
	curl_close($ch);
	return $ret;
}

/**
 * Extract the header part of a raw HTTP response (headers + body).
 *
 * Everything before the first blank line ("\r\n\r\n") is treated as the
 * header block. If the content contains no blank line, the whole content is
 * returned. Only the first header block is returned even if the response
 * text contains several.
 *
 * @param  string $content Raw response text, including headers.
 * @return string|null     The header block, or null when $content is empty/falsy.
 */
function getHeader($content){
	if(!$content)
		return null;
	// Limit of 2 splits once at the first blank line; index 0 always exists,
	// so no undefined-offset notice when the separator is missing.
	$parts = explode("\r\n\r\n", $content, 2);
	return $parts[0];
}

/**
 * Collect the Set-Cookie values from a raw HTTP response (headers + body).
 *
 * Each returned entry is the text following "Set-Cookie:" up to the end of
 * that header line, exactly as captured (leading whitespace is not trimmed).
 *
 * @param  string $content Raw response text, including headers.
 * @return array|null      List of Set-Cookie values, or null when none are found.
 */
function getCookie($content){
	$cookie = null;
	$header = getHeader($content);
	// getHeader() returns null for empty content; do not pass that on to
	// preg_match_all(), which expects a string subject.
	if($header === null || $header === '')
		return $cookie;

	preg_match_all("/set\-cookie:([^\n\r]*)/i", $header, $matches, PREG_SET_ORDER);
	foreach ($matches as $match) {
		$cookie[] = $match[1];
	}
	return $cookie;
}

// Page listing the collections of the logged-in account.
$url = "http://www.zhihu.com/collections";
// Placeholder: replace with the Cookie header value of a logged-in session.
$cookie = 'cookie字符串啦';
// Browser-like request headers sent with both page fetches.
$header = array(
	'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
	'User-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.152 Safari/537.36 LBBROWSER',
	'Accept-Language: zh-CN,zh;q=0.8'
	);

$ret = myCurl($url, "get", $header, $cookie, null, false);
// A failed transfer yields false; treat it as an empty page so the script
// still emits well-formed JSON.
$html = is_string($ret) ? $ret : '';

// Capture groups: 1 = collection path, 2 = collection title,
// 4 = text of the optional "zg-num" span, 5 = creator URL,
// 6 = creator name, 7 = text of the trailing span (answer count).
preg_match_all("/<h2\sclass=\"zm-item-title\">\n\n<a.+href=\"(.*)\"\s>(.*)<\/a>\n*(<span\sclass=\"zg-num\">(.*)<\/span>)*\n*<\/h2>\n.*\n.*<a.*href=\"(.*)\"\starget=\"_blank\".*>(.*)<\/a>.*\n.*>(.*)<\/span>/i", $html, $array);
preg_match_all("/<a\shref=\"(\/people.*)\"/i", $html, $mainPage);
// The second "/people..." link on the page is used as the profile path.
// NOTE(review): presumably that is the logged-in user's own profile — confirm against the page markup.
$mainPage = isset($mainPage[1][1]) ? $mainPage[1][1] : null;

// Always emit every list key, even when nothing matched, so the client
// receives a consistent JSON shape.
$data = array(
	"URL" => array(),
	"TITLE" => array(),
	"NUM" => array(),
	"SMALLTITLE" => array(),
	"CREATOR_URL" => array(),
	"ICON" => null,
	);

$length = isset($array[1]) ? count($array[1]) : 0;
for ($i = 0; $i < $length; $i++) {
	$data["URL"][$i] = "http://www.zhihu.com".$array[1][$i];
	$data["TITLE"][$i] = $array[2][$i];
	// An absent "zg-num" span is captured as an empty string, which casts to 0.
	$data["NUM"][$i] = (int)$array[4][$i];
	$data["SMALLTITLE"][$i] = "由 ".$array[6][$i]." 创建 • ".$array[7][$i];
	$data["CREATOR_URL"][$i] = $array[5][$i];
}

// Fetch the profile page only when a profile link was found, and read the
// avatar URL from the first image carrying the profile-header class.
if ($mainPage !== null) {
	$url = 'http://www.zhihu.com'.$mainPage;
	$ret = myCurl($url, 'get', $header, $cookie, null, false);
	if (is_string($ret) && preg_match("/src=\"(.*)\"\sclass=\"zm-profile-header-img/", $ret, $icon)) {
		$data["ICON"] = $icon[1];
	}
}

if (!headers_sent()) {
	header('Content-Type: application/json; charset=utf-8');
}
$data = json_encode($data);
echo $data;
?>