引言
因为最近要做一个扫描图书ISBN编码获取图书信息的功能,需要用到一个开放的API,首先想到的就是豆瓣。但是豆瓣由于某些原因关闭了开放的API,所以只能通过抓取豆瓣图书页面的方式来获取图书信息。
一.实现效果
图片描述
二.代码实现
1.首先分析豆瓣图书首页查询ISBN编码的方法。地址https://book.douban.com/。
2.分析豆瓣的界面结构以及自己需要抓取的信息。
3.利用PHP获取302重定向后的地址,再用curl抓取该页面,取出关键的内容。
(注意:本套代码采用TP5.1编写)详细代码:
/**
 * Look up a book on Douban by ISBN and return its details as a JSON string.
 *
 * Reads the ISBN from the `isbn` query parameter, resolves the redirect that
 * https://book.douban.com/isbn/<isbn>/ answers with, downloads the target page
 * through getIsbn(), and extracts the fields from the page's JSON-LD block and
 * its HTML using cut().
 *
 * @return string JSON built by show(): status 1 and the book data on success;
 *                status 0 and empty data when the ISBN is malformed or the page
 *                could not be downloaded.
 *                NOTE(review): 0 is assumed to be the failure code — confirm
 *                against the clients that consume this endpoint.
 */
public function getBookInfo()
{
    // Normalise the input: ISBNs are frequently written with hyphens or spaces.
    $isbn = (isset($_GET['isbn']) && is_string($_GET['isbn']))
        ? str_replace(['-', ' '], '', trim($_GET['isbn']))
        : '';

    // The value is concatenated into a URL, so accept nothing but ISBN-10/ISBN-13.
    if (!preg_match('/^(?:[0-9]{9}[0-9Xx]|[0-9]{13})$/', $isbn)) {
        return show(0, 'ISBN格式不正确');
    }

    $isbnUrl = 'https://book.douban.com/isbn/' . $isbn . '/';

    // The ISBN URL answers with a redirect; take the first Location header.
    // If none is present, fall back to fetching the ISBN URL itself.
    $pageUrl = $isbnUrl;
    $headers = get_headers($isbnUrl);
    if (is_array($headers)) {
        foreach ($headers as $header) {
            if (is_string($header) && stripos($header, 'Location:') === 0) {
                $location = trim(substr($header, strlen('Location:')));
                if ($location !== '') {
                    $pageUrl = $location;
                }
                break;
            }
        }
    }

    $data = getIsbn($pageUrl);
    if (!is_string($data) || $data === '') {
        return show(0, '未找到图书信息');
    }

    // Structured metadata embedded in the page as JSON-LD.
    $meta = json_decode(cut($data, 'application/ld+json">', '</script>'), true);
    if (!is_array($meta)) {
        $meta = [];
    }

    // Extracts the text between a label marker and the next <br/>,
    // substituting the "unknown" placeholder when nothing is found.
    $field = function ($marker) use ($data) {
        $value = trim(cut($data, $marker, '<br/>'));
        return $value === '' ? '未知' : $value;
    };

    $res = [];
    $res['title'] = isset($meta['name']) ? $meta['name'] : '未知'; // book title
    $res['logo'] = cut($data, 'data-pic="', '"'); // cover image

    $author = (isset($meta['author']) && is_array($meta['author'])) ? $meta['author'] : [];
    if (empty($author[0])) {
        $author[0] = ['name' => '未知'];
    }
    $res['author'] = $author; // authors

    $authorDesc = cut($data, 'class="indent ">', '</div>');
    $res['author_desc'] = cut($authorDesc, '<p>', '</p>'); // author biography

    $res['publisher'] = $field('出版社:</span>'); // publisher
    $res['published'] = $field('出版年:</span>'); // publication date
    $res['page'] = $field('页数:</span>'); // page count
    $res['price'] = $field('定价:</span>'); // list price
    $res['designed'] = $field('装帧:</span>'); // binding

    // The summary is the first paragraph inside the "intro" container.
    $intro = explode('<p>', cut($data, 'class="intro">', '</p>'));
    $description = isset($intro[1]) ? trim($intro[1]) : '';
    if ($description === '') {
        $description = '未知';
    }
    $res['description'] = $description; // summary

    return show(1, '请求成功', $res);
}
在common.php中实现cut()、getIsbn()和show()三个方法。
/**
 * Return the text located between two markers.
 *
 * The result starts right after the first occurrence of $start and stops at
 * the first occurrence of $end that follows it. If $start occurs again before
 * $end does, the result stops at that second $start instead. When $end is not
 * found, everything up to the next $start (or the end of the content) is
 * returned.
 *
 * @param mixed  $content Text to search; non-scalar values (null, arrays) yield ''.
 * @param string $start   Opening marker.
 * @param string $end     Closing marker.
 * @return string The extracted text, or '' when $start is absent or a marker is empty.
 */
function cut($content, $start, $end)
{
    if (!is_scalar($content) || !is_scalar($start) || !is_scalar($end)) {
        return '';
    }

    $content = (string) $content;
    $start = (string) $start;
    $end = (string) $end;

    // explode() rejects an empty delimiter (ValueError on PHP 8, warning before).
    if ($start === '' || $end === '') {
        return '';
    }

    // Only the second piece is needed, so stop splitting after three pieces.
    $pieces = explode($start, $content, 3);
    if (!isset($pieces[1])) {
        return '';
    }

    $segment = explode($end, $pieces[1], 2);

    return $segment[0];
}
/**
 * Download a URL with an HTTP GET request via cURL.
 *
 * @param string $url Address to fetch.
 * @return string|false The response body, or false when the transfer fails.
 */
function getIsbn($url)
{
    $curl = curl_init();
    if ($curl === false) {
        return false;
    }

    curl_setopt($curl, CURLOPT_URL, $url); // page to fetch
    curl_setopt($curl, CURLOPT_HEADER, 0); // leave response headers out of the result
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1); // return the body as a string instead of printing it
    curl_setopt($curl, CURLOPT_HTTPGET, 1); // issue a real GET (previously a POST with the body "GET" was sent)
    curl_setopt($curl, CURLOPT_CONNECTTIMEOUT, 10); // do not hang forever on an unreachable host
    curl_setopt($curl, CURLOPT_TIMEOUT, 30);
    // NOTE(review): certificate and host verification are switched off, which
    // leaves the transfer open to interception. Kept as-is so environments
    // without a CA bundle keep working; prefer configuring curl.cainfo and
    // removing these two lines.
    curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, false);
    curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, false);

    $data = curl_exec($curl);
    curl_close($curl);

    return $data;
}
/**
 * Build the JSON envelope used for API responses.
 *
 * @param mixed $status  Status code placed under the "status" key.
 * @param mixed $message Message placed under the "message" key.
 * @param mixed $data    Payload placed under the "data" key; defaults to an empty array.
 * @return string|false JSON text with non-ASCII characters left unescaped
 *                      (json_encode() yields false if encoding fails).
 */
function show($status, $message, $data = [])
{
    $envelope = ['status' => $status, 'message' => $message];
    $envelope['data'] = $data;

    return json_encode($envelope, JSON_UNESCAPED_UNICODE);
}