php爬数据库,我的第一个PHP练习——php爬取信息存储到mysql

1.获取分类的名称和url(text:名字,href:链接)

fce20eafa9754b319d03db4ce549fef3.png

$url="http://books.toscrape.com/";

$data=QueryList::get($url)->rules([

'categoryname' => ['#default > div > div > div > aside > div.side_categories > ul > li > ul > li > a','text'],

'url'=>['#default > div > div > div > aside > div.side_categories > ul > li > ul > li > a','href']

])->query()->getData();

复制代码

2.通过图书url获取图书信息

(1)获取单页数据,因url不完整,则需要拼接

6c49360f354c2a25696cfd0c25f34eae.png

//每页数据

$data=QueryList::get($url)->rules([

'bookname'=>['#default > div > div > div > div > section > div:nth-child(2) > ol > li: > article > h3 > a','title'],

'bookprice'=>['#default > div > div > div > div > section > div:nth-child(2) > ol > li: > article > div.product_price > p.price_color','text']

])->queryData();

复制代码

(2)获取下一页链接,判断是否有下一页,若有,拼接链接

1c9db44e71a360084d87e98316967bd1.png

d5e8915cadefbaf1d4895b3778fb7ec9.png

strrpos("str","/")查询字符串str中,最后一个“/”所在位置。

substr("str",num)从第num个字符开始截取字符,到最后一位。

$nexturl=QueryList::get($url)->find('#default > div > div > div > div > section > div:nth-child(2) > div > ul > li.next > a')->href;

//判断是否有下一页

if($nexturl!=NULL&&$nexturl!=''){ //链接修改

$urll=$nexturl;

$str=substr($url,strrpos($url,"/")+1);

$url=str_replace($str,$urll,$url);

$num==1;

}

复制代码

(3)通过do-while循环获取所有的图书信息

$alldata=array();

do{

$num=0;

//每页数据

$data=QueryList::get($url)->rules([

'bookname'=>['#default > div > div > div > div > section > div:nth-child(2) > ol > li: > article > h3 > a','title'],

'bookprice'=>['#default > div > div > div > div > section > div:nth-child(2) > ol > li: > article > div.product_price > p.price_color','text']

])->queryData();

$alldata=array_merge($alldata,$data);

//print_r($data);

$nexturl=QueryList::get($url)->find('#default > div > div > div > div > section > div:nth-child(2) > div > ul > li.next > a')->href;

//判断是否有下一页

if($nexturl!=NULL&&$nexturl!=''){ //链接修改

//print($nexturl);

$urll=$nexturl;

$str=substr($url,strrpos($url,"/")+1);

$url=str_replace($str,$urll,$url);

$num=1;

//print_r($url."\n");

}

}while($num==1);

复制代码

3.将爬取的数据存入mysql数据库

(1)创建数据库和表格

bcc0bdc5793a79837465011bd28499fe.png

create database bookstore character set utf8;

use bookstore;

CREATE TABLE `category` (

`categoryid` int(11) NOT NULL AUTO_INCREMENT,

`categoryname` varchar(255) NOT NULL,

PRIMARY KEY (`cid`)

) ENGINE=MyISAM DEFAULT CHARSET=utf8;

CREATE TABLE `books` (

`bookid` int(11) NOT NULL AUTO_INCREMENT,

`bookname` varchar(255) NOT NULL,

`bookprice` varchar(10) NOT NULL,

`categoryid` int(11) NOT NULL,

PRIMARY KEY (`bid`)

) ENGINE=MyISAM DEFAULT CHARSET=utf8;

复制代码

(2)PHP连接数据库

// 创建mysql连接

$servername = "localhost"; // 本地主机

$username = "root"; // 用户名

$password = "root"; // 用户密码

$dbname = "bookstore"; // 数据库名字

$conn = mysqli_connect($servername, $username, $password, $dbname);

mysqli_query($conn,"SET NAMES utf8");

复制代码

(3)将数据写入数据库

单条数据插入 mysqli_query,写入图书类,获取图书类ID

$sqltype="insert into category(categoryname) values (\"{$dtype['categoryname']}\")";

mysqli_query($conn,$sqltype);

//分类ID

$typeid=mysqli_insert_id($conn);

复制代码

多条数据插入 mysqli_multi_query,写入图书信息

foreach ($alldata as $key=>$value){

$sql.="insert into books(bookname,bookprice,categoryid) values (\"{$value['bookname']}\",\"{$value['bookprice']}\",$typeid);";

}

$result=mysqli_multi_query($conn,$sql);

复制代码

判断是否成功

if($result){echo"写入成功";}

else{echo"写入失败";}

复制代码

完成

bb6084d225cc95bec26b387d449c78b1.png

770eedfe8eacb6909f796c8957122530.png

4.完整代码

require 'vendor/autoload.php';

use QL\QueryList;

//$url="http://books.toscrape.com/";

function get_url_name($url){//获取分类链接和名字

$data=QueryList::get($url)->rules([

'categoryname' => ['#default > div > div > div > aside > div.side_categories > ul > li > ul > li > a','text'],

'url'=>['#default > div > div > div > aside > div.side_categories > ul > li > ul > li > a','href']

])->query()->getData();

//print(count($data));

//print_r($data->all());

return $data;

}

function get_bookname_price($url){

$alldata=array();

do{

$num=0;

//每页数据

$data=QueryList::get($url)->rules([

'bookname'=>['#default > div > div > div > div > section > div:nth-child(2) > ol > li: > article > h3 > a','title'],

'bookprice'=>['#default > div > div > div > div > section > div:nth-child(2) > ol > li: > article > div.product_price > p.price_color','text']

])->queryData();

$alldata=array_merge($alldata,$data);

//print_r($data);

//获取下一页链接

$nexturl=QueryList::get($url)->find('#default > div > div > div > div > section > div:nth-child(2) > div > ul > li.next > a')->href;

//判断是否有下一页

if($nexturl!=NULL&&$nexturl!=''){ //链接修改

//print($nexturl."\n");

$urll=$nexturl;

$str=substr($url,strrpos($url,"/")+1);

$url=str_replace($str,$urll,$url);

$num=1;

//print_r($url."\n");

}

}while($num==1);

print (count($alldata)."\n");

/* $i=1;

foreach ($alldata as $d){

print_r($i++."\t".$d['bookname']."\t\t".$d['bookprice']."\n");

}*/

return $alldata;

}

//get_bookname_price('http://books.toscrape.com/catalogue/category/books/mystery_3/index.html');

function start(){

$url="http://books.toscrape.com/";

$data=get_url_name($url);

// 创建mysql连接

$servername = "localhost"; // 本地主机

$username = "root"; // 用户名

$password = "root"; // 用户密码

$dbname = "bookstore"; // 数据库名字

$conn = mysqli_connect($servername, $username, $password, $dbname);

mysqli_query($conn,"SET NAMES utf8");

$sql='';

$sqltype='';

//$bookname='';

//$bookprice='';

foreach ($data as $key=>$dtype){

print_r($dtype['categoryname']."\n");

// $categoryname=$dtype['categoryname'];

$sqltype="insert into category(categoryname) values (\"{$dtype['categoryname']}\")";

mysqli_query($conn,$sqltype);

//分类ID

$typeid=mysqli_insert_id($conn);

$alldata=get_bookname_price( "http://books.toscrape.com/".$dtype['url']);

$url="http://books.toscrape.com/".$url;

foreach ($alldata as $key=>$value){

//print_r($data);

// print_r("\t\"".$value['bookname']."\"\t\t".$value['bookprice']."\n");

//$bookname=$value['bookname'];

//$bookprice=$value['bookprice'];

$sql.="insert into books(bookname,bookprice,categoryid) values (\"{$value['bookname']}\",\"{$value['bookprice']}\",$typeid);";

}

}

echo($sql);

$result=mysqli_multi_query($conn,$sql);

if($result){

echo"写入成功";

}else{

echo"写入失败";

}

}

start();

复制代码

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值