hello 大家好,最近写代码,需要用到爬虫去爬取数据,废话不多说,进入主题
既然要存入数据库,那必然需要先进行数据库连接:通过Python里面的pymysql驱动去连接我们的MySQL数据库,然后创建数据库游标。通过游标可以执行SQL语句,并逐条取回查询结果,交给Python代码进一步处理。
import pymysql
# Open a connection to the MySQL server on localhost and select the `book`
# database. The password below is a placeholder: substitute the real one
# (preferably read from an environment variable or config file instead of
# hard-coding it in the script).
conn = pymysql.connect(
    host='127.0.0.1',
    user='root',
    password='数据库密码',
    port=3306,
    database='book',
    # Set the connection charset explicitly so non-ASCII (Chinese) text is
    # sent to the server as UTF-8 regardless of the driver/server default.
    charset='utf8mb4',
)
# Cursor through which SQL statements are executed on this connection.
cursor = conn.cursor()
创建请求头,通过requests模块向目标网站发送请求并获取响应,再用lxml的etree把返回的HTML解析成可以用XPath查询的对象(需要先 import requests 和 from lxml import etree)
# Request headers carrying a desktop-Chrome User-Agent string.
# NOTE(review): presumably needed so the site does not reject the default
# python-requests User-Agent - confirm.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36",
}
# Listing URL; the query string carries page=4 and pageSize=20.
url = 'https://www.qidian.com/all?orderId=11&style=1&pageSize=20&siteid=1&pubflag=0&hiddenField=0&page=4'
# A timeout keeps a stalled connection from hanging the script forever
# (requests waits indefinitely when none is given).
response = requests.get(url, headers=headers, timeout=10)
# Fail fast on 4xx/5xx instead of silently parsing an error page.
response.raise_for_status()
# Parse the response body into an lxml element tree for the XPath queries.
html = etree.HTML(response.text)
解析你想要爬的数据:这里借助浏览器的XPath插件来调试表达式,再用XPath按html标签和class属性筛选出需要的内容
# Run four XPath queries against the parsed page. Each query is rooted at the
# <div class="book-mid-info"> elements and yields a list of text nodes.
# NOTE(review): the four lists are presumably expected to line up index by
# index (one entry per book) - confirm against the page markup.

# Text of the <a> inside each div's <h4> (presumably the book title).
name = html.xpath('//div[@class="book-mid-info"]/h4/a/text()')
# Text of the first <a> child of each <p> in the div (presumably the author).
author = html.xpath('//div[@class="book-mid-info"]/p/a[1]/text()')
# Text of the second <a> child of each <p> in the div (presumably the category).
# NOTE: this assignment shadows the builtin `type` for the rest of the module.
type = html.xpath('//div[@class="book-mid-info"]/p/a[2]/text()')
# Direct text nodes of the div's second <p> (presumably the synopsis).
info = html.xpath('//div[@class="book-mid-info"]/p[2]/text()')
最后一步就是执行插入语句了,插入完成后不要忘记提交事务,并释放游标和数据库连接
# Insert one row into `book` per scraped entry, then release the cursor and
# the connection.
#
# The statement uses driver-side placeholders (%s) instead of Python string
# formatting: scraped text is untrusted, and a title containing a quote would
# otherwise break the statement or allow SQL injection.
sql = (
    "insert into book(bname,bautor,binfo,btype,bimage,bticket,bword,bnew) "
    "values(%s,%s,%s,%s,%s,%s,%s,%s)"
)
try:
    # zip() stops at the shortest list, so lists of unequal length cannot
    # raise IndexError part-way through the batch.
    for book_name, book_author, book_info, book_type in zip(name, author, info, type):
        print(book_name)
        print(book_author)
        print(book_info)
        print(book_type)
        cursor.execute(
            sql,
            (
                book_name,
                book_author,
                book_info.strip(),
                book_type,
                "image/" + book_name + ".jpg",  # bimage: path derived from the title
                random.randint(0, 1000),        # bticket: random placeholder value
                random.randint(100, 300),       # bword: random placeholder value
                0,                              # bnew
            ),
        )
    # Commit once, after every insert succeeded, so an error part-way does not
    # commit a partial batch (on transactional tables).
    conn.commit()
finally:
    # Always release the cursor and close the connection, even on failure.
    cursor.close()
    conn.close()
最后奉上完整代码
import random
import requests
import pymysql
from lxml import etree
# Complete script: download one listing page, extract the per-book fields with
# XPath, and insert one row per book into the `book` table.

# --- 1. Fetch and parse the page -------------------------------------------
# Done before opening the database connection so a network failure does not
# leave a connection open.
# NOTE(review): the desktop-Chrome User-Agent is presumably needed so the site
# does not reject the default python-requests User-Agent - confirm.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36",
}
# Listing URL; the query string carries page=4 and pageSize=20.
url = 'https://www.qidian.com/all?orderId=11&style=1&pageSize=20&siteid=1&pubflag=0&hiddenField=0&page=4'
# A timeout keeps a stalled connection from hanging the script forever.
response = requests.get(url, headers=headers, timeout=10)
# Fail fast on 4xx/5xx instead of silently parsing an error page.
response.raise_for_status()
html = etree.HTML(response.text)

# --- 2. Extract the fields --------------------------------------------------
# Each query is rooted at <div class="book-mid-info"> and yields a list of
# text nodes. The third list is named `types` so the builtin `type` is not
# shadowed.
names = html.xpath('//div[@class="book-mid-info"]/h4/a/text()')
authors = html.xpath('//div[@class="book-mid-info"]/p/a[1]/text()')
types = html.xpath('//div[@class="book-mid-info"]/p/a[2]/text()')
infos = html.xpath('//div[@class="book-mid-info"]/p[2]/text()')

# --- 3. Store the rows ------------------------------------------------------
# Driver-side placeholders (%s) are used instead of Python string formatting:
# scraped text is untrusted, and a title containing a quote would otherwise
# break the statement or allow SQL injection.
sql = (
    "insert into book(bname,bautor,binfo,btype,bimage,bticket,bword,bnew) "
    "values(%s,%s,%s,%s,%s,%s,%s,%s)"
)

# The password is a placeholder: substitute the real one (preferably read from
# an environment variable or config file). The charset is set explicitly so
# Chinese text is sent as UTF-8 regardless of the driver/server default.
conn = pymysql.connect(
    host='127.0.0.1',
    user='root',
    password='数据库密码',
    port=3306,
    database='book',
    charset='utf8mb4',
)
try:
    cursor = conn.cursor()
    try:
        # zip() stops at the shortest list, so lists of unequal length cannot
        # raise IndexError part-way through the batch.
        for book_name, book_author, book_info, book_type in zip(names, authors, infos, types):
            print(book_name)
            print(book_author)
            print(book_info)
            print(book_type)
            cursor.execute(
                sql,
                (
                    book_name,
                    book_author,
                    book_info.strip(),
                    book_type,
                    "image/" + book_name + ".jpg",  # bimage: path derived from the title
                    random.randint(0, 1000),        # bticket: random placeholder value
                    random.randint(100, 300),       # bword: random placeholder value
                    0,                              # bnew
                ),
            )
        # Commit once, after every insert succeeded, so an error part-way does
        # not commit a partial batch (on transactional tables).
        conn.commit()
    finally:
        cursor.close()
finally:
    # Close the connection even if an insert failed.
    conn.close()