目录
创建test数据库
创建test的数据库
'''
创建数据库
'''
self.mydb = pymysql.connect(
host="localhost",
user="你的用户名",
password="你的密码"
)
self.mycursor = self.mydb.cursor() #游标
self.mycursor.execute("CREATE DATABASE if not exists test") #创建名为test的数据库
创建table1表
self.db = pymysql.connect(
host='localhost',
user='你的用户名',
password='你的密码',
db='test',
port=3306)
self.cursor=self.db.cursor()
#如果存在table1表,则删除
self.cursor.execute("DROP TABLE IF EXISTS table1")
#创建table1表
crea_table = '''
create table table1(
class_url varchar(150),
pic_url varchar(150),
class_name varchar(150),
people_nums int)'''
try:
# 执行SQL语句
self.cursor.execute(crea_table)
print("创建数据表成功")
except Exception as e:
print("创建数据表失败:case%s"%e)
finally:
#关闭游标连接
self.cursor.close()
# 关闭数据库连接
self.db.close()
将爬虫数据存入table1表中
'''
数据爬取
'''
db1 = pymysql.connect(
host='localhost',
user='你的用户名',
password='你的密码',
db='test',
port=3306)
cursor1=db1.cursor()
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36'
}
html = requests.get(url,headers=headers).content.decode('utf-8')
respose = etree.HTML(html)
lists = respose.xpath('//div[@class="list max-1152 clearfix"]/a[@class="item free "]')
#插入数据
for lis in lists:
#课程链接
c_url = 'http:'+lis.xpath('./@href')[0]
#图片链接
p_url = 'http:'+ lis.xpath('./div[@class="img"]/@style')[0].split("url('")[-1].split("')")[0]
#课程名字
name = lis.xpath('./p[@class="title ellipsis2"]/text()')[0]
#学习人数
print(name)
num = int(lis.xpath('./p[@class="one"]/text()')[0].split('· ')[-1].split('人')[0])
insert1 = "insert into table1(class_url,pic_url,class_name,people_nums)\
VALUES('%s','%s','%s', '%d')"%(c_url,p_url,name,num)
try:
# 执行sql语句
cursor1.execute(insert1)
# 执行sql语句
db1.commit()
except:
# 发生错误时回滚
db1.rollback()
#关闭游标和数据库
cursor1.close()
db1.close()
总的代码
import requests
import time
from lxml import etree
import pymysql
'''
这是一个爬取数据保存至数据库的demo
'''
class Spider_database:
def __init__(self):
'''
创建数据库
'''
self.mydb = pymysql.connect(
host="localhost",
user="你的用户名",
password="你的密码"
)
self.mycursor = self.mydb.cursor() #游标
self.mycursor.execute("CREATE DATABASE if not exists test") #创建名为test的数据库
'''
创建表
'''
self.db = pymysql.connect(
host='localhost',
user='你的用户名',
password='你的密码',
db='test',
port=3306)
self.cursor=self.db.cursor()
#如果存在table1表,则删除
self.cursor.execute("DROP TABLE IF EXISTS table1")
#创建table1表
crea_table = '''
create table table1(
class_url varchar(150),
pic_url varchar(150),
class_name varchar(150),
people_nums int)'''
try:
# 执行SQL语句
self.cursor.execute(crea_table)
print("创建数据表成功")
except Exception as e:
print("创建数据表失败:case%s"%e)
finally:
#关闭游标连接
self.cursor.close()
# 关闭数据库连接
self.db.close()
def spyder(self,url):
'''
数据爬取
'''
db1 = pymysql.connect(
host='localhost',
user='你的用户名',
password='你的密码',
db='test',
port=3306)
cursor1=db1.cursor()
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36'
}
html = requests.get(url,headers=headers).content.decode('utf-8')
respose = etree.HTML(html)
lists = respose.xpath('//div[@class="list max-1152 clearfix"]/a[@class="item free "]')
#插入数据
for lis in lists:
#课程链接
c_url = 'http:'+lis.xpath('./@href')[0]
#图片链接
p_url = 'http:'+ lis.xpath('./div[@class="img"]/@style')[0].split("url('")[-1].split("')")[0]
#课程名字
name = lis.xpath('./p[@class="title ellipsis2"]/text()')[0]
#学习人数
print(name)
num = int(lis.xpath('./p[@class="one"]/text()')[0].split('· ')[-1].split('人')[0])
insert1 = "insert into table1(class_url,pic_url,class_name,people_nums)\
VALUES('%s','%s','%s', '%d')"%(c_url,p_url,name,num)
try:
# 执行sql语句
cursor1.execute(insert1)
# 执行sql语句
db1.commit()
except:
# 发生错误时回滚
db1.rollback()
#关闭游标和数据库
cursor1.close()
db1.close()
if __name__ == '__main__':
s = Spider_database()
for i in range(1,3):
url = 'http://www.imooc.com/course/list?page={}'.format(i)
s.spyder(url)
查看数据库中的数据
1.通过MySQL Server查看,不会的可以查看我之前发的https://download.csdn.net/download/qq_33267306/48191430?spm=1001.2014.3001.5503
2.通过代码查看
import pymysql
def query_data():
db = pymysql.connect(
host='localhost',
user='你的用户名',
password='你的密码',
db='test',
port=3306)
cursor = db.cursor()
# 查询
query = "select * from table1"
try:
# 执行SQL语句
cursor.execute(query)
# 获取所有记录列表
results = cursor.fetchall()
for row in results:
class_url = row[0]
pic_url = row[1]
name = row[2]
num = row[3]
# 打印结果
print(class_url,pic_url,name,num)
except Exception as e:
print(e)
finally:
db.close()
# 关闭数据库连接
return results
if __name__ == '__main__':
aa = query_data()
总结
在存入MySQL数据库的时候,注意存入数据的长度,如果创建表中字符的长度小于数据的长度,就存入不了,切记!