python3爬取数据导入mysql_Python3.x使用requests库将爬取数据存储到MySQL

最新推荐文章于 2021-03-01 20:52:21 发布

王那厮

最新推荐文章于 2021-03-01 20:52:21 发布

阅读量143

点赞数

文章标签： python3爬取数据导入mysql

本文链接：https://blog.csdn.net/weixin_29369347/article/details/114927154

版权

class Douban:
    """Crawl the Douban Top 250 movie list and store each movie in MySQL.

    Workflow (see ``run``): recreate the ``test3`` table, fetch the first
    list page, visit every movie detail page linked from it, insert one row
    per movie, then follow the "next page" link until none is found.

    A single database connection is opened lazily by ``lian`` and reused for
    every statement, so that ``commit``/``rollback`` act on the same
    connection that executed the statement.
    """

    # Start page of the list; the next-page href is appended to this URL.
    BASE_URL = "https://movie.douban.com/top250"

    # Seconds to wait for an HTTP response before giving up on a page.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        # Request headers imitating a desktop browser.
        self.header = {
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
            "Accept-Language": "zh-CN,zh;q=0.9",
            "Cache-Control": "max-age=0",
            "Connection": "keep-alive",
            "Host": "movie.douban.com",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.92 Safari/537.36",
        }
        # Cached database connection; created on first use by lian().
        self._db = None

    @staticmethod
    def _first(nodes, default=""):
        """Return the first XPath result, or ``default`` when there is none."""
        return nodes[0] if nodes else default

    @classmethod
    def _next_page_url(cls, hrefs):
        """Build the absolute next-page URL from the next-link hrefs.

        Returns ``None`` when ``hrefs`` is empty, i.e. no next link was found.
        """
        if not hrefs:
            return None
        return cls.BASE_URL + hrefs[0]

    def get_html(self, url):
        """Fetch ``url`` and return its body as text.

        Returns an empty string when the request fails or the server answers
        with an HTTP error status; the failure is printed, not raised.
        """
        try:
            response = requests.get(
                url, headers=self.header, timeout=self.REQUEST_TIMEOUT
            )
            # Treat 4xx/5xx answers as failures instead of parsing error pages.
            response.raise_for_status()
            response.encoding = "utf-8"
            return response.text
        except requests.RequestException as e:
            # str() is required: concatenating the exception object itself
            # raises TypeError and would hide the original error.
            print("页面获取失败" + str(e))
            return ""

    def detail_url(self, html):
        """Process one list page given as HTML text.

        Visits every movie detail link found on the page (pausing two seconds
        between requests), then continues with the next list page.
        Does nothing when ``html`` is empty or cannot be parsed.
        """
        if not html:
            return
        tree = etree.HTML(html)
        if tree is None:
            return
        links = tree.xpath('//ol[@class="grid_view"]/li//div[@class="pic"]/a/@href')
        for link in links:
            self.detail_html(link)
            time.sleep(2)  # throttle requests between detail pages
        self.next_html(tree)

    def next_html(self, html):
        """Follow the next-page link of a parsed list page, if there is one.

        ``html`` is the parsed element tree of the current list page. When the
        page contains no next link (e.g. on the last page) the crawl ends.
        """
        next_url = self._next_page_url(html.xpath('//span[@class="next"]/a/@href'))
        if next_url is None:
            return
        print("=" * 1000, next_url)
        self.detail_url(self.get_html(next_url))

    def detail_html(self, url):
        """Scrape one movie detail page and insert its data into the database.

        Fields that cannot be found on the page are stored as empty strings.
        The page is skipped entirely when it cannot be fetched or parsed.
        """
        page = self.get_html(url)
        if not page:
            return
        tree = etree.HTML(page)
        if tree is None:
            return

        # Title
        name = "".join(tree.xpath('//div[@id="content"]/h1//span/text()'))
        # Poster image URL
        img_url = self._first(tree.xpath('//div[@id="mainpic"]/a/img/@src'))
        # Director
        daoyan = self._first(tree.xpath('//div[@id="info"]/span[1]/span[2]/a/text()'))
        # Screenwriters
        bianju = "".join(tree.xpath('//div[@id="info"]/span[2]/span[2]//a/text()'))
        # Leading actors; "/" separators are replaced by full-width commas
        zhuyan = "".join(
            tree.xpath('//div[@id="info"]/span[3]/span[2]//text()')
        ).replace("/", "，")
        # Genres (named ``genre`` to avoid shadowing the builtin ``type``)
        genre = "".join(tree.xpath('//span[@property="v:genre"]/text()'))
        # Rating
        score = self._first(tree.xpath('//strong[contains(@class,"rating_num")]/text()'))

        zu = (name, img_url, daoyan, bianju, zhuyan, genre, score)
        print(zu)
        self.insert_table(zu)

    def lian(self):
        """Return the database connection, opening it on first use.

        The same connection object is returned on every call until ``run``
        closes it.
        """
        if self._db is None:
            username = "root"
            password = "root"
            path = "localhost"
            dbname = "python_test"
            # Keyword arguments: PyMySQL 1.0+ no longer accepts positional ones.
            self._db = pymysql.connect(
                host=path,
                user=username,
                password=password,
                database=dbname,
                # Explicit charset so non-ASCII text does not depend on the
                # driver's default connection encoding.
                charset="utf8mb4",
            )
        return self._db

    def create_table(self):
        """Drop the ``test3`` table if it exists and create it afresh."""
        sql = """
            create table test3(
                id int primary key auto_increment,
                name varchar(255),
                img_url varchar(255),
                daoyan varchar(255),
                bianju varchar(255),
                zhuyan text,
                type varchar(255),
                score varchar(255)
            )character set utf8
        """
        with self.lian().cursor() as cursor:
            cursor.execute("drop table if exists test3")
            cursor.execute(sql)

    def insert_table(self, zu):
        """Insert one movie row into ``test3``.

        ``zu`` is the 7-tuple (name, img_url, daoyan, bianju, zhuyan, type,
        score). On a database error the transaction is rolled back and the
        failure is printed; the crawl continues.
        """
        sql = "insert into test3(name,img_url,daoyan,bianju,zhuyan,type,score) value(%s,%s,%s,%s,%s,%s,%s) "
        db = self.lian()
        try:
            with db.cursor() as cursor:
                cursor.execute(sql, zu)
            # Commit on the connection that executed the statement.
            db.commit()
        except pymysql.MySQLError as e:
            db.rollback()
            print("添加失败", e)

    def run(self):
        """Run the whole crawl and always close the database connection."""
        try:
            self.create_table()  # start from an empty table
            html = self.get_html(self.BASE_URL)
            self.detail_url(html)
        finally:
            # Close the connection even if the crawl aborted part-way.
            if self._db is not None:
                self._db.close()
                self._db = None

if __name__ == "__main__":
    # Start the crawl only when this file is executed as a script.
    Douban().run()

王那厮

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
python3爬取数据导入mysql_Python3.x使用requests库将爬取数据存储到MySQL

class Douban:def __init__(self):#模拟请求头self.header={"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9","Accept-Lang...
复制链接

扫一扫