# Scrape the Douban Top 250 movie list with Python and store the data in a database

#-*- coding:utf-8 -*-

"""Scrape the Douban Top 250 movie list and store it in a MySQL database."""

from datetime import datetime, timedelta

import pymysql
import requests
from bs4 import BeautifulSoup

# Helper class used to operate on the database

class MySqlCommand(object):
    """Hold MySQL connection settings and open a connection with them.

    The settings are plain instance attributes, so a caller can overwrite
    them between construction and :meth:`connectMysql`.
    """

    def __init__(self):
        """Initialise the connection settings with their default values."""
        self.host = "127.0.0.1"
        self.port = 3306  # port number
        self.user = "root"  # user name
        self.password = ""  # password
        self.db = ""  # database (schema) name
        self.table = ""  # table name

    def connectMysql(self):
        """Connect to the database and create a cursor.

        The connection and cursor are also stored on the instance as
        ``self.conn`` and ``self.cursor``.

        Returns:
            tuple: ``(cursor, connection)``.

        Raises:
            pymysql.MySQLError: if the connection cannot be established.
                The error is re-raised after the message is printed, so the
                caller sees the real cause rather than a failure while
                unpacking an implicit ``None`` return value.
        """
        try:
            self.conn = pymysql.connect(
                host=self.host,
                port=self.port,
                user=self.user,
                passwd=self.password,
                db=self.db,
                charset='utf8',
            )
            self.cursor = self.conn.cursor()
        except pymysql.MySQLError:
            print('connect mysql error.')
            raise
        return self.cursor, self.conn


# Build the URL of the ranking page that starts at the given position

def get_url(root_url, start):
    """Return the ranking-page URL that begins at position ``start``.

    Args:
        root_url: Base URL of the ranking, without a query string.
        start: Starting position; it is converted with ``str()``.

    Returns:
        str: ``root_url`` followed by ``?start=<start>&``. The trailing
        ``&`` is kept so further query parameters can be appended directly.
    """
    return root_url + "?start=" + str(start) + "&"

def get_review(page_url):
    """Scrape one ranking page and store its movies in the ``douban`` table.

    The page is downloaded and parsed first. Only then is the database
    touched, so a failed download or an unexpected page layout does not
    drop the existing table.

    NOTE: the ``douban`` table is dropped and re-created on every call, so
    rows written by an earlier call are discarded.

    Args:
        page_url: URL of the ranking page to fetch.

    Returns:
        list: one dict per movie with the keys ``'rank'``, ``'title'``,
        ``'score'`` and ``'desc'``, in page order.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        ValueError: if the page has no ``<ol class="grid_view">`` element.
    """
    reponse = requests.get(page_url, timeout=10)
    reponse.raise_for_status()

    soup = BeautifulSoup(reponse.text, 'lxml')
    movie_ol = soup.find("ol", "grid_view")
    if movie_ol is None:
        raise ValueError("no <ol class='grid_view'> element found in " + page_url)

    movies_list = []
    for tag_li in movie_ol.find_all("li"):
        inq = tag_li.find("span", "inq")
        movies_list.append({
            'rank': tag_li.find("em").string,
            'title': tag_li.find_all("span", "title")[0].string,
            'score': tag_li.find("span", "rating_num").string,
            # Movies without a one-line quote get a placeholder text.
            'desc': inq.string if inq is not None else '无评词',
        })

    # `rank` is a reserved word in MySQL 8.0.2+, so every column name is
    # backtick-quoted in both statements.
    creat_table = (
        "CREATE TABLE douban("
        "`rank` varchar(255),"
        "`title` varchar(255),"
        "`score` varchar(255),"
        "`descs` varchar(255))"
    )
    insert_sql = (
        "INSERT INTO douban(`rank`,`title`,`score`,`descs`) "
        "VALUES(%s,%s,%s,%s)"
    )
    rows = [
        (movie['rank'], movie['title'], movie['score'], movie['desc'])
        for movie in movies_list
    ]

    cursor, db = MySqlCommand().connectMysql()
    try:
        cursor.execute("DROP TABLE IF EXISTS douban")
        cursor.execute(creat_table)
        cursor.executemany(insert_sql, rows)
        db.commit()
    finally:
        # Always release the connection, even when a statement fails.
        db.close()

    return movies_list

if __name__ == '__main__':
    # Script entry point: scrape the first page of the ranking and store it.
    root_url = "https://movie.douban.com/top250"
    start = 0  # starting position passed as the ?start= query parameter
    movies_list = get_review(get_url(root_url, start))
    # for movies in movies_list:
    #     print(movies)

  • 1
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值