前提:学了 Python3 之后,总想找个项目练练手,于是跟着网上的教程学习爬取数据。下面的代码是我根据网上的案例改写的,用于从豆瓣爬取电影数据。
import requests
from bs4 import BeautifulSoup
import psycopg2
class DouBan():
    """Scrape one page of the Douban movie Top 250 list into PostgreSQL.

    Rows are written to the ``dou_ban_top`` table with the columns
    ``title``, ``rate_num``, ``quote`` and ``comment_num``.
    """

    def __init__(self, database='postgres', user='postgres',
                 password='jinshuai', host='127.0.0.1', port='5432'):
        """Store the PostgreSQL connection settings.

        Args:
            database: Name of the database to connect to.
            user: Database user name.
            password: Password for ``user``.
            host: Database server address.
            port: Database server port.
        """
        # Plain assignments on purpose: a trailing comma here would turn the
        # value into a 1-tuple instead of a string.
        self.database = database
        self.user = user
        self.password = password
        self.host = host
        self.port = port

    def connect_database(self):
        """Open a PostgreSQL connection using the settings from ``__init__``."""
        self.__db = psycopg2.connect(
            database=self.database,
            user=self.user,
            password=self.password,
            host=self.host,
            port=self.port,
        )

    def web_content(self, start=25):
        """Download one Top 250 page and return its movie entries.

        Args:
            start: Value of the ``start`` query parameter in the page URL
                (presumably the offset of the first movie - confirm against
                the site's paging).

        Returns:
            The list of ``<div class="item">`` elements found on the page.

        Raises:
            requests.RequestException: If the request fails, times out or
                returns an HTTP error status.
        """
        self.headers = {
            'Cookie': 'bid=5DWq6ockZ70; douban-fav-remind=1; __utmz=30149280.1597546049.1.1.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; __utmc=30149280; __utmc=223695111; __utmz=223695111.1606473256.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); _pk_ses.100001.4cf6=*; ap_v=0,6.0; __utma=30149280.1853893553.1597546049.1606473256.1606554400.3; __utmb=30149280.0.10.1606554400; __utma=223695111.1967468661.1606473256.1606473256.1606554400.2; __utmb=223695111.0.10.1606554400; _pk_id.100001.4cf6=dcfcadb0ebf6c236.1606473256.2.1606554403.1606473256.',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36'
        }
        self.url = 'https://movie.douban.com/top250?start={}&filter='.format(start)
        # A timeout keeps the script from hanging forever on a stalled
        # connection; raise_for_status surfaces HTTP errors instead of
        # silently parsing an error page into zero rows.
        r = requests.get(self.url, headers=self.headers, timeout=10)
        r.raise_for_status()
        soup = BeautifulSoup(r.text, 'html.parser')
        return soup.find_all('div', class_='item')

    def insert_data(self):
        """Fetch the movie entries and insert one row per entry.

        Each row is committed on its own; a row that the database rejects is
        rolled back and reported, and the remaining rows are still attempted.

        Returns:
            The number of rows inserted successfully.
        """
        # Fetch first so no database connection is held open (or leaked)
        # while the HTTP request is in flight or if it fails.
        content_list = self.web_content()
        self.connect_database()
        sql = 'insert into dou_ban_top(title,rate_num,quote,comment_num) values (%s,%s,%s,%s)'
        i = 0
        try:
            cursor = self.__db.cursor()
            try:
                for content in content_list:
                    title = content.find('span', class_='title').text
                    rating_num = content.find('span', class_='rating_num').text
                    # Not every entry carries a quote span; fall back to an
                    # empty string instead of crashing on None.text.
                    quote_tag = content.find('span', class_='inq')
                    quote = quote_tag.text if quote_tag is not None else ''
                    star = content.find('div', class_='star')
                    # Drop the last three characters of the final span's text
                    # (presumably a fixed suffix after the count - confirm).
                    comment = star.find_all('span')[-1].text[:-3]
                    try:
                        cursor.execute(sql, (title, rating_num, quote, comment))
                    except psycopg2.Error as e:
                        # A failed statement aborts the current transaction;
                        # roll back so the following rows can still go in.
                        self.__db.rollback()
                        print(e)
                    else:
                        self.__db.commit()
                        print("执行条数:", cursor.rowcount)
                        i += 1
            finally:
                cursor.close()
        finally:
            self.__db.close()
        print("总条数为:", i)
        return i
if __name__ == '__main__':
    # Run the scraper only when this file is executed as a script.
    douban = DouBan()
    douban.insert_data()
主要分为以下几个部分:
(1)连接数据库
(2)请求豆瓣网站获取数据
(3)把数据保存到数据库
![在这里插入图片描述](https://img-blog.csdnimg.cn/20201130220835816.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxXzM3OTYyNDAy,size_16,color_FFFFFF,t_70)
我都是根据网上的案例一点一点学习,学习语法,学习用法。我认为只有动手才能理解得更快,后面有什么好的想法,会继续更新。