import requests
import json
import pymongo
# Paginated Douban search URLs for TV subjects tagged 美剧 (percent-encoded in
# the query string): 25 pages of 20 results, page_start = 0, 20, ..., 480.
urlList = [
    "https://movie.douban.com/j/search_subjects"
    "?type=tv&tag=%E7%BE%8E%E5%89%A7&sort=recommend"
    f"&page_limit=20&page_start={offset}"
    for offset in range(0, 500, 20)
]
# Client for the MongoDB server on localhost:27017; connect=False is passed so
# the client is created without connecting immediately.
client = pymongo.MongoClient('localhost', 27017, connect=False)

# Database "douban".
douban = client.douban
# If the database is protected by a username/password, credentials must be
# supplied; leave this out when no authentication is configured.
# douban.authenticate(name="douban_user", password="")

# Collection "douban_tv" that receives the crawled records.
douban_tv = douban.douban_tv
# Unique ascending index on 'id' so the same show cannot be stored twice.
douban_tv.create_index([('id', pymongo.ASCENDING)], unique=True)
# Crawl every result page in urlList and store one document per TV show in
# the douban_tv collection.
# Num counts the documents successfully inserted during this run.
Num = 0
# The User-Agent never changes, so build the headers once, not per request.
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36"}
for url in urlList:
    # A timeout keeps a stalled connection from hanging the crawl forever.
    response_data = requests.get(url, headers=headers, timeout=10)
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    response_data.raise_for_status()
    json_data = response_data.json()
    # Direct indexing is deliberate: a payload without 'subjects' is not a
    # normal result page and should stop the crawl with a KeyError.
    for tv in json_data['subjects']:
        data = {
            'rate': tv['rate'],
            'title': tv['title'],
            'img_url': tv['cover'],
            'id': tv['id'],
            # Extra constant field appended to every record as its tag.
            'tagd': "美剧",
        }
        try:
            # insert_one replaces Collection.insert, which PyMongo 4 removed.
            douban_tv.insert_one(data)
        except pymongo.errors.DuplicateKeyError:
            # The unique index on 'id' rejects shows stored by an earlier run
            # (or repeated across pages); skip them instead of crashing.
            print("skip duplicate id", data['id'])
            continue
        Num += 1
        print(Num, data)
# Python 爬取豆瓣250电影并写入MongoDB数据库
# 最新推荐文章于 2021-02-05 06:29:44 发布