(先占坑,之后补充)
1.爬取豆瓣 复仇者联盟3标题
from bs4 import BeautifulSoup
import requests
# Fetch the Douban subject page and print the text of the
# <span property="v:itemreviewed"> element (the movie title).
# NOTE(review): if the site answers the default requests User-Agent with a
# 4xx status, pass a browser-like `headers=` here -- confirm against the live site.
myurl = requests.get('https://movie.douban.com/subject/24773958/', timeout=10)
# Stop on an HTTP error status instead of parsing an error page.
myurl.raise_for_status()
v_text = BeautifulSoup(myurl.text, 'lxml')
title_tag = v_text.find('span', attrs={'property': 'v:itemreviewed'})
if title_tag is None:
    # find() returns None when nothing matches; report that explicitly
    # rather than failing later on `None.text`.
    raise LookupError('no <span property="v:itemreviewed"> element in the page')
v_title = title_tag.text
print(v_title)
2.爬取豆瓣 复联3简介
from bs4 import BeautifulSoup
import requests
# Fetch the Douban subject page and print the text of the
# <span property="v:summary"> element (the movie synopsis).
# NOTE(review): if the site answers the default requests User-Agent with a
# 4xx status, pass a browser-like `headers=` here -- confirm against the live site.
myurl = requests.get('https://movie.douban.com/subject/24773958/', timeout=10)
# Stop on an HTTP error status instead of parsing an error page.
myurl.raise_for_status()
v_text = BeautifulSoup(myurl.text, 'lxml')
summary_tag = v_text.find('span', attrs={'property': 'v:summary'})
if summary_tag is None:
    # find() returns None when nothing matches; report that explicitly
    # rather than failing later on `None.text`.
    raise LookupError('no <span property="v:summary"> element in the page')
v_shortIntroduce = summary_tag.text
print(v_shortIntroduce)
3.爬取豆瓣 复联3主要信息
from bs4 import BeautifulSoup
import requests
# Fetch the Douban subject page and print the text content of the
# <div id="info"> element (the movie's main information box).
# NOTE(review): if the site answers the default requests User-Agent with a
# 4xx status, pass a browser-like `headers=` here -- confirm against the live site.
myurl = requests.get('https://movie.douban.com/subject/24773958/', timeout=10)
# Stop on an HTTP error status instead of parsing an error page.
myurl.raise_for_status()
v_text = BeautifulSoup(myurl.text, 'lxml')
info_tag = v_text.find('div', attrs={'id': 'info'})
if info_tag is None:
    # find() returns None when nothing matches; report that explicitly
    # rather than failing later on `None.text`.
    raise LookupError('no <div id="info"> element in the page')
v_mainMeassage = info_tag.text
print(v_mainMeassage)
4.爬取豆瓣 电影标题
from bs4 import BeautifulSoup
import requests
# Fetch the Douban movie front page and print the text of every
# <li class="title"> element, one per line.
# NOTE(review): if the site answers the default requests User-Agent with a
# 4xx status, pass a browser-like `headers=` here -- confirm against the live site.
myurl = requests.get('https://movie.douban.com/', timeout=10)
# Stop on an HTTP error status instead of parsing an error page.
myurl.raise_for_status()
v_text = BeautifulSoup(myurl.text, 'lxml')
# find_all() is the current spelling of the legacy findAll(); it returns an
# empty result when nothing matches, so the loop below simply prints nothing.
v_mainMeassage = v_text.find_all('li', attrs={'class': 'title'})
for title_tag in v_mainMeassage:
    # The loop body must be indented; the original line was flush-left,
    # which is a syntax error.
    print(title_tag.text)