利用CSS选择器爬取豆瓣上的图书
主要技术:熟练掌握requests、BeautifulSoup
爬取图书链接 "https://book.douban.com/latest?icn=index-latestbook-all"
代码块
import requests
from bs4 import BeautifulSoup
def get_film(url, timeout=10):
    """Download the page at ``url`` and return its body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the crawler forever.

    Returns:
        The decoded page text on success, or the string "爬取失败!" when
        the request fails (connection error, timeout, or a 4xx/5xx status
        reported by ``raise_for_status``).
    """
    # Send a browser-like User-Agent so the request is not rejected by
    # the site's anti-crawler checks.
    headers = {'User-Agent': 'Mozilla/5.0'}
    try:
        r = requests.get(url, headers=headers, timeout=timeout)
        r.raise_for_status()
        # Decode with the encoding detected from the body itself. The
        # previous code assigned the integer status code here, which is
        # not a valid encoding name.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Only request-level failures map to the sentinel string; unrelated
        # errors (e.g. KeyboardInterrupt) are allowed to propagate.
        return "爬取失败!"
def parse_html(html,List):
film_name1=[]
film_actor1=[]
film_actor2=[]
soup=BeautifulSoup(html,'html.parser')
for name in so