# Fetch the Douban Books home page and print the cover image URL and title
# of every entry in the <ul> whose class attribute is
# 'list-col list-col5 list-express slide-item'.
url = 'https://book.douban.com/'
# Browser-like User-Agent sent with the request.
headers = {
    'user-agent': (
        'Mozilla/5.0 (Windows NT 10.0; WOW64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/86.0.4240.198 Safari/537.36'
    )
}
# Bound the wait so an unresponsive server cannot hang the script.
data = requests.get(url, headers=headers, timeout=10)
# Fail on an HTTP error status instead of parsing an error page.
data.raise_for_status()

# Parse the HTML.
print('Phrase by BeautifulSoup')
soup = BeautifulSoup(data.text, 'lxml')
print(soup)
books_left = soup.find(
    'ul', {'class': 'list-col list-col5 list-express slide-item'}
)
if books_left is None:
    # find() returns None when nothing matches; report that directly rather
    # than failing later with an AttributeError on None.
    raise RuntimeError('book list <ul> not found in the page at ' + url)

# The direct children of the <ul> include "\n" text nodes between the tags.
# Keep only real tags; isinstance() also rejects NavigableString subclasses,
# which an exact type() comparison would let through.
books = [child for child in books_left.children if isinstance(child, bs4.Tag)]

# Result containers; nothing in this section fills them.
img_urls = []
titles = []
ratings = []
authors = []
details = []

for book in books:
    link = book.find('a')
    if link is None:
        # Entry without a link: there is nothing to read from it.
        continue
    img = link.find('img')
    img_url = img.get('src') if img is not None else None
    title = link.get('title')
    if img_url is None or title is None:
        # Skip incomplete entries; concatenating None below would raise.
        continue
    print("image_url:" + img_url)
    print("title:" + title)
一、分析过程
1.代码如上所示:
2.出错位置:img_url=book.find_all('a')[0].find('img').get('src')
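3.调试监听分析:books 由 list(books_left) 得到,其中除了 <li> 标签之外,还混有内容为 “\n” 的换行文本节点(bs4.NavigableString);这些节点不是标签,对它们调用 find_all('a') 就会出错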
二、解决方法:
加判断过滤空行:if type(book) is not bs4.NavigableString:(更稳妥的写法是 if isinstance(book, bs4.Tag):,这样 NavigableString 的子类也会被一并过滤)