一直想学习爬虫
直到最近两天 才开始了学习…
以下尝试了requests和BeautifulSoup的基本用法
抓取了豆瓣新书速递的图片 并以书名对图片进行命名
请各位看官多多指教(如果有人看的话…)
import requests
from bs4 import BeautifulSoup as bs
# Douban "new books" listing page that the covers are scraped from.
LATEST_BOOKS_URL = 'https://book.douban.com/latest?icn=index-latestbook-all'

# requests never times out on its own, so a stalled connection would hang
# the script forever without an explicit limit (seconds).
REQUEST_TIMEOUT = 10

# Path separators and characters rejected in Windows file names.
_UNSAFE_FILENAME_CHARS = str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})


def safe_filename(title):
    """Turn a book title into a string that is safe to use as a file name.

    Runs of whitespace (including newlines) are collapsed to a single space,
    and path separators / reserved characters are replaced with ``_`` so the
    title cannot escape the current directory or make ``open`` fail.

    Args:
        title: The raw title text.

    Returns:
        The cleaned title, or ``'untitled'`` when nothing usable is left.
    """
    cleaned = ' '.join(str(title).split()).translate(_UNSAFE_FILENAME_CHARS)
    return cleaned or 'untitled'


def fetch_covers(page_url=LATEST_BOOKS_URL):
    """Download the listing page and return ``(title, image URL)`` pairs.

    Titles come from the ``<h2>`` of every ``div.detail-frame``; image URLs
    come from the ``src`` of every ``<img>`` inside ``<body>``. The two lists
    are paired by position.

    Args:
        page_url: URL of the listing page to scrape.

    Returns:
        A list of ``(title, image_url)`` tuples, truncated to the shorter of
        the two lists.

    Raises:
        requests.RequestException: If the page cannot be fetched or the
            server answers with an error status.
    """
    response = requests.get(page_url, timeout=REQUEST_TIMEOUT)  # fetch the HTML
    # Fail loudly instead of parsing an error page and finding nothing.
    response.raise_for_status()
    soup = bs(response.content, 'lxml')  # build the BeautifulSoup tree

    titles = [
        frame.h2.get_text().strip()
        for frame in soup.body.find_all('div', {'class': 'detail-frame'})
    ]
    image_urls = [str(img['src']) for img in soup.body.find_all('img')]

    # NOTE(review): pairing is purely positional and assumes every <img> in
    # the body is a book cover, in the same order as the detail-frame blocks.
    # Verify against the live page; any extra image shifts the pairing.
    if len(titles) != len(image_urls):
        print('warning: found %d titles but %d images; '
              'pairing by position and ignoring the surplus'
              % (len(titles), len(image_urls)))

    # zip stops at the shorter list, so a count mismatch can no longer raise
    # IndexError halfway through the downloads.
    return list(zip(titles, image_urls))


def main():
    """Save every cover image as ``<book title>.jpg`` in the current directory."""
    for title, image_url in fetch_covers():
        try:
            picture = requests.get(image_url, timeout=REQUEST_TIMEOUT)
            picture.raise_for_status()
        except requests.RequestException as exc:
            # One broken image should not abort the remaining downloads, and
            # an error body must not be written out as a .jpg.
            print('skipping %r: %s' % (title, exc))
            continue
        # The with-block closes the file; no explicit close() is needed.
        with open(safe_filename(title) + '.jpg', 'wb') as image_file:
            image_file.write(picture.content)


if __name__ == '__main__':
    main()