编辑环境:Python 3.6,PyCharm
用到的第三方库:requests,lxml
代码:
import os
import re

import requests
from lxml import etree
# Scrape the Douban "Top 250" movie list and save each movie's poster as
# "<first title>.jpg" inside OUTPUT_DIR.

PAGE_SIZE = 25          # the original script steps the ``start`` offset by 25
TOTAL_MOVIES = 250      # list size, taken from the "top250" URL
OUTPUT_DIR = "d:/data/"
REQUEST_TIMEOUT = 10    # seconds; requests waits forever when no timeout is set
HEADERS = {
    'User-Agent':
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.97 Safari/537.36'
}
# Characters that are not allowed in Windows file names.
INVALID_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|]')


def page_offsets(total=TOTAL_MOVIES, page_size=PAGE_SIZE):
    """Return the ``start`` offsets of the list pages to fetch.

    The upper bound is exclusive, so offset ``total`` itself is not
    requested: with 250 entries the last page starts at 225.
    """
    return list(range(0, total, page_size))


def safe_filename(title):
    """Return *title* with characters that are illegal in file names replaced by ``_``."""
    return INVALID_FILENAME_CHARS.sub('_', title).strip()


def download_page(start):
    """Download every poster found on the list page beginning at offset *start*.

    Raises:
        requests.HTTPError: if the list page itself answers with an error status.
    """
    url = f'https://movie.douban.com/top250?start={start}&filter='
    page = requests.get(url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    # Fail loudly instead of silently parsing an error page with no items.
    page.raise_for_status()
    dom = etree.HTML(page.text)

    for item in dom.xpath('//div[@class="item"]'):
        titles = item.xpath('div[@class="info"]/div[@class="hd"]/a/span[@class="title"]/text()')
        poster_urls = item.xpath('div[@class="pic"]/a/img/@src')
        if not titles or not poster_urls:
            # Incomplete entry: nothing to name the file after or nothing to fetch.
            continue

        title = titles[0]
        file_name = safe_filename(title)
        if not file_name:
            continue

        try:
            # Send the same User-Agent as for the page request.
            image = requests.get(poster_urls[0], headers=HEADERS, timeout=REQUEST_TIMEOUT)
            image.raise_for_status()
        except requests.RequestException as err:
            # One broken poster should not abort the remaining downloads.
            print(f'skipped {title}: {err}')
            continue

        with open(os.path.join(OUTPUT_DIR, file_name + ".jpg"), 'wb') as poster_file:
            poster_file.write(image.content)
        print(title)


def main():
    """Fetch all list pages and store their posters in OUTPUT_DIR."""
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    for start in page_offsets():
        download_page(start)


if __name__ == "__main__":
    main()