代码如下
from bs4 import BeautifulSoup
import requests
import time
import os
import urllib
import random
# Listing URL of the "taylorswift" inspirations feed; walk_pages() appends
# the page number directly to the end of this string.
single_url = "http://weheartit.com/inspirations/taylorswift?scrolling=true&page="
# Directory the downloaded images are written to; make_file_name() creates
# it when it is missing.
target_dir = "/Users/reed/Desktop/images"
def make_file_name(path):
    """Build a randomly named destination path for a downloaded image.

    The download directory ``target_dir`` is created if it does not exist
    yet. The generated path is printed before it is returned.

    Args:
        path: URL (or plain path) of the image being downloaded.

    Returns:
        A path inside ``target_dir`` consisting of nine random digits
        followed by the extension of ``path`` (no extension if ``path``
        has none).
    """
    # Function-scope import: the file only does ``import urllib``, which
    # does not guarantee that the ``urllib.parse`` submodule is loaded.
    from urllib.parse import urlsplit

    # makedirs(exist_ok=True) also creates missing parent directories and
    # avoids the race between a separate isdir() check and mkdir().
    os.makedirs(target_dir, exist_ok=True)

    # Read the extension from the URL's path component only, so a query
    # string or a dot in the host name can never end up in the file name.
    extension = os.path.splitext(urlsplit(path).path)[1]

    # Always exactly nine digits; slicing str(random.random()) could yield
    # characters such as '.' or 'e-' for short or very small floats.
    stem = "%09d" % random.randrange(10 ** 9)

    image_path = os.path.join(target_dir, stem + extension)
    print(image_path)
    return image_path
def get_page(url, data=None):
    """Download every thumbnail image found on one listing page.

    The page is fetched and parsed, and the ``src`` of the first
    ``img.entry-thumbnail`` inside each ``div.entry-preview`` is saved under
    a name produced by ``make_file_name``. Progress is printed as it goes.

    Args:
        url: address of the listing page to scrape.
        data: not used for the request; the "len of images" line is printed
            only when it is ``None``.
            NOTE(review): the indentation of this function was lost in the
            pasted source; the download loop is assumed to run regardless
            of ``data`` -- confirm against the original file.

    Returns:
        None.
    """
    # Function-scope import: a bare ``import urllib`` does not guarantee
    # that the ``urllib.request`` submodule is available.
    import urllib.request

    print(url)
    # Without a timeout a stalled server would block the script forever.
    wb_data = requests.get(url, timeout=30)
    soup = BeautifulSoup(wb_data.text, 'lxml')
    # The trailing space in the class string is kept exactly as written;
    # presumably it mirrors the page's markup -- verify before changing.
    image_divs = soup.find_all("div", class_="entry-preview ")
    if data is None:
        print('len of images: ' + str(len(image_divs)))

    for image_div in image_divs:
        thumbnail = image_div.find("img", class_="entry-thumbnail")
        image_url = thumbnail.get('src') if thumbnail is not None else None
        if not image_url:
            # Entry without a usable thumbnail: skip it instead of crashing
            # the whole page with an IndexError / bad URL.
            continue
        print(image_url)
        urllib.request.urlretrieve(image_url, make_file_name(image_url))
        print('.')
def walk_pages(start, end, delay=2):
    """Scrape the listing pages ``start`` .. ``end - 1`` one after another.

    For each page number a '..' marker is printed, the page URL is built by
    appending the number to ``single_url``, and ``get_page`` is called on it.

    Args:
        start: first page number to fetch.
        end: page number to stop before (exclusive, as with ``range``).
        delay: seconds to pause after each page; defaults to the 2 seconds
            that were previously hard-coded.

    Returns:
        None.
    """
    for num in range(start, end):
        print('..')
        get_page(single_url + str(num))
        # Pause between pages so requests are not fired back to back.
        time.sleep(delay)
# Script entry: walk_pages iterates range(start, end), which excludes `end`,
# so this call fetches page 3 only.
walk_pages(3, 4)
log如下
/Library/Frameworks/Python.framework/Versions/3.5/bin/python3.5 /Users/reed/PycharmProjects/web01/web_parse4.py
..
http://weheartit.com/inspirations/taylorswift?scrolling=true&page=3
len of images: 24
http://data.whicdn.com/images/197230154/superthumb.jpg
/Users/reed/Desktop/images/643362636.jpg
.
http://data.whicdn.com/images/149640530/superthumb.jpg
/Users/reed/Desktop/images/690903127.jpg
.
http://data.whicdn.com/images/158339547/superthumb.jpg
/Users/reed/Desktop/images/279342765.jpg
.
http://data.whicdn.com/images/165672936/superthumb.png
/Users/reed/Desktop/images/178600404.png
.
http://data.whicdn.com/images/197581707/superthumb.jpg
/Users/reed/Desktop/images/001178043.jpg
.
http://data.whicdn.com/images/248682240/superthumb.png
/Users/reed/Desktop/images/950703002.png
.
http://data.whicdn.com/images/198418513/superthumb.jpg
/Users/reed/Desktop/images/644595508.jpg
.
http://data.whicdn.com/images/179303382/superthumb.jpg
/Users/reed/Desktop/images/248896182.jpg
.
http://data.whicdn.com/images/179303481/superthumb.jpg
/Users/reed/Desktop/images/693730015.jpg
.
http://data.whicdn.com/images/179311943/superthumb.jpg
/Users/reed/Desktop/images/269555826.jpg
.
http://data.whicdn.com/images/266178739/superthumb.jpg
/Users/reed/Desktop/images/847567642.jpg
.
http://data.whicdn.com/images/266238176/superthumb.jpg
/Users/reed/Desktop/images/763393865.jpg
.
http://data.whicdn.com/images/266254626/superthumb.jpg
/Users/reed/Desktop/images/915840388.jpg
.
http://data.whicdn.com/images/266315562/superthumb.jpg
/Users/reed/Desktop/images/188748947.jpg
.
http://data.whicdn.com/images/266317310/superthumb.jpg
/Users/reed/Desktop/images/442216453.jpg
.
http://data.whicdn.com/images/266317447/superthumb.jpg
/Users/reed/Desktop/images/520423894.jpg
.
http://data.whicdn.com/images/266317495/superthumb.jpg
/Users/reed/Desktop/images/290174412.jpg
.
http://data.whicdn.com/images/266317536/superthumb.jpg
/Users/reed/Desktop/images/162962825.jpg
.
http://data.whicdn.com/images/266317654/superthumb.jpg
/Users/reed/Desktop/images/647146572.jpg
.
http://data.whicdn.com/images/266318571/superthumb.jpg
/Users/reed/Desktop/images/829340404.jpg
.
http://data.whicdn.com/images/266318590/superthumb.jpg
/Users/reed/Desktop/images/077170361.jpg
.
http://data.whicdn.com/images/266318702/superthumb.jpg
/Users/reed/Desktop/images/761035396.jpg
.
http://data.whicdn.com/images/266318671/superthumb.jpg
/Users/reed/Desktop/images/996100169.jpg
.
http://data.whicdn.com/images/266374822/superthumb.jpg
/Users/reed/Desktop/images/786120363.jpg
.
Process finished with exit code 0