"""Scrape event images from douban.com's Wuhan weekly listing (pages 1-10).

Each listing page is fetched, image URLs are extracted from the
``data-lazy="..."`` attributes, and every image is saved into DOWN_PATH
as ``download_img.<original-file-name>``.
"""
import os
import re
import urllib.request

# Target directory for downloaded images.
DOWN_PATH = r'C:\liujwFiles\NON_IBM_Files\PycharmProjects\pa_chong_files'
# Listing URL template; %s receives the pagination query suffix.
LIST_URL = r'https://www.douban.com/location/wuhan/events/week-all%s'
# Image URLs live in data-lazy attributes; compile once, reuse per page.
IMG_RE = re.compile(r'data-lazy="(.*?)"')
# Number of listing pages to scrape.
NUM_PAGES = 10


def page_suffix(page):
    """Return the URL query suffix for a 1-based listing page number.

    Page 1 has no query string; later pages use a ``?start=`` offset.
    NOTE(review): offset = page * 10 follows the original author's comment
    ("pages after the first page: page * 10") — confirm against the site's
    actual pagination before relying on it.
    """
    if page == 1:
        return ''
    return '?start=%i' % (page * 10)


def image_name(url):
    """Return the file name of an image URL: last path segment, query stripped."""
    return url.split('/')[-1].split('?')[0]


def download_page_images(page):
    """Fetch one listing page and download all images found on it.

    Returns (found, failed): how many image URLs the page contained and
    how many of them failed to download.
    """
    html = urllib.request.urlopen(LIST_URL % page_suffix(page)).read()
    # Decode leniently; we only need the ASCII data-lazy attributes.
    url_list = IMG_RE.findall(html.decode('utf-8', 'replace'))
    print("Begin to download files.........there are %i files in this page....." % len(url_list))
    failed = 0
    for url in url_list:
        try:
            target = os.path.join(DOWN_PATH, 'download_img.' + image_name(url))
            urllib.request.urlretrieve(url, target)
        except IOError:
            # Best-effort: report the failure and keep going with the rest.
            print("File %s download failed......." % url.split('/')[-1])
            failed += 1
    return len(url_list), failed


def main():
    """Scrape all NUM_PAGES listing pages and print a summary.

    Fixes two bugs in the original script: the loop variable was multiplied
    by 10 *before* the first-page check (so page 1 was never fetched without
    an offset), and the final total was computed as last-page-count * 100
    instead of an accumulated sum.
    """
    total_files = 0
    error_count = 0
    for page in range(1, NUM_PAGES + 1):
        print("Downloading current page: ", page)
        found, failed = download_page_images(page)
        total_files += found
        error_count += failed
    print("Download complete! %i pages, %i files in total, %i files download failed!"
          % (NUM_PAGES, total_files, error_count))


if __name__ == '__main__':
    main()
Python 爬虫抓取图片(分页)
最新推荐文章于 2023-07-07 12:24:11 发布