#coding=utf-8
"""Download the first image from every page of a paginated HTML article.

findimg() reads the total page count from the first page, then visits each
page ("<name>.html", "<name>_2.html", ...) and saves the first image whose
URL appears in a single-quoted src='...' attribute.  Files are written
relative to the current working directory, mirroring the path part of the
image URL.
"""
import os
import re

import MySQLdb  # NOTE(review): not referenced by the code visible here; kept as-is.
import requests

# Browser-like User-Agent sent with every HTML page request.
_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.97 Safari/537.36'}


def _page_url(url, page):
    """Return the URL of page number ``page`` given the first-page ``url``.

    "<name>.html" in the last path segment becomes "<name>_<page>.html".
    If the last segment does not contain "<name>.html" the URL is returned
    unchanged.
    """
    head, sep, tail = url.rpartition('/')
    stem = tail.split('.')[0]
    # Plain string replacement: the file name is literal text, not a regex.
    paged = tail.replace(stem + '.html', '%s_%d.html' % (stem, page), 1)
    return head + sep + paged


def findimg(url):
    """Download one image from each page of the article starting at ``url``.

    The total page count is the first number found inside the first
    ``<ul><li>`` that follows the ``r_0">`` marker in the first page.  When
    that marker is missing the article is treated as a single page.

    :param url: URL of the first page, expected to end in "<name>.html".
    """
    html = requests.get(url, headers=_HEADERS).text
    # Total number of pages, as advertised by the pager markup.
    match = re.search(r'r_0">.*?<ul><li>.*?(\d+).*?</li>', html, re.S)
    page_count = int(match.group(1)) if match else 1

    findurlimg(url)
    for page in range(2, page_count + 1):
        findurlimg(_page_url(url, page))


def findurlimg(url):
    """Fetch the page at ``url`` and download the first image it references.

    Only single-quoted ``src='...'`` attributes are considered.  A page with
    no such attribute is reported and skipped instead of raising IndexError.

    :param url: URL of one HTML page of the article.
    """
    html = requests.get(url, headers=_HEADERS).text
    sources = re.findall("src='(.*?)'.*?", html)
    if not sources:
        print("no image found: " + url)
        return
    downloadimg(sources[0])


def downloadimg(url):
    """Save the resource at ``url`` under a local path mirroring its URL path.

    The scheme and host ("http://host/" or "https://host/") are stripped and
    the remainder is used as a path relative to the current working
    directory; missing parent directories are created.

    :param url: absolute URL of the image to download.
    """
    print("download:" + url)
    out = re.sub(r'https?://.*?/', '', url)
    out_dir = os.path.dirname(out)
    # An image at the host root has no directory part; makedirs('') would fail.
    if out_dir and not os.path.exists(out_dir):
        os.makedirs(out_dir)
    pic = requests.get(url)
    with open(out, 'wb') as fp:
        fp.write(pic.content)


# Script entry: runs as soon as the file is executed.
url = "http://www.u9980.com/yazhourenti/2013/0218/2679.html"
findimg(url)
# python 带分页 (Python, with pagination)
# 最新推荐文章于 2024-07-28 03:11:58 发布 (latest recommended article published 2024-07-28 03:11:58)