Python scraper with pagination

A small Python script that fetches a gallery page, reads the total page count from the pager, follows the numbered pagination (page 1 is 2679.html, later pages are 2679_2.html, 2679_3.html, ...), extracts the image URL on each page, and saves it under a local directory mirroring the remote path.

# coding=utf-8
import re
import requests
import os

def findimg(url):
    """Fetch the first gallery page, read the total page count, and crawl every page."""
    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.97 Safari/537.36'}
    f = requests.get(url, headers=header)
    html = f.text

    # Get the total number of pages from the pager markup.
    pagecurrt = re.search(r'r_0">.*?<ul><li>.*?(\d+).*?</li>', html, re.S).group(1)
    pagecurrt = int(pagecurrt)

    # Page 1 is e.g. 2679.html; pages 2..N are 2679_2.html, 2679_3.html, ...
    filename = os.path.basename(url).split(".")[0]

    findurlimg(url)
    for i in range(2, pagecurrt + 1):
        nurl = url.replace(filename + '.html', filename + '_%d.html' % i)
        findurlimg(nurl)



def findurlimg(url):
    """Extract the image URLs from one gallery page and download them."""
    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.97 Safari/537.36'}
    f = requests.get(url, headers=header)
    html = f.text

    contentpic = re.findall("src='(.*?)'", html)
    if contentpic:
        # Each page of this gallery carries a single image; download the first match.
        downloadimg(contentpic[0])

def downloadimg(url):
    """Download one image and save it under a local path mirroring the remote path."""
    print("download: " + url)

    # Strip the scheme and host; the remaining remote path becomes the local file path.
    out = re.sub('http://.*?/', '', url)
    out1 = os.path.dirname(out)
    if out1 and not os.path.exists(out1):
        os.makedirs(out1)

    pic = requests.get(url)
    with open(out, 'wb') as fp:
        fp.write(pic.content)

url="http://www.u9980.com/yazhourenti/2013/0218/2679.html"
findimg(url)
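
For illustration, here is a minimal sketch (not part of the original script) of the page-URL rewrite that findimg performs, assuming the site names follow-up pages <name>_2.html, <name>_3.html, and so on; each rewritten URL is then passed to findurlimg:

# Sketch of the pagination URL rewrite used in findimg.
import os

url = "http://www.u9980.com/yazhourenti/2013/0218/2679.html"
filename = os.path.basename(url).split(".")[0]   # "2679"
for i in range(2, 4):
    print(url.replace(filename + '.html', filename + '_%d.html' % i))
# http://www.u9980.com/yazhourenti/2013/0218/2679_2.html
# http://www.u9980.com/yazhourenti/2013/0218/2679_3.html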