第一个虫子……(爬妹子图片)

import os
import urllib.request

def get_html(url):
    """Fetch *url* and return the raw response body.

    The request carries a desktop Firefox ``User-Agent`` header.

    Args:
        url: Address to fetch; passed straight to ``urllib.request.Request``.

    Returns:
        The undecoded response body as ``bytes``.

    Raises:
        urllib.error.URLError: Propagated from ``urlopen`` when the request
            cannot be completed.
    """
    req = urllib.request.Request(url)
    req.add_header('User-Agent','Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:58.0) Gecko/20100101 Firefox/58.0')
    # Use the response as a context manager so the underlying connection is
    # closed as soon as the body has been read, not whenever it is collected.
    with urllib.request.urlopen(req) as response:
        return response.read()


def get_img_page_list(url):
    """Collect the ``.html`` link targets found on the page at *url*.

    The page is fetched with ``get_html`` and decoded as GBK. Every
    occurrence of the anchor prefix ``<a target='_blank' href=`` is examined:
    if ``.html`` appears within 255 characters of the start of the prefix,
    the text between the character following the prefix (assumed to be the
    opening quote) and the end of ``.html`` is recorded.

    Prints the number of links found and returns them as a list of strings.
    """
    page = get_html(url).decode('gbk')
    marker = "<a target='_blank' href="
    # Prefix length plus one more character for the quote opening the value.
    skip = len(marker) + 1
    links = []
    pos = page.find(marker)
    while pos >= 0:
        end = page.find('.html', pos, pos + 255)
        if end == -1:
            # No '.html' close enough: step past this anchor and keep going.
            resume = pos + skip
        else:
            links.append(page[pos + skip:end + 5])
            resume = end
        pos = page.find(marker, resume)
    print('找到%d组网页' % len(links))
    return links


def get_img_list(img_addrs):
    """Return the ``.jpg`` addresses found in one gallery page.

    The page at *img_addrs* is fetched with ``get_html`` and decoded as GBK.
    Only the span from the first occurrence of ``picture`` up to the next
    ``</div>`` is scanned. For each ``src=`` in that span, the text from one
    character past ``src=`` (assumed to be the opening quote) through the
    next ``.jpg`` inside the span is collected.

    Returns a list of strings; it is empty when nothing matches.
    """
    page = get_html(img_addrs).decode('gbk')
    # Lower and upper bounds of the region that is searched.
    region_start = page.find('picture')
    region_end = page.find('</div>', region_start)
    found = []
    cursor = page.find('src=', region_start, region_end)
    while cursor >= 0:
        ext_at = page.find('.jpg', cursor, region_end)
        if ext_at == -1:
            # No '.jpg' left in the region: step past this 'src=' attribute.
            resume = cursor + 5
        else:
            found.append(page[cursor + 5:ext_at + 4])
            resume = ext_at
        cursor = page.find('src=', resume, region_end)
    return found



def save_img(img_list):
    """Download each address in *img_list* and write it to a local file.

    The file name is built from the last four ``/``-separated pieces of the
    address joined with ``-``; the file is opened relative to the current
    working directory. The download happens before the file is opened. A
    ``save:`` line is printed for every file written.
    """
    for img_url in img_list:
        parts = img_url.split('/')
        name = '-'.join((parts[-4], parts[-3], parts[-2], parts[-1]))
        data = get_html(img_url)
        with open(name, 'wb') as out:
            out.write(data)
            print('save:', name)

def download_mm(dirname='XXOO',wantpages=1):
    """Download the images of the first *wantpages* galleries into *dirname*.

    Gallery links are read from the hard-coded index page via
    ``get_img_page_list``; each gallery is scanned with ``get_img_list`` and
    its images are stored with ``save_img``.

    Side effect: the process working directory is changed to *dirname*
    (created if missing) and is not restored afterwards.

    Args:
        dirname: Directory that receives the downloaded files.
        wantpages: Maximum number of galleries to process. Values larger than
            the number of links found are clamped to that number; zero or
            negative values download nothing.
    """
    os.makedirs(dirname, exist_ok=True)
    os.chdir(dirname)

    url='http://www.meizitu.com/a/more_1.html'
    img_page_list = get_img_page_list(url)
    # Never index past the links that were actually found on the index page.
    page_total = max(0, min(wantpages, len(img_page_list)))
    print('将下载%d页'%(page_total))
    for page_url in img_page_list[:page_total]:
        # Fetch the gallery's image list and download every image in it.
        save_img(get_img_list(page_url))



if __name__ == '__main__':
    # Ask how many gallery pages to fetch, then start the download.
    page_count = int(input('下载页数:'))
    download_mm(wantpages=page_count)
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值