【图片爬虫】60行代码带你看婚纱摄影——金夫人

#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import re
from urllib import request

from fake_useragent import UserAgent

# Base URL of the customer-photo ("kezhao") section of the site.
url = 'http://www.bjjfr.com/kezhao/'
# Number of customer-photo list pages to crawl (list1.html .. list<N>.html).
listpage = 6
# Path prefix for downloaded customer photos; it is concatenated directly
# with the generated file name, so it must end with a path separator.
imgDIR = r'./kepian//'
# Path prefix for downloaded sample photos (same concatenation rule).
ypimgDIR = r'./yangpian//'
# Number of sample-photo list pages to crawl.
yplistpage = 11
# Shared fake_useragent instance; `ua.random` is read by getResponse.
ua = UserAgent()
def getResponse(url):
    """Fetch a page and return its body decoded as UTF-8 text.

    Args:
        url: Absolute URL of the page to download.

    Returns:
        The response body as a ``str``.

    Raises:
        urllib.error.URLError: If the request fails.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    header = {'User-Agent': ua.random}
    # The header only takes effect when it is attached to a Request object;
    # passing the bare URL to urlopen sends urllib's default User-Agent.
    req = request.Request(url, headers=header)
    # The context manager closes the connection even if read/decode fails.
    with request.urlopen(req) as response:
        return response.read().decode('utf-8')

def getDetail(response,dir):
    """Download every album image referenced in a detail page.

    Args:
        response: HTML text of an album detail page.
        dir: Path prefix for the saved files. It is concatenated directly
            with the generated file name, so it should end with a path
            separator.

    Returns:
        None. Each image is saved as ``<dir><page title>_<image id>.jpg``.

    Raises:
        urllib.error.URLError: If an image download fails.
    """
    # Escape the dot so that only a literal ".jpg" suffix is matched.
    reg = r'<img alt="" src="/uploads/allimg/(.*?)\.jpg" />'
    namereg = '<title>(.*?)_北京金夫人</title>'
    imglist = re.findall(reg, response)
    if not imglist:
        return

    # The page title is the same for every image, so resolve it once and
    # fall back to a placeholder instead of raising IndexError when absent.
    titles = re.findall(namereg, response)
    if titles:
        title = titles[0].replace(' ', '').replace('/', '-')
    else:
        title = 'untitled'

    # Make sure the directory part of the prefix exists before writing.
    parent = os.path.dirname(dir)
    if parent:
        os.makedirs(parent, exist_ok=True)

    for imgURL in imglist:
        url = 'http://www.bjjfr.com/uploads/allimg/%s.jpg' % imgURL
        # Image ids contain '/', which must not leak into the file name.
        name = title + '_' + imgURL.replace('/', '-')
        request.urlretrieve(url, dir + name + '.jpg')

def _crawlSection(listURLFormat, linkReg, detailURLFormat, pages, saveDir, extraIDs=()):
    """Crawl one section of the site and download every album it links to.

    Args:
        listURLFormat: ``%d`` format string producing the URL of list page N.
        linkReg: Regex whose single group captures an album id on a list page.
        detailURLFormat: ``%s`` format string producing an album detail URL.
        pages: Number of list pages to visit, starting at 1.
        saveDir: Path prefix handed to ``getDetail`` for the saved images.
        extraIDs: Album ids to download in addition to those found on the
            list pages.
    """
    # Each album is fetched at most once, even if it is linked from several
    # list pages or also appears in extraIDs.
    seen = set()

    def fetchAlbum(albumID):
        if albumID in seen:
            return
        seen.add(albumID)
        detailURL = detailURLFormat % albumID
        getDetail(getResponse(detailURL), saveDir)
        print(detailURL)

    for page in range(1, pages + 1):
        listURL = listURLFormat % page
        for albumID in re.findall(linkReg, getResponse(listURL)):
            fetchAlbum(albumID)
        print(listURL, 'OK')

    # Extras are handled once after the list pages rather than once per page.
    for albumID in extraIDs:
        fetchAlbum(albumID)


if __name__ == '__main__':
    # Customer photos
    _crawlSection(
        'http://www.bjjfr.com/kezhao/list%d.html',
        '<a href="/kezhao/(.*?).html" target="_blank"><img src="/uploads/allimg/.*?" alt=".*?客片"></a>',
        'http://www.bjjfr.com/kezhao/%s.html',
        listpage,
        imgDIR,
        # Hard-coded album ids; presumably ones the list-page regex misses.
        extraIDs=('1890', '1891', '1892'),
    )

    # Sample photos
    _crawlSection(
        'http://www.bjjfr.com/zp/list%d.html',
        '<a href="/zuopin/(.*?).html" target="_blank"><img src=".*?" alt=""></a>',
        'http://www.bjjfr.com/zuopin/%s.html',
        yplistpage,
        ypimgDIR,
    )
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值