Python实现爬取京东网页的图片

import random
from urllib import request
import re
import os
import time
import threadpool


def getAmason(i,j):
    """Download product thumbnails from JD search-result pages.

    For every page counter from ``i`` (inclusive) to ``j`` (exclusive) this
    requests one search-results page for the keyword hard-coded in the URL,
    extracts every image address matched by the thumbnail pattern, saves each
    image into the hard-coded output directory, and then sleeps 5 seconds
    before moving on to the next counter.

    Args:
        i: First page counter to fetch (integer).
        j: Page counter to stop before (integer); nothing is fetched when
            ``i >= j``.

    Returns:
        None. The results are the files written to disk and the progress
        lines printed to stdout.

    Raises:
        Nothing is caught here: errors raised by ``urlopen``, by decoding the
        page as UTF-8, or by ``urlretrieve`` (including a missing output
        directory) propagate to the caller.
    """
    agents_list = [
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Maxthon/4.4.3.4000 Chrome/30.0.1599.101 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36",
        "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 SE 2.X MetaSr 1.0",
        "Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.10) Gecko/20100922 Ubuntu/10.10 (maverick) Firefox/3.6.10",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2",
    ]
    # Compiled once instead of on every page. Raw strings keep the pattern
    # and the Windows path byte-identical to the original literals while
    # avoiding invalid-escape warnings for sequences such as "\d" and "\P".
    # Group 1 is the full "host/path.jpg" address; group 2 is only the last
    # path segment and is not used.
    thumb_pattern = re.compile(r'<img width="220" height="282".*(img\d{1,}\.\w+\.com(/\w+)+\.jpg)"')
    save_dir = r"H:\Python Project\day19\img"

    # Running value for the "s" query parameter. It always starts from 50,
    # whatever ``i`` is.
    # NOTE(review): presumably a result offset expected by the site; confirm
    # that restarting from 50 is intended for ranges that do not begin at 1.
    offset = 50
    for page_no in range(i, j):
        page_param = page_no * 2 - 1
        offset = offset + page_no + 48
        url_path = "http://search.jd.com/Search?keyword=%E8%BF%9E%E8%A1%A3%E8%A3%99&enc=utf-8&qrst=1&rt=1&stop=1&vt=2&wq=%E8%BF%9E%E8%A1%A3%E8%A3%99&page="+str(page_param)+"&s="+str(offset)+"&click=0"
        print(url_path)

        page_request = request.Request(url_path)
        # A different User-Agent is picked at random for each page request.
        page_request.add_header("User-Agent", random.choice(agents_list))

        # The context manager closes the HTTP response (and its socket) even
        # when reading or decoding fails; the original never closed it.
        with request.urlopen(page_request) as response:
            html = response.read().decode("utf-8")

        matches = thumb_pattern.findall(html)
        print(len(matches))
        for match in matches:
            image_address = match[0]
            # The pattern guarantees at least one "/" in the address, so the
            # text after the last one is the file name.
            file_name = image_address.rsplit("/", 1)[-1]
            target_path = os.path.join(save_dir, file_name)
            request.urlretrieve("http://" + image_address, target_path)

        # Pause between pages.
        time.sleep(5)

if __name__ == '__main__':
    # Four contiguous (start, stop) page-counter ranges, one per worker.
    page_ranges = [(1, 26), (26, 51), (51, 76), (76, 100)]

    # Each work item is an (args, kwargs) pair: the range tuple supplies the
    # positional arguments for getAmason, and no keyword arguments are given.
    work_items = [(bounds, None) for bounds in page_ranges]

    pool = threadpool.ThreadPool(4)
    for job in threadpool.makeRequests(getAmason, work_items):
        pool.putRequest(job)

    # Block until every queued request has finished.
    pool.wait()

    print("结束")
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值