python多线程爬取图片

最新推荐文章于 2023-12-22 12:22:59 发布

梦羽灵泉

最新推荐文章于 2023-12-22 12:22:59 发布

阅读量208

点赞数

文章标签： python多线程爬取图片

版权声明：本文为博主原创文章，遵循 CC 4.0 BY-SA 版权协议，转载请附上原文出处链接和本声明。

本文链接：https://blog.csdn.net/weixin_29127757/article/details/112827821

版权

#!/usr/bin/env python

#encoding:UTF-8

import re

import urllib

import threading

import time

import Queue

def getHtml(url):
    """Download *url* and return the raw response body.

    The response object is closed explicitly once the body has been read,
    so the underlying connection is released even when ``read()`` raises.

    Args:
        url: Address of the page to fetch.

    Returns:
        The page content exactly as returned by ``read()``.
    """
    response = urllib.urlopen(url)
    try:
        return response.read()
    finally:
        response.close()

# Extract the image URLs contained in a web page.
def getUrl(html):
    """Return every ``http://...jpg!mid`` image URL found in *html*.

    The characters between ``http://`` and ``.jpg!mid`` are restricted to
    ones that can appear inside a URL (no whitespace, quotes or angle
    brackets).  Without that restriction a match could start at an unrelated
    ``http://`` link earlier on the same line and swallow the markup up to
    the first ``.jpg!mid``.

    Args:
        html: Page source to scan.

    Returns:
        A list of the matching URLs in document order (empty if none).
    """
    pattern = r'http://[^\s"\'<>]*?\.jpg!mid'
    # re.findall returns all non-overlapping matches as a list of strings.
    return re.findall(pattern, html)

class getImg(threading.Thread):
    """Worker thread that downloads image URLs taken from a shared queue.

    Every worker must be constructed with the same queue; the threads
    coordinate through it.  The thread starts itself in ``__init__``.

    Files are named after the module-level ``count`` global, which must be
    initialised before any worker is created.
    """

    # Serialises access to the global ``count`` so that two workers can
    # never pick the same file index.
    _count_lock = threading.Lock()

    def __init__(self, queue):
        """Store the shared task queue and start the thread immediately.

        Args:
            queue: Queue of image URLs shared by all workers.
        """
        threading.Thread.__init__(self)
        self.queue = queue
        self.start()  # start the thread

    def run(self):
        """Download queued images until the queue is empty.

        Tasks are fetched without blocking: a blocking ``get()`` paired with
        an ``empty()`` check can leave a worker waiting forever when another
        thread takes the last item in between.  ``task_done()`` is called for
        every fetched item, including the last one and ones whose download
        failed, so ``queue.join()`` can be used by the caller.
        """
        global count
        while True:
            try:
                imgurl = self.queue.get_nowait()
            except Queue.Empty:
                break
            try:
                # Reserve a unique index before the (slow) download starts.
                with self._count_lock:
                    index = count
                    count += 1
                # urlretrieve(url, filename) stores the content of url in filename.
                urllib.urlretrieve(imgurl, '/home/dragonriver/图片/girls/%s.jpg' % index)
            finally:
                self.queue.task_done()

def main():
    """Fetch the album page and download all of its images with 4 threads.

    The image URLs extracted from the page are put on a queue that the
    ``getImg`` workers consume.  The function waits for every worker to
    finish, so it returns only after the downloads are over.  The global
    ``count`` (the file index used by the workers) is reset to 0.
    """
    global count
    url = "http://girl-atlas.com/a/10130205170100000231"  # page to crawl
    html = getHtml(url)
    imglist = getUrl(html)
    count = 0
    if not imglist:
        # Nothing to download, so there is no point in starting workers.
        return
    queue = Queue.Queue()
    # Put every task on the queue before any worker starts.
    for imgurl in imglist:
        queue.put(imgurl)
    # Download with several threads; each getImg starts itself on creation.
    threads = []
    for _ in range(4):
        threads.append(getImg(queue))
    # Block until all workers have finished.
    for thread in threads:
        thread.join()

# Script entry point: run the crawler, then report completion.
# NOTE(review): the original indentation was lost; print is assumed to
# belong inside the guard -- confirm.
if __name__ == '__main__':
    main()
    print("Down")

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
python多线程爬取图片_python多线程爬取图片

#!/usr/bin/env python#encoding:UTF-8import reimport urllibimport threadingimport timeimport Queuedef getHtml(url):html_page=urllib.urlopen(url).read()return html_page#提取网页中图片的URLdef getUrl(html):patte...
复制链接

扫一扫

评论

被折叠的条评论为什么被折叠?

到【灌水乐园】发言

查看更多评论

添加红包

成就一亿技术人!

hope_wisdom

发出的红包

实付元

使用余额支付

点击重新获取

扫码支付

钱包余额 0

抵扣说明：

1.余额是钱包充值的虚拟货币，按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载，可以购买VIP、付费专栏及课程。