import urllib2
import urllib
import re
import os
import multiprocessing
import time
# Build the two handlers separately so each one's role is visible:
# - plain-HTTP requests are sent via the proxy address "proxy";
# - the HTTP handler is created with debuglevel=1.
_proxy_handler=urllib2.ProxyHandler({"http":"proxy"})
_http_debug_handler=urllib2.HTTPHandler(debuglevel=1)
opener=urllib2.build_opener(_proxy_handler,_http_debug_handler)
# Install it globally so the bare urllib2.urlopen() calls below go through it.
urllib2.install_opener(opener)
# A listing page URL is built as urlstart + <page number> + urlend.
urlstart="http://jandan.net/pic/page-"
urlend="#comments"
# Prefix prepended to each image's file name when saving to disk.
saveTo="D:/JANDAN/"
# Captures the URL of every <img> whose src starts with "http" and contains
# ".jpg".  The URL body is limited to non-quote characters so a match can
# never run past the end of one src attribute into a later tag (the previous
# ".*?" did exactly that whenever a non-.jpg image preceded a .jpg one).
img=re.compile(r'img src="(http[^"]*?\.jpg)')
def downLoadFile(urlad):
    """Download the image at ``urlad`` unless it is already on disk.

    The target path is ``saveTo`` followed by the last ``/``-separated
    segment of ``urlad``.  If that path already exists nothing is fetched.

    The response body is read completely *before* the target file is
    created, so a failed download no longer leaves behind an empty file
    that later runs would mistake for a finished one.  The HTTP response
    and the output file are both closed even when an error is raised.

    Returns the string ``"finished:" + urlad``.
    Errors from ``urllib2.urlopen`` or from writing the file propagate.
    """
    savetoFile=saveTo+urlad.split('/')[-1]
    # Single-argument print(...) emits the same text under Python 2.
    print("saveto:"+savetoFile)
    if not os.path.exists(savetoFile):
        picData=urllib2.urlopen(urlad)
        try:
            payload=picData.read()
        finally:
            picData.close()
        with open(savetoFile,'wb') as saveH:
            saveH.write(payload)
    # NOTE(review): private multiprocessing API, kept from the original.
    # Presumably meant to force cleanup in the worker process -- confirm
    # whether it is still needed; the source's indentation was lost, so its
    # placement outside the "if" is a reconstruction.
    multiprocessing.util._exit_function()
    return "finished:"+urlad
if __name__ == '__main__':
    # Walk the listing pages from page 1621 down to page 1.
    for i in range(1621,0,-1):
        url=urlstart+str(i)+urlend
        # Single-argument print(...) emits the same text under Python 2.
        print("processing page:"+str(i))
        page=urllib2.urlopen(url)
        try:
            con=page.read()
        finally:
            # Close the listing-page response explicitly; previously one
            # connection per page was left for the garbage collector.
            page.close()
        ms=img.findall(con)
        print("pic count:"+str(len(ms)))
        # A fresh 4-worker pool handles the images of this page.
        pool=multiprocessing.Pool(processes=4)
        for m in ms:
            print("downloading file:"+m)
            pool.apply_async(downLoadFile,[m])
            # Pause between submissions.
            time.sleep(5)
        # Extra pause after the last submission of the page.
        time.sleep(3)
        # NOTE(review): private multiprocessing API, kept from the original.
        # It appears to run multiprocessing's exit finalizers, which may
        # terminate this pool's workers before queued downloads finish --
        # confirm, and consider replacing it with a urlopen timeout in the
        # worker so close()/join() alone can be relied on.
        multiprocessing.util._exit_function()
        pool.close()
        pool.join()
貌似有的子进程不能及时关掉……
下载jandan无聊图
最新推荐文章于 2024-06-15 09:31:00 发布