# coding=utf-8
import urllib2 as request
import re
import os
import threading,time,random
####
# Pages whose HTML is scanned for image references.
config_url_paths = [
    r'''http://image.baidu.com/''',
]
# Prefix that jpg_downloader puts in front of every file name (plain string
# concatenation, so it has to end with a path separator).
# NOTE(review): this is a raw string, so each doubled backslash is kept as two
# characters in the resulting path -- confirm that is intended.
config_save_path = r'''D:\\video\\t\\web\\image_cool\\52\\'''
# Captures the value of a src="..." attribute that ends in ".jpg".
# [^"] keeps the match inside one attribute value; with "." the match could
# start at an earlier non-jpg src="..." on the same line and swallow everything
# up to the next ".jpg", producing an unusable address.
re_fliter_jpg_full_path = re.compile(r'src="([^"]+?\.jpg)"')
# Captures a "<name>.jpg" path segment (the text after a "/") of an address.
re_filter_jpg_name = re.compile(r'/([^/]+\.jpg)')
class jpg_downloader(threading.Thread):
    """Thread that downloads one address and stores the bytes on disk.

    The destination is ``config_save_path + filename``; the data is fetched
    with ``request.urlopen`` when the thread runs.
    """

    def __init__(self, url, filename):
        """Record the download job and give it a sequential id.

        url      -- address passed to ``request.urlopen`` by ``run``.
        filename -- name appended to ``config_save_path`` to build the
                    destination path.
        """
        global cnt_threads
        threading.Thread.__init__(self)
        # ``downloads`` resets this module-level counter before creating
        # workers; default to 0 so that creating a worker directly does not
        # fail with NameError.
        cnt_threads = globals().get('cnt_threads', 0) + 1
        self._url = url
        self._savepath = config_save_path + filename
        self._id = cnt_threads
        print('cnt:'+str(self._id)+' url:'+url+' path:'+self._savepath+'\r\n')

    def run(self):
        """Fetch the address and write the response body to the save path."""
        response = request.urlopen(self._url)
        try:
            jpg = response.read()
        finally:
            # Release the connection even if reading fails.
            response.close()
        print(str(self._id) + 'download finish \r\n')
        # ``with`` closes (and therefore flushes) the file even if write()
        # raises, so the handle cannot leak.
        with open(self._savepath, 'wb') as out_file:
            out_file.write(jpg)
        print(str(self._id) + 'thread_end \r\n')
def get_html(url):
    """Download ``url`` and return the raw response body.

    url -- address handed to ``request.urlopen``.

    Returns whatever ``read()`` yields for the opened page. Errors raised
    while opening or reading propagate to the caller.
    """
    page = request.urlopen(url)
    try:
        return page.read()
    finally:
        # Always release the connection, even when read() raises.
        page.close()
def getImg(html):
    """Return the list of addresses that ``re_fliter_jpg_full_path`` finds in ``html``.

    html -- page text to scan.

    Each item is the text captured by the pattern's single group; the list is
    empty when nothing matches.
    """
    # Ask the precompiled pattern directly instead of going through re.findall.
    return re_fliter_jpg_full_path.findall(html)
def downloads(urls):
    """Download every address in ``urls``, using one thread per address.

    urls -- iterable of image addresses.

    Each file is saved under a name of the form ``NNN-<name>``: ``NNN`` is the
    zero-padded position of the address in ``urls`` and ``<name>`` is the
    ``.jpg`` file name found by ``re_filter_jpg_name``. All workers are
    created first, then started, then joined, so the call returns (``None``)
    only after every worker has finished.
    """
    global cnt_threads, mutex
    # jpg_downloader numbers its instances through this module-level counter.
    cnt_threads = 0
    # Not used by the code in view; still published as a module-level name so
    # anything that relies on it keeps working.
    mutex = threading.Lock()
    threads = []
    for index, url in enumerate(urls):
        match = re_filter_jpg_name.search(url)
        if match:
            name = match.group(1)
        else:
            # No "/<name>.jpg" part (e.g. a relative "pic.jpg"): use the last
            # path segment instead of failing on ``None.group`` and aborting
            # the whole batch.
            name = url.rsplit('/', 1)[-1]
        threads.append(jpg_downloader(url, '%03d' % index + "-" + name))
    for worker in threads:
        worker.start()
    for worker in threads:
        worker.join()
    print('join')
print('hello ready to start')
# Gather the image addresses of every configured page, in page order.
img_list = []
for url in config_url_paths:
    img_list.extend(getImg(get_html(url)))
print(len(img_list))
# Fetch everything that was found; returns once all downloads are done.
downloads(img_list)
print("finish")