# import re
#
# #import this
# ---------------------------------- Plain regex matching ---------------------------------
# #python之禅
# example = """
# <html>
# <head>
# <title>
# index
# </title>
# </head>
# <body>
# <div id="content">
# <div class="font">
# top
# </div>
# <div class="font">
# center
# </div>
# <div class="font">
# bottom
# </div>
# </div>
# </body>
# </html>
#
# """
# result = re.findall(r'<div class="font">(.*?)</div>',example,re.S)
#
# for res in result:
# print("++++++++++++++++++++++++")
# print(res)
# print("++++++++++++++++++++++++")
# -------------------------------------- Multi-threaded image download ------------------------------
# import re
# from urllib import request
# from time import sleep
# import threading
#
# def downLoder(num,size,total):
# num = size*num/float(total)
# if num > 1:
# num = 1
# num = "%.2f%%"%(num*100)
# print(num)
#
# def downLoad(url,name):
# path = "image/%s"%name
# try:
# request.urlretrieve(url, path, downLoder)
# except:
# pass
# else:
# print("%s is down"%name)
# sleep(1)
#
# def main():
# with open("work.html","rb") as f:
# content = f.read().decode()
# srcs = re.findall(r'<img(.*?)src="(.*?)"',content,re.S)
# task = []
# for src in srcs:
# url = "http:"+src[1]
# if "?" in url:
# name = url.split("?")[0]
# else:
# name = url
# name = name.rsplit("/",1)[1]
# task.append((url,name))
# lenth = len(task)
# num = lenth/5
# if num != int(num):
# num = int(num)+1
# for i in range(1,num+1):
# start = (i-1)*5
# end = i*5
# one_task = task[start:end]
# threading_list = []
# for i in range(5):
# t = threading.Thread(target = downLoad,args = (
# one_task[i][0],
# one_task[i][1]
# ))
# threading_list.append(t)
# for t in threading_list:
# t.start()
# for t in threading_list:
# t.join()
#
#
# if __name__ == '__main__':
# main()
# ------------------------------------ Coroutine-based image download -------------------------------------
import re
from urllib import request
from time import sleep
def downLoder(num, size, total):
    """
    Progress hook for ``request.urlretrieve``: print how far a download is.

    :param num: number of blocks transferred so far
    :param size: size of one block in bytes
    :param total: total size of the file in bytes; ``urlretrieve`` may pass
        0 (empty body) or -1 (size not reported by the server)
    :return: None; the progress text is printed to stdout
    """
    if total < 0:
        # Size unknown: a percentage cannot be computed, so report the
        # number of bytes received instead of a meaningless negative value.
        print("%d bytes" % (size * num))
        return
    if total == 0:
        # Nothing to transfer; avoid dividing by zero and report completion.
        fraction = 1
    else:
        # The last block is usually only partly filled, so blocks * size can
        # overshoot the real total; clamp to 100%.
        fraction = min(size * num / float(total), 1)
    print("%.2f%%" % (fraction * 100))
def downLoad():
    """
    Coroutine-style generator that downloads every image it is sent.

    Prime it with ``next()``, then ``send((url, name))`` for each image.
    The image is saved as ``image/<name>`` and progress is reported through
    ``downLoder``. A failed download is printed and skipped so that the
    remaining images are still processed.

    :return: generator that yields the name of the image handled last
        (an empty string right after priming)
    """
    import os  # local import: only needed to create the output directory

    name = ""
    while True:
        args = yield name
        url, name = args
        path = "image/%s" % name
        try:
            # Without the target directory every single download would fail.
            os.makedirs("image", exist_ok=True)
            request.urlretrieve(url, path, downLoder)
        except Exception as e:
            print("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX")
            print(e)
            print("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX")
        else:
            print("%s is down" % name)
        # NOTE(review): pause between requests; assumed to apply after every
        # attempt (success or failure) - confirm against the intended layout.
        sleep(1)
def getArgs():
    """
    Coroutine-style generator that turns a regex match into a download job.

    Prime it with ``next()``, then ``send(src)`` where ``src[1]`` is the value
    of an ``<img>`` tag's ``src`` attribute. The URL and the file name derived
    from it are forwarded to the ``downLoad`` coroutine.

    :return: generator that yields the file name derived from the previous
        ``src`` (an empty string right after priming)
    """
    name = ""
    d = downLoad()
    next(d)
    while True:
        src = yield name
        link = src[1]
        # Only protocol-relative links ("//host/path") lack a scheme; adding
        # "http:" to an already absolute URL would corrupt it.
        url = "http:" + link if link.startswith("//") else link
        # Drop the query string, then keep the last path segment. Indexing
        # with [-1] also copes with a link that contains no "/" at all.
        name = url.split("?", 1)[0].rsplit("/", 1)[-1]
        d.send((url, name))
def main():
    """
    Read ``work.html``, collect the image addresses and feed them to the
    URL-parsing coroutine (``getArgs``), which hands them on for download.

    :return: None
    """
    with open("work.html", "rb") as f:
        # A stray undecodable byte should not abort the whole run.
        content = f.read().decode("utf-8", errors="replace")
    # [^>]* and [^"]* keep the match inside one <img ...> tag, so the src of
    # a later element is never picked up. Two groups are kept because the
    # consumer reads the address from index 1 of each match.
    srcs = re.findall(r'<img\b([^>]*?)\ssrc="([^"]*)"', content)
    g = getArgs()
    next(g)  # advance to the first yield so send() is accepted
    for src in srcs:
        g.send(src)
if __name__ == '__main__':
    # Start the download only when run as a script, not when imported.
    main()
# # -------------------------------------- Multi-process ----------------------------------
# import re
# from urllib import request
# from time import sleep
# import multiprocessing
#
# def downLoder(num,size,total):
# num = size*num/float(total)
# if num > 1:
# num = 1
# num = "%.2f%%"%(num*100)
# print(num)
#
# def downLoad(url,name):
# path = "image/%s"%name
# try:
# request.urlretrieve(url, path, downLoder)
# except:
# pass
# else:
# print("%s is down"%name)
# sleep(1)
#
# def main():
# with open("work.html","rb") as f:
# content = f.read().decode()
# srcs = re.findall(r'<img(.*?)src="(.*?)"',content,re.S)
# task = []
# for src in srcs:
# url = "http:"+src[1]
# if "?" in url:
# name = url.split("?")[0]
# else:
# name = url
# name = name.rsplit("/", 1)[1]
# task.append((url, name))
# lenth = len(task)
# num = lenth/5
# if num != int(num):
# num = int(num)+1
#
# for i in range(1,num+1):
# pool = multiprocessing.Pool(5)
# start = (i-1)*5
# end = i*5
# one_task = task[start:end]
# for j in range(5):
# pool.apply_async(downLoad, args=(one_task[j][0], one_task[j][1]))
# pool.close()
# pool.join()
#
#
#
# if __name__ == '__main__':
# main()