from re import findall
from urllib.request import urlopen
import os,shutil
import zipfile
def makedir(path):
    """Make ``path`` an empty directory, replacing whatever was there.

    If ``path`` already exists it is removed recursively and then created
    again, so the caller always ends up with a fresh, empty directory.
    Missing parent directories are created as well.

    Side effect: the process working directory is changed to ``/``.

    Args:
        path: Directory to (re)create.

    Raises:
        OSError: If the existing entry cannot be removed or the directory
            cannot be created.
    """
    # Step out to the filesystem root first so the process is never left
    # sitting inside the directory that is about to be deleted.
    os.chdir("/")
    if os.path.exists(path):
        shutil.rmtree(path)
    # Always create the directory: recreating it after the removal is what
    # lets callers use ``path`` immediately, even on a second run.
    os.makedirs(path)
def getwechatimg(idx, url, base_dir="/content/sample_data"):
    """Download the JPEG images referenced by a page and zip them.

    The page at ``url`` (presumably a WeChat article, judging by the function
    name) is fetched and every ``data-src="..."`` attribute value is
    extracted. Values ending in ``jpeg`` are downloaded into a staging
    directory ``<base_dir>/wechat<idx>`` as ``<position>.jpeg``, where
    ``position`` is the index of the link among all ``data-src`` matches.
    The downloaded files are then stored flat (file name only) in
    ``<base_dir>/download_<idx>.zip`` and the staging directory is removed.

    Args:
        idx: Value used to build the staging directory and archive names.
        url: Address of the page to scan.
        base_dir: Directory that holds the staging directory and the
            resulting archive.

    Raises:
        ValueError: If ``url`` is not a valid URL (e.g. an empty string).
        urllib.error.URLError: If the page or an image cannot be fetched.
        OSError: If files or directories cannot be created or removed.
    """
    path = os.path.join(base_dir, "wechat" + str(idx))
    archive_path = os.path.join(base_dir, "download_" + str(idx) + ".zip")

    makedir(path)
    # Guarantee the staging directory is present whatever state the helper
    # left it in; this is a no-op when it already exists.
    os.makedirs(path, exist_ok=True)

    try:
        with urlopen(url) as page:
            content = page.read().decode()

        links = findall('data-src="(.+?)"', content)
        for index, link in enumerate(links):
            if not link.endswith("jpeg"):
                continue
            # Write through an absolute path instead of changing the working
            # directory, so the process is never left inside a directory
            # that gets deleted below.
            target = os.path.join(path, str(index) + ".jpeg")
            with urlopen(link) as response, open(target, "wb") as out:
                shutil.copyfileobj(response, out)

        # The context manager closes the archive even if adding a file fails.
        with zipfile.ZipFile(archive_path, "w", zipfile.ZIP_DEFLATED) as archive:
            for filename in sorted(os.listdir(path)):
                # An explicit arcname keeps the entries flat and independent
                # of the current working directory.
                archive.write(os.path.join(path, filename), arcname=filename)
    finally:
        # The staging directory is scratch space: drop it on failure too.
        shutil.rmtree(path, ignore_errors=True)
# Put the article links inside the quotes of ``items``.
items = ["", ""]
for index, item in enumerate(items):
    # The empty strings are placeholders. urlopen rejects an empty URL with
    # ValueError, so skip entries that have not been filled in yet. The
    # position in ``items`` is still used as the index, which keeps archive
    # names stable when some slots are left blank.
    if not item.strip():
        continue
    getwechatimg(index, item)