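# Step 1: extract the relevant download links from the xls workbook and save them to a txt file.
# Step 2: read the links from that txt file and download each one.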
# This is a sample Python script.
# Press Shift+F10 to execute it or replace it with your code.
# Press Double Shift to search everywhere for classes, files, tool windows, actions, and settings.
import xlrd
import xlwt
from six.moves import urllib
import os
import sys
def download_and_extract(filepath, save_dir):
    """Download each URL in ``filepath`` into ``save_dir``.

    The name of each saved file is the last ``/``-separated segment of its
    URL. Processing stops at the first entry whose last segment is ``'end'``
    (the sentinel terminating the URL list); blank entries are skipped.
    Despite the function name, nothing is extracted: files are stored exactly
    as downloaded.

    Parameters:
        filepath: list of URL strings, optionally terminated by ``'end'``.
        save_dir: str, directory to save into; created if it does not exist.

    Returns:
        None

    Raises:
        Any exception raised by ``urllib.request.urlretrieve`` is not caught,
        so the first failed download aborts the run.
    """
    # Collect the real work up front so the progress percentage is measured
    # against the number of files actually downloaded, not the sentinel or
    # blank entries.
    pending = []
    for url in filepath:
        filename = url.split('/')[-1]
        if filename == 'end':
            break
        if url.strip():
            pending.append((url, filename))

    # urlretrieve opens the target path for writing and fails if the
    # directory is missing. An empty save_dir means the current directory.
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)

    total = len(pending)
    for done, (url, filename) in enumerate(pending, start=1):
        print(" " + filename)
        urllib.request.urlretrieve(url, os.path.join(save_dir, filename))
        sys.stdout.write('\r>> Downloading %.1f%%' % (done / total * 100.0))
        sys.stdout.flush()
    print('\nSuccessfully downloaded')
def _get_file_urls(file_url_txt):
    """Read a text file holding one URL per line and return the URLs.

    Parameters:
        file_url_txt: str, local path of the txt file.

    Returns:
        list of str: every non-blank line with surrounding whitespace
        removed, in file order. A trailing ``end`` sentinel line, if
        present, is returned like any other line.
    """
    # The list file is written as utf-8 by print_hi, so read it back with the
    # same encoding instead of the platform default. The with-block closes
    # the handle even if reading fails.
    with open(file_url_txt, 'r', encoding='utf-8') as url_file:
        stripped_lines = (line.strip() for line in url_file)
        # Blank lines are not URLs; keeping them would hand '' to the
        # downloader.
        return [line for line in stripped_lines if line]
def getFile(file_url_txt='file_url_txt.txt', save_dir='save_dir_8000-9015/'):
    """Step 2: download every URL listed in ``file_url_txt`` into ``save_dir``.

    Prints a start marker, loads the URL list with ``_get_file_urls``, hands
    it to ``download_and_extract``, then prints an end marker.

    Parameters:
        file_url_txt: str, path of the text file holding one URL per line.
            Defaults to the file that step 1 writes.
        save_dir: str, directory the downloaded files are saved into.
            Defaults to the directory used for the 8000-9015 row batch.

    Returns:
        None
    """
    print("start **** ")
    filepath = _get_file_urls(file_url_txt)
    download_and_extract(filepath, save_dir)
    print("end **** ")
def print_hi(name):
    """Greet ``name``, then export download URLs from the workbook (step 1).

    Opens ``D:/image_1/image.xlsx``, reads column index 4 of the first sheet
    for rows 8000 up to (not including) 9015, prefixes every non-empty cell
    value with the hard-coded base URL, and writes the resulting URLs one per
    line to ``file_url_txt.txt``, followed by a final ``end`` line that marks
    the end of the list.

    Parameters:
        name: value interpolated into the greeting; not used otherwise.

    Returns:
        None
    """
    # Use a breakpoint in the code line below to debug your script.
    print(f'Hi, {name}')  # Press Ctrl+F8 to toggle the breakpoint.

    base_url = 'http://chiccircle-ali-oss-beijing.oss-cn-beijing.aliyuncs.com/'
    first_row, stop_row = 8000, 9015

    # NOTE(review): xlrd 2.0+ only reads legacy .xls files -- confirm the
    # installed xlrd version can still open this .xlsx workbook.
    readbook = xlrd.open_workbook(r'D:/image_1/image.xlsx')
    sheet = readbook.sheet_by_index(0)
    nrows = sheet.nrows  # number of rows
    ncols = sheet.ncols  # number of columns
    print(nrows, " ", ncols)
    print("*********")

    urls = []
    # Clamp the range to the sheet's real row count so a workbook with fewer
    # than 9015 rows does not raise IndexError.
    for row in range(first_row, min(stop_row, nrows)):
        lng = sheet.cell(row, 4).value
        # Skip empty cells before building anything from them.
        if lng == '':
            continue
        url = base_url + lng
        print(url)
        urls.append(url)

    # 'end' is the sentinel the downloader stops at.
    urls.append('end')
    with open("file_url_txt.txt", "w", encoding='utf-8') as f:
        f.write('\n'.join(urls) + '\n')
# Script entry point. The two steps are meant to be run separately:
#   step 1 - print_hi() exports the URL list from the spreadsheet;
#   step 2 - getFile() downloads the listed files (uncomment to run it).
if __name__ == "__main__":
    print_hi("PyCharm")
    # getFile()
# See PyCharm help at https://www.jetbrains.com/help/pycharm/