def get_html(htmlname):
    """Read an HTML file and return its contents as a list of lines.

    Args:
        htmlname: Path or base name of the file WITHOUT the ".html"
            suffix; the suffix is appended here.

    Returns:
        A list of strings, one per line, each keeping its line ending.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
    """
    # The context manager closes the handle even if reading fails part-way
    # (for example on a decode error), which the manual close() did not.
    with open("{}.html".format(htmlname), "r") as html_file:
        return html_file.readlines()
def get_urls(htmllist):
    """Extract absolute image URLs from lines of HTML.

    A line is considered only if it contains both "img" and "src=". The
    value taken is the first double-quoted string after the LAST "src=" on
    the line, so at most one URL is produced per line. Only values that
    contain "http" are kept.

    Args:
        htmllist: Iterable of strings, one per line of HTML.

    Returns:
        A list of URL strings in the order they were found.
    """
    urls = []
    for line in htmllist:
        # Without "src=" the split below would return the whole line and
        # pick up an unrelated quoted attribute (e.g. an href), so skip.
        if 'img' not in line or 'src=' not in line:
            continue
        quoted = line.split("src=")[-1].split('"')
        # Fewer than two pieces means there is no opening double quote
        # after src= (unquoted or single-quoted value); indexing [1]
        # would raise IndexError.
        if len(quoted) < 2:
            continue
        url = quoted[1]
        if "http" in url:
            urls.append(url)
    return urls
def result(urls):
    """Print each URL on its own line, prefixed with its position.

    Args:
        urls: Iterable of URL strings.

    Returns:
        None. Output goes to standard output.
    """
    # Numbering is zero-based, as in the original output format.
    # NOTE(review): "第 0个" reads oddly as an ordinal; confirm whether
    # one-based numbering is wanted before changing the visible output.
    for index, url in enumerate(urls):
        print("第{:2}个URL:{}".format(index, url))
def save_urls(filename, urls):
    """Write the URLs to a text file, one per line.

    Args:
        filename: Path or base name of the output file WITHOUT the ".txt"
            suffix; the suffix is appended here. An existing file is
            overwritten.
        urls: Iterable of URL strings.

    Raises:
        OSError: If the file cannot be opened for writing.
    """
    # The previous code wrote through an undefined name (`f` instead of the
    # opened handle), raising NameError on the first URL and leaving the
    # file open. The context manager also guarantees the handle is closed.
    with open("{}.txt".format(filename), "w") as out_file:
        out_file.writelines(url + "\n" for url in urls)
def main():
    """Prompt for file names, then extract, display and save image URLs.

    Asks first for the HTML file name and then for the output file name
    (both without extension), reads the HTML, prints every extracted URL
    and writes the same URLs to the output file.
    """
    source_name = input("请输入需要提取文件名称:")
    target_name = input("请输入提取结果文件名称:")
    found_urls = get_urls(get_html(source_name))
    result(found_urls)
    save_urls(target_name, found_urls)
# Run the interactive workflow only when executed as a script, so that
# importing this module does not trigger the input prompts.
if __name__ == "__main__":
    main()