# Python WORK 03 – Extract image URLs from HTML (提取HTML中的图片URL)
def main(input_file='0503.html', output_file='0503.txt'):
    """Extract image URLs from an HTML file, print them, and save them.

    Args:
        input_file: Path of the HTML file to scan. Defaults to the
            original hard-coded '0503.html'.
        output_file: Path of the text file that receives the URLs.
            Defaults to the original hard-coded '0503.txt'.
    """
    html_lines = get_html_lines(input_file)
    image_urls = extract_image_urls(html_lines)
    show_results(image_urls)
    save_results(output_file, image_urls)
def get_html_lines(path, encoding='gbk'):
    """Read a text file and return its lines as a list of strings.

    Args:
        path: Path of the file to read.
        encoding: Text encoding used to decode the file. Defaults to
            'gbk', the encoding this function originally hard-coded.

    Returns:
        The list produced by ``readlines()``; each element keeps its
        trailing newline when the file has one.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the content is not valid in *encoding*.
    """
    # The context manager closes the file even if reading/decoding fails.
    with open(path, "r", encoding=encoding) as html_file:
        return html_file.readlines()
def extract_image_urls(html_list):
    """Collect the ``src`` URLs of ``<img>`` tags found in HTML lines.

    Each line is scanned independently, so an ``<img>`` tag must have
    its ``src`` attribute on the same line as the tag opening. Every
    ``<img>`` tag on a line is considered (not only the last one), and
    the attribute value may be wrapped in double or single quotes.
    Lines that merely contain the text "img" without a quoted ``src``
    are skipped instead of raising ``IndexError``.

    Args:
        html_list: Iterable of strings, one per line of HTML.

    Returns:
        A list of the ``src`` values that contain the substring 'http',
        in the order they appear.
    """
    # Local import keeps this function self-contained; the file has no
    # top-level import block.
    import re

    # `\s` before `src` avoids matching attributes such as `data-src`;
    # the back-reference `\1` requires the closing quote to match the
    # opening one.
    img_src_pattern = re.compile(
        r'<img\b[^>]*?\ssrc\s*=\s*(["\'])(.*?)\1',
        re.IGNORECASE,
    )

    urls = []
    for line in html_list:
        for match in img_src_pattern.finditer(line):
            url = match.group(2)
            # Keep only absolute web URLs, as the original filter did.
            if 'http' in url:
                urls.append(url)
    return urls
def show_results(urls):
    """Print every URL in *urls* on its own line with a running index.

    The index starts at 0 and is right-aligned in a field of width 2,
    matching the original output format.

    Args:
        urls: Iterable of URL strings to print.
    """
    # enumerate replaces the hand-maintained counter.
    for index, url in enumerate(urls):
        print('第{:2}个URL:{}'.format(index, url))
def save_results(path, urls):
    """Write the URLs to a text file, one URL per line.

    The file at *path* is created or truncated. It is opened without an
    explicit encoding, so the platform default applies (unchanged from
    the original behaviour).

    Args:
        path: Path of the output file.
        urls: Iterable of URL strings to write.

    Raises:
        OSError: If the file cannot be opened for writing.
    """
    # The context manager closes the file even if a write fails.
    with open(path, "w") as out_file:
        out_file.writelines(url + "\n" for url in urls)
# Run the extraction only when executed as a script, so importing this
# module does not trigger file I/O.
if __name__ == "__main__":
    main()