用正则表达式轻松搞定:
import re
def lets_fuck_it(html=None):
    """Collect the ``src`` URL of every self-closing ``<img .../>`` tag.

    Args:
        html: Markup to scan. When omitted, the module-level variable
            ``html`` is used, which is what the zero-argument call form
            relies on.

    Returns:
        A list with the ``src`` value of each matched tag, in document
        order. Tags without a double-quoted ``src`` attribute are skipped.

    Raises:
        NameError: If ``html`` is omitted and no module-level ``html``
            variable exists.
    """
    if html is None:
        # Backward compatibility: the zero-argument form reads a global.
        if "html" not in globals():
            raise NameError("name 'html' is not defined")
        html = globals()["html"]

    # An alternation group, not a character class: ``[img|IMG]`` would match
    # any single one of the characters i, m, g, |, I, M, G and therefore
    # also tags such as <input/> or <meta/>. ``\b`` rejects names that
    # merely start with "img".
    replace_pattern = r'<(?:img|IMG)\b.*?/>'  # regex for a self-closing img tag
    # ``[^"\s]+`` stops at the closing quote even when the next attribute
    # follows without whitespace (``src="a.png"alt="x"``).
    img_url_pattern = r'.+?src="([^"\s]+)"'  # regex for the img URL

    img_url_list = []
    for tag in re.findall(replace_pattern, html):  # every img tag
        found = re.search(img_url_pattern, tag)
        if found is None:
            # No usable src attribute; skip instead of raising IndexError.
            continue
        img_url_list.append(found.group(1))
    return img_url_list