主要问题出在下面这段循环里:
# Excerpt of the paragraph loop from download(), annotated with the author's observations.
for txt in list_:
txts = txt.get_text()
# Printing txts here still shows the complete text of each paragraph.
download_run(title1, title2, title3, title4, txts)
# Reported observation: after this call only the last paragraph is left.
# NOTE(review): presumably this refers to the output file -- download_run opens it
# in 'w' mode on every call, so each iteration replaces what the previous one wrote.
完整代码如下:
def download(href_urls):
    """Fetch each page in *href_urls* and save its article text to one file.

    For every URL the page is requested and parsed, four titles are read
    from the links inside the ``g-ctnBar`` element (positions 2-5), and the
    text of every ``detail-mod J_floor`` block except the last three is
    joined and handed to ``download_run`` in a single call.

    Args:
        href_urls: Iterable of page URLs to download.

    Raises:
        AttributeError: If a page has no ``g-ctnBar`` element.
        IndexError: If ``g-ctnBar`` contains fewer than six links.
    """
    for url in href_urls:
        # The session is closed as soon as the response body has been read.
        # NOTE(review): verify=False disables TLS certificate checks --
        # confirm this is really required for the target site.
        with requests.session() as ses:
            html = ses.get(url, headers=header(), verify=False)
        soup = BeautifulSoup(html.content, 'html.parser')

        title_list = soup.find(class_='g-ctnBar').find_all('a')
        title1 = title_list[2].get_text()
        title2 = title_list[3].get_text()
        title3 = title_list[4].get_text()
        title4 = title_list[5].get_text()

        blocks = soup.find_all('div', class_='detail-mod J_floor')[:-3]
        paragraphs = [block.get_text() for block in blocks]

        # download_run truncates the target file on every call, so writing
        # paragraph by paragraph would leave only the last one on disk.
        # Join everything first and write exactly once per page.
        if paragraphs:
            download_run(title1, title2, title3, title4, '\n'.join(paragraphs))
def download_run(title1, title2, title3, title4, txts):
    """Write *txts* to ``<title4>.txt`` inside a folder built from three titles.

    The target directory is ``C:/Users/Desktop/run/<title1>/<title2>/<title3>``
    and is created when missing. The file is opened in ``'w'`` mode, so any
    existing content is replaced; callers that want several paragraphs in
    one file must pass them together in a single call.

    Args:
        title1: First-level folder name.
        title2: Second-level folder name.
        title3: Third-level folder name.
        title4: File name without the ``.txt`` extension.
        txts: Text to write.
    """
    path = 'C:/Users/Desktop/run/%s/%s/%s' % (title1, title2, title3)
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(path, exist_ok=True)
    # Explicit UTF-8: the platform default codec (for example GBK on a
    # Chinese-locale Windows) cannot encode every character found in scraped
    # pages, which would raise UnicodeEncodeError in the middle of the write.
    with open('%s/%s.txt' % (path, title4), 'w', encoding='utf-8') as f:
        f.write(txts)