参考别人,自己摸索,整到凌晨快一点,成功!
嘛也不说,码上!
保存文件那个地方可以用 .value,这样出来的文件名就是纯数字;否则文件名会带中括号,因为这里直接用了一个 list。
# Scrape a novel from biqukan.com: read the chapter index page, record every
# chapter's title and URL, then save each chapter's text to its own file.
import re
import time
from pathlib import Path
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# The chapter id is taken to be a run of 7 or 8 digits in the page name.
# A single greedy quantifier is used because the alternation
# ``[0-9]{7}|[0-9]{8}`` always stops at 7 digits and truncates 8-digit ids.
_CHAPTER_ID = re.compile(r'[0-9]{7,8}')


def get_data(url, header, timeout=10):
    """Fetch *url* and return the raw response body as bytes.

    Args:
        url: Address of the page to download.
        header: Mapping of HTTP request headers (e.g. the User-Agent).
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The undecoded response body.

    Raises:
        requests.RequestException: On connection problems, timeouts, or a
            non-success HTTP status.
    """
    # The second positional argument of requests.get() is ``params`` (the
    # query string), so the headers have to be passed by keyword.
    response = requests.get(url, headers=header, timeout=timeout)
    response.raise_for_status()
    return response.content


def get_text(url, header):
    """Return the text of the chapter page at *url*.

    The text is read from the element with ``id="content"`` and class
    ``showtxt``; non-breaking spaces (``\\xa0``) are removed.

    Raises:
        ValueError: If the page has no such element.
        requests.RequestException: If the page cannot be downloaded.
    """
    soup = BeautifulSoup(get_data(url, header), 'lxml')
    content = soup.find(id='content', class_='showtxt')
    if content is None:
        raise ValueError('no chapter text found at %s' % url)
    return content.text.replace('\xa0', '')


def get_url(url, header, listpath, start_marker='《一念永恒》正文卷'):
    """Collect chapter URLs from the index page at *url*.

    Entries of the first ``<dl>`` inside ``<div class="listmain">`` are
    scanned in order. Links are only collected once an entry whose text
    equals *start_marker* has been seen, so anything listed before that
    heading is skipped. Each collected chapter is also appended to
    *listpath* as a ``"<title> <url>"`` line.

    Args:
        url: Address of the index page; relative links are resolved
            against it.
        header: Mapping of HTTP request headers.
        listpath: File that receives the title/URL listing (append mode,
            so repeated runs add to the existing file).
        start_marker: Heading text after which chapter links are collected.

    Returns:
        The absolute chapter URLs, in page order.

    Raises:
        ValueError: If the page has no chapter list.
        requests.RequestException: If the page cannot be downloaded.
    """
    soup = BeautifulSoup(get_data(url, header), 'lxml')
    listing = soup.find('div', class_='listmain')
    entries = listing.dl if listing is not None else None
    if entries is None:
        raise ValueError('no chapter list found at %s' % url)

    chapter_urls = []
    in_main_text = False
    with open(listpath, 'a', encoding='utf-8') as index_file:
        # Only direct child tags are visited; whitespace text nodes between
        # them have no ``.a`` attribute and would raise if touched.
        for entry in entries.find_all(True, recursive=False):
            if entry.get_text(strip=True) == start_marker:
                in_main_text = True
            link = entry.a
            if not in_main_text or link is None:
                continue
            href = link.get('href')
            if not href:
                continue
            chapter_url = urljoin(url, href)
            chapter_urls.append(chapter_url)
            title = link.get_text(strip=True)
            index_file.write(title + ' ' + chapter_url + '\n')
    return chapter_urls


def save_file(path, text):
    """Append *text* to the file at *path*, encoded as UTF-8."""
    # An explicit encoding avoids UnicodeEncodeError when the platform's
    # default codec cannot represent a character in the chapter.
    with open(path, 'a', encoding='utf-8') as out:
        out.write(text)


def _chapter_stem(chapter_url, fallback):
    """Return the numeric id in the URL's last path segment, else *fallback*."""
    match = _CHAPTER_ID.search(chapter_url.rsplit('/', 1)[-1])
    return match.group() if match else str(fallback)


def main():
    """Download every chapter of the configured novel into ``H:/novel``."""
    url = 'http://www.biqukan.com/1_1094/'
    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36'}
    out_dir = Path('H:/novel')
    out_dir.mkdir(parents=True, exist_ok=True)

    urls = get_url(url, header, str(out_dir / 'namelist.txt'))
    num = len(urls)
    for count, url_r in enumerate(urls, start=1):
        text = get_text(url_r, header)
        # Name the file after the bare chapter id; formatting the whole
        # findall() list produced names such as "['1234567'].txt".
        textpath = out_dir / (_chapter_stem(url_r, count) + '.txt')
        save_file(str(textpath), text)
        print('已下载%.3f%%' % (count / num * 100))
        # Pause between requests to avoid hammering the server.
        time.sleep(1)


if __name__ == '__main__':
    main()