# coding: utf-8
from urllib.parse import urljoin

import bs4
import requests
from bs4 import BeautifulSoup
# Module-level accumulator of chapter entries; Read_page appends dicts with
# "title" and "href" keys, and main iterates over them.
menu=[]
def Request_page(url, timeout=10):
    """Download *url* and return the response body decoded as GBK text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely on a stalled
            connection.

    Returns:
        The response body as text, decoded with the ``gbk`` codec.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            times out.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0(Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome / 53.0 .2785 .89 Safari / 537.36'
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    # Force GBK decoding instead of relying on the encoding requests guesses.
    response.encoding = 'gbk'
    return response.text
def Read_page(str):
    """Collect chapter links from an index page into the module-level ``menu``.

    Walks every node that follows the first ``<dt>`` element of the document
    and, for each sibling tag that contains an ``<a>`` with an ``href``,
    appends ``{"title": <link string>, "href": <link target>}`` to ``menu``.

    Args:
        str: HTML source of the index page. The name shadows the builtin but
            is kept so existing callers keep working.

    Returns:
        None. Entries are appended to ``menu``; if the document has no
        ``<dt>`` element nothing is appended.
    """
    soup = BeautifulSoup(str, features="html5lib")
    first_dt = soup.dt
    if first_dt is None:
        # No <dt> in the document, so there are no siblings to walk.
        return
    for sibling in first_dt.next_siblings:
        # Siblings include bare text nodes; only element nodes can hold links.
        if not isinstance(sibling, bs4.element.Tag):
            continue
        link = sibling.a
        if not isinstance(link, bs4.element.Tag):
            continue
        href = link.get("href")
        if href is None:
            # Skip anchors without a target instead of raising KeyError.
            continue
        menu.append({"title": link.string, "href": href})
def save2file(name, content, path="夜天子.txt"):
    """Append one titled section to the output text file.

    The section is written as a blank-line separator, the title on its own
    line, then the content.

    Args:
        name: Section title.
        content: Section text, written immediately after the title line.
        path: File to append to. Defaults to the original hard-coded
            output file.

    Returns:
        None.
    """
    # Write UTF-8 explicitly: the platform default encoding is locale
    # dependent and may be unable to encode the Chinese text being saved.
    with open(path, 'a', encoding='utf-8') as fd:
        fd.write('\n' + name + '\n')
        fd.write(content)
def Read_text(str):
    """Extract the plain text of the ``<div id="content">`` element.

    The text between the opening marker and the first following ``</div>``
    is taken, then cleaned: ``'<br /> '`` becomes a newline, any remaining
    ``'<br />'`` is dropped, and all space characters are removed.

    Args:
        str: Raw HTML of a chapter page. The name shadows the builtin but is
            kept so existing callers keep working.

    Returns:
        The cleaned chapter text.

    Raises:
        ValueError: If the opening marker or its closing ``</div>`` is not
            present in the input.
    """
    marker = '<div id="content">'
    start = str.find(marker)
    if start == -1:
        raise ValueError('content marker <div id="content"> not found')
    body = str[start + len(marker):]
    end = body.find('</div>')
    if end == -1:
        raise ValueError('closing </div> for the content block not found')
    text = body[:end]
    text = text.replace('<br /> ', '\n')
    # Remove leftover line-break tags BEFORE stripping spaces: stripping
    # first would turn '<br />' into '<br/>' and the tag would never match.
    text = text.replace('<br />', '')
    text = text.replace(' ', '')
    return text
def main():
    """Download every chapter listed on the index page and save it to disk.

    Fetches the index page, lets ``Read_page`` fill ``menu`` with chapter
    links, then for each entry downloads the chapter page, extracts its text
    with ``Read_text`` and appends it to the output file via ``save2file``.
    """
    url = "https://www.booktxt.net/0_4/"
    index_html = Request_page(url)
    Read_page(index_html)
    for info in menu:
        # urljoin resolves relative, root-relative and absolute hrefs alike;
        # plain concatenation is only correct for bare relative names.
        chapter_url = urljoin(url, info["href"])
        chapter_html = Request_page(chapter_url)
        content = Read_text(chapter_html)
        save2file(info["title"], content)
# Run the scraper only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()