import re
import requests
from bs4 import BeautifulSoup
def getHTML(url):
    """Fetch *url* with HTTP GET and return the response body as text.

    The response's encoding is overridden with the encoding detected from
    the body (``apparent_encoding``) before the text is decoded.

    Args:
        url: Address to request.

    Returns:
        The decoded response body, or ``""`` when the request fails or the
        server answers with an HTTP error status.
    """
    try:
        response = requests.get(url, timeout=30)
        # Turn 4xx/5xx answers into an exception so they take the "" path too.
        response.raise_for_status()
        response.encoding = response.apparent_encoding
        return response.text
    except requests.RequestException:
        # Best-effort fetch: network/HTTP failures yield an empty page.
        # Deliberately not a bare ``except`` so that Ctrl-C (KeyboardInterrupt)
        # and genuine programming errors are no longer silently swallowed.
        return ""
def getContent(url):
    """Download *url* and return its ``h3``, ``label`` and ``span`` elements.

    Args:
        url: Page address; it is fetched through ``getHTML``.

    Returns:
        The result of ``soup.select('h3,label,span')`` on the parsed page.
        When ``getHTML`` returns ``""`` (fetch failed) the empty string is
        parsed, so no elements are expected to match.
    """
    html = getHTML(url)
    print("url=", url)
    soup = BeautifulSoup(html, 'html.parser')
    title = soup.select('h3,label,span')
    # The original ``print("%s", title)`` printed a literal "%s" before the
    # value; print() does not apply %-formatting to its arguments.
    print(title)
    return title
def saveFile(text, path='novel.txt'):
    """Append the text of every non-empty element in *text* to a UTF-8 file.

    Each kept element contributes one line: ``element.get_text()`` followed
    by a newline. The element is also echoed to stdout for progress tracing.

    Args:
        text: Iterable of elements that support ``len()`` and ``get_text()``.
        path: File to append to. Defaults to ``novel.txt`` in the current
            working directory, which is what the function always used before.
    """
    # ``with`` guarantees the handle is closed even if get_text() or the
    # write raises part-way through the loop.
    with open(path, 'a', encoding='utf-8') as f:
        for t in text:
            # Keep the explicit len() test rather than plain truthiness; the
            # two are not guaranteed to agree for arbitrary element types.
            # NOTE(review): for bs4 tags len() presumably counts child nodes,
            # so childless tags are skipped - confirm.
            if len(t) > 0:
                f.write(t.get_text() + "\n")
                print("456", t)
def main():
    """Scrape pages 0-99 for every hard-coded ``tikubh`` id.

    For each id the base URL is printed, then each page URL is built, fetched
    and parsed via ``getContent``, echoed to stdout, and handed to
    ``saveFile``.
    """
    url_head = 'http://10.1.20.14/redir.php?catalog_id=6&cmd=learning&tikubh='
    url_tail = '&page='
    tikubh_ids = [
        1436, 1467, 1471, 1484, 1485, 1486, 4199, 4200, 16719, 16937,
        17076, 17251, 17886, 18186, 18219, 18263, 18264, 18638, 20244,
    ]
    for tikubh in tikubh_ids:
        print("j=", tikubh)
        base_url = url_head + str(tikubh) + url_tail
        print(base_url)
        for page in range(100):
            page_url = base_url + str(page)
            print(page_url)
            elements = getContent(page_url)
            print(elements)
            saveFile(elements)
# Run the scrape only when executed as a script, so that importing this
# module does not start issuing network requests.
if __name__ == "__main__":
    main()
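# NOTE(review): the lines below appear to be web-page residue (dates, view
# counts, icon links) captured along with the script; they are not Python and
# are commented out so the file parses.
# 12-23
# 707
# ![](https://csdnimg.cn/release/blogv2/dist/pc/img/readCountWhite.png)
# 06-08
# 683
# ![](https://csdnimg.cn/release/blogv2/dist/pc/img/readCountWhite.png)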