马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有帐号?立即注册
x
网上的我看不懂,这是我自己学习实践写的。
import urllib.request as u
import os
from bs4 import BeautifulSoup
import lxml
import re
import threading
from queue import Queue
import datetime
def url_open(url):
    """Fetch *url* and return the page body decoded as UTF-8.

    Raises the underlying network error after logging it, instead of
    falling through to an UnboundLocalError on an undefined ``response``.
    """
    req = u.Request(url)
    # Spoof a desktop-browser UA so the site does not reject the request.
    req.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 Safari/537.36 SE 2.X MetaSr 1.0')
    try:
        # Bug fix: the original called u.urlopen(url), discarding the
        # Request object (and its User-Agent header) built just above.
        response = u.urlopen(req)
    except Exception as e:
        print('打开网页错误', e)
        raise  # no response to read — propagate instead of crashing later
    return response.read().decode('utf-8')
def zhangjiedizhi(html):  # needs the html returned by url_open(url)
    """Extract absolute chapter URLs from the book index page *html*.

    The first 12 ``<a href="...html">`` links are site navigation /
    "latest chapters" entries, so they are sliced off.
    """
    soup = BeautifulSoup(html, 'lxml')
    urls = []  # absolute chapter addresses
    # Bug fix: escape the dot — ".html" as a regex matches ANY character
    # followed by "html"; r"\.html" matches the literal extension only.
    for ts in soup.find_all('a', href=re.compile(r"\.html")):
        urls.append('https://www.xbiquge6.com' + str(ts.get('href')))
    return urls[12:]
def zhangjieming(html):
    """Extract chapter titles from the book index page *html*.

    Must stay parallel to zhangjiedizhi(): same link filter, same
    leading-12 slice, so titles line up with chapter URLs.
    """
    soup = BeautifulSoup(html, 'lxml')
    names = []  # chapter titles
    # Bug fix: escape the dot, matching the fix in zhangjiedizhi().
    for ts in soup.find_all('a', href=re.compile(r"\.html")):
        names.append(ts.get_text())
    return names[12:]
def gg(x, x2, q):
    """Worker: download every chapter page in *x* and append
    "<title>\\n<body text>\\n" for each to a part file named after the
    slice's first chapter title.

    x  -- list of chapter URLs for this thread's slice
    x2 -- chapter titles, parallel to x
    q  -- result queue (kept for interface compatibility; unused)
    """
    # NOTE(review): the corrupted duplicate paste that split the
    # decode('utf-8') literal has been removed; the loop below is the
    # reconstructed original body.
    with open(str(x2[0]) + "x.txt", 'a', encoding='utf-8') as f:
        for i in range(len(x)):
            try:
                req = u.Request(x[i])
                req.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 Safari/537.36 SE 2.X MetaSr 1.0')
                # Bug fix: pass the Request (with its UA header), not the
                # bare URL string, to urlopen.
                response = u.urlopen(req)
                html = response.read().decode('utf-8')
            except Exception as e:
                print('打开网页错误', e)
                # Bug fix: skip this chapter — the original fell through
                # and parsed an undefined/stale ``html``.
                continue
            soup = BeautifulSoup(html, 'lxml')
            for div in soup.find_all('div', id="content"):
                # Chapter title, then the chapter body text.
                f.write(str(x2[i]) + '\n' + str(div.get_text()) + '\n')
    # ``with`` guarantees the close the original's bare ``f.close`` (no
    # parentheses — never actually called) failed to perform.
    print(x2[0], '写入完毕')
def thred():
    """Split the chapter list into 10 slices, download each slice in its
    own thread via gg(), then concatenate the 10 part files into
    小说合集.txt.  Reads the module-level ``url`` set under __main__.
    """
    q = Queue()  # passed to gg() for interface compatibility; unused
    threads = []
    # Perf fix: fetch the index page once instead of twice.
    html = url_open(url)
    x = zhangjiedizhi(html)   # chapter URLs
    x2 = zhangjieming(html)   # chapter titles, parallel to x
    mm = len(x)
    for i in range(10):
        lo, hi = int(0.1 * mm * i), int(0.1 * mm * (i + 1))
        t = threading.Thread(target=gg, args=(x[lo:hi], x2[lo:hi], q))
        t.start()
        threads.append(t)
    print('到这一切正常')
    for each in threads:
        each.join()
    print('上面的结束了')
    a = ''
    print('到这一切正常2')
    for i in range(10):
        # Bug fix: the original's ``f.close`` lacked parentheses and never
        # ran; ``with`` actually closes each part file.
        with open(str(x2[int(0.1 * i * mm)]) + "x.txt", 'r', encoding='utf-8') as f:
            a += f.read()
    # Same fix for the output file (original: ``x.close`` never called).
    with open("小说合集.txt", 'a', encoding='utf-8') as out:
        out.write(a)
if __name__ == "__main__":
    # ``url`` is the book's index page; thred() reads it as a global.
    url = 'https://www.xbiquge6.com/82_82692/'
    begin = datetime.datetime.now()
    thred()
    # Report how long the whole crawl + merge took.
    print(datetime.datetime.now() - begin)