import re,os,random
from urllib import request
from bs4 import BeautifulSoup
from functools import reduce
# --- Fetch the novel's index page, prepare the output folder, and ---
# --- collect (chapter title, chapter URL) pairs from the listing.  ---
url = 'http://www.aiquxs.com/read/50/50271/index.html'
req = request.Request(url)
res = request.urlopen(req).read()
soup = BeautifulSoup(res, 'lxml')
# Book title: first <div>'s <h3> text minus its trailing 3 characters
# (site-added suffix — TODO confirm the suffix is always 3 chars).
name = soup.div.h3.get_text()[:-3]
t = 'e://电子书//%s' % name
# makedirs(exist_ok=True) replaces the isdir/mkdir/else-pass dance and
# is race-free if the directory appears between check and create.
os.makedirs(t, exist_ok=True)
z = soup.div.dl
data = z.find_all('a')
m, n = [], []  # m: chapter titles, n: chapter URLs
for i in data:
    x = i.get_text()
    # Bug fix: use str.replace, not re.sub — the href is plain data, and
    # re.sub would interpret any backslash in it as a replacement escape.
    y = url.replace('index.html', i.get('href'))
    m.append(x)
    n.append(y)
def h():
    """Pick one request-header dict at random (simple User-Agent rotation)."""
    ua_pool = (
        {"User-Agent": "Mozilla/5.0 (Windows; U; Win 9x 4.90; en-GB; rv:1.8.1.1) Gecko/20061204 Firefox/2.0.0.1"},
        {"User-Agent": "Mozilla/5.0 (X11; U; SunOS sun4u; en-US; rv:1.6) Gecko/20040503"},
    )
    return random.choice(ua_pool)
def get(url, t):
    """Download one chapter page from *url* and save its filtered text to path *t*.

    Keeps only 《》 brackets, CJK ideographs, full-width forms, and the
    ASCII characters , . ! — all markup and other characters are dropped.
    """
    headers = h()
    # Bug fix: the rotated User-Agent was generated but never attached to
    # the request; pass it so the site actually receives it.
    req = request.Request(url, headers=headers)
    res = request.urlopen(req).read()
    soup = BeautifulSoup(res, 'lxml')
    c = soup.find_all("div", "content")
    # Extract only the characters we want to keep from the raw markup.
    chars = re.findall(
        "[\u300a\u300b]|[\u4e00-\u9fa5]|[\uFF00-\uFFEF]|[,.!]",
        str(c),
    )
    # ''.join replaces reduce(+): linear instead of quadratic, and unlike
    # reduce it does not raise TypeError when nothing matched.
    text = ''.join(chars)
    # Explicit UTF-8 so the CJK text round-trips regardless of the
    # platform's default locale encoding.
    with open(t, 'w', encoding='utf-8') as fh:
        fh.write(text)
# Walk chapter titles and URLs in lockstep; fetch any chapter whose
# output file is not already on disk.
for title, link in zip(m, n):
    t = 'e://电子书//%s//%s.txt' % (name, title)
    if os.path.isfile(t):
        # Chapter already saved — skip it.
        print('0000000000000000')
    else:
        get(link, t)
        print('正在下载%s' % title)
from urllib import request
from bs4 import BeautifulSoup
from functools import reduce
# NOTE(review): this entire second half of the file is a verbatim
# duplicate of the script above and re-runs the whole download; it
# should probably be deleted once confirmed accidental.
url = 'http://www.aiquxs.com/read/50/50271/index.html'
req = request.Request(url)
res = request.urlopen(req).read()
soup = BeautifulSoup(res, 'lxml')
# Book title: first <div>'s <h3> text minus its trailing 3 characters
# (site-added suffix — TODO confirm the suffix is always 3 chars).
name = soup.div.h3.get_text()[:-3]
t = 'e://电子书//%s' % name
# makedirs(exist_ok=True) replaces the isdir/mkdir/else-pass dance and
# is race-free if the directory appears between check and create.
os.makedirs(t, exist_ok=True)
z = soup.div.dl
data = z.find_all('a')
m, n = [], []  # m: chapter titles, n: chapter URLs
for i in data:
    x = i.get_text()
    # Bug fix: use str.replace, not re.sub — the href is plain data, and
    # re.sub would interpret any backslash in it as a replacement escape.
    y = url.replace('index.html', i.get('href'))
    m.append(x)
    n.append(y)
def h():
    """Pick one request-header dict at random (simple User-Agent rotation).

    NOTE(review): duplicate definition — this second copy of the script
    shadows the identical `h` defined earlier in the file.
    """
    ua_pool = (
        {"User-Agent": "Mozilla/5.0 (Windows; U; Win 9x 4.90; en-GB; rv:1.8.1.1) Gecko/20061204 Firefox/2.0.0.1"},
        {"User-Agent": "Mozilla/5.0 (X11; U; SunOS sun4u; en-US; rv:1.6) Gecko/20040503"},
    )
    return random.choice(ua_pool)
def get(url, t):
    """Download one chapter page from *url* and save its filtered text to path *t*.

    Keeps only 《》 brackets, CJK ideographs, full-width forms, and the
    ASCII characters , . ! — all markup and other characters are dropped.

    NOTE(review): duplicate definition — this second copy of the script
    shadows the identical `get` defined earlier in the file.
    """
    headers = h()
    # Bug fix: the rotated User-Agent was generated but never attached to
    # the request; pass it so the site actually receives it.
    req = request.Request(url, headers=headers)
    res = request.urlopen(req).read()
    soup = BeautifulSoup(res, 'lxml')
    c = soup.find_all("div", "content")
    # Extract only the characters we want to keep from the raw markup.
    chars = re.findall(
        "[\u300a\u300b]|[\u4e00-\u9fa5]|[\uFF00-\uFFEF]|[,.!]",
        str(c),
    )
    # ''.join replaces reduce(+): linear instead of quadratic, and unlike
    # reduce it does not raise TypeError when nothing matched.
    text = ''.join(chars)
    # Explicit UTF-8 so the CJK text round-trips regardless of the
    # platform's default locale encoding.
    with open(t, 'w', encoding='utf-8') as fh:
        fh.write(text)
# NOTE(review): duplicate of the download loop earlier in the file —
# this re-downloads nothing new since the files now exist on disk.
# Walk chapter titles and URLs in lockstep; fetch any chapter whose
# output file is not already on disk.
for title, link in zip(m, n):
    t = 'e://电子书//%s//%s.txt' % (name, title)
    if os.path.isfile(t):
        # Chapter already saved — skip it.
        print('0000000000000000')
    else:
        get(link, t)
        print('正在下载%s' % title)