import re,os,random
from urllib import request
from bs4 import BeautifulSoup
from functools import reduce
# --- Fetch the novel's index page, prepare the output folder, and ---
# --- collect (chapter title, chapter URL) pairs from the listing.  ---
url = 'http://www.aiquxs.com/read/50/50271/index.html'
req = request.Request(url)
res = request.urlopen(req).read()
soup = BeautifulSoup(res, 'lxml')
# Book title: first <div>'s <h3> text minus its trailing 3 characters
# (site-added suffix — TODO confirm the suffix is always 3 chars).
name = soup.div.h3.get_text()[:-3]
t = 'e://电子书//%s' % name
# makedirs(exist_ok=True) replaces the isdir/mkdir/else-pass dance and
# is race-free if the directory appears between check and create.
os.makedirs(t, exist_ok=True)
z = soup.div.dl
data = z.find_all('a')
m, n = [], []  # m: chapter titles, n: chapter URLs
for i in data:
    x = i.get_text()
    # Bug fix: use str.replace, not re.sub — the href is plain data, and
    # re.sub would interpret any backslash in it as a replacement escape.
    y = url.replace('index.html', i.get('href'))
    m.append(x)
    n.append(y)
def h():
    """Pick one request-header dict at random (simple User-Agent rotation)."""
    ua_pool = (
        {"User-Agent": "Mozilla/5.0 (Windows; U; Win 9x 4.90; en-GB; rv:1.8.1.1) Gecko/20061204 Firefox/2.0.0.1"},
        {"User-Agent": "Mozilla/5.0 (X11; U; SunOS sun4u; en-US; rv:1.6) Gecko/20040503"},
    )
    return random.choice(ua_pool)
def get(url, t):
    """Download one chapter page from *url* and save its filtered text to path *t*.

    Keeps only 《》 brackets, CJK ideographs, full-width forms, and the
    ASCII characters , . ! — all markup and other characters are dropped.
    """
    headers = h()
    # Bug fix: the rotated User-Agent was generated but never attached to
    # the request; pass it so the site actually receives it.
    req = request.Request(url, headers=headers)
    res = request.urlopen(req).read()
    soup = BeautifulSoup(res, 'lxml')
    c = soup.find_all("div", "content")
    # Extract only the characters we want to keep from the raw markup.
    chars = re.findall(
        "[\u300a\u300b]|[\u4e00-\u9fa5]|[\uFF00-\uFFEF]|[,.!]",
        str(c),
    )
    # ''.join replaces reduce(+): linear instead of quadratic, and unlike
    # reduce it does not raise TypeError when nothing matched.
    text = ''.join(chars)
    # Explicit UTF-8 so the CJK text round-trips regardless of the
    # platform's default locale encoding.
    with open(t, 'w', encoding='utf-8') as fh:
        fh.write(text)
# Walk chapter titles and URLs in lockstep; fetch any chapter whose
# output file is not already on disk.
for title, link in zip(m, n):
    t = 'e://电子书//%s//%s.txt' % (name, title)
    if os.path.isfile(t):
        # Chapter already saved — skip it.
        print('0000000000000000')
    else:
        get(link, t)
        print('正在下载%s' % title)
from urllib import request
from bs4 import BeautifulSoup
from functools import reduce
# NOTE(review): this entire second half of the file is a verbatim
# duplicate of the script above and re-runs the whole download; it
# should probably be deleted once confirmed accidental.
url = 'http://www.aiquxs.com/read/50/50271/index.html'
req = request.Request(url)
res = request.urlopen(req).read()
soup = BeautifulSoup(res, 'lxml')
# Book title: first <div>'s <h3> text minus its trailing 3 characters
# (site-added suffix — TODO confirm the suffix is always 3 chars).
name = soup.div.h3.get_text()[:-3]
t = 'e://电子书//%s' % name
# makedirs(exist_ok=True) replaces the isdir/mkdir/else-pass dance and
# is race-free if the directory appears between check and create.
os.makedirs(t, exist_ok=True)
z = soup.div.dl
data = z.find_all('a')
m, n = [], []  # m: chapter titles, n: chapter URLs
for i in data:
    x = i.get_text()
    # Bug fix: use str.replace, not re.sub — the href is plain data, and
    # re.sub would interpret any backslash in it as a replacement escape.
    y = url.replace('index.html', i.get('href'))
    m.append(x)
    n.append(y)
def h():
    """Pick one request-header dict at random (simple User-Agent rotation).

    NOTE(review): duplicate definition — this second copy of the script
    shadows the identical `h` defined earlier in the file.
    """
    ua_pool = (
        {"User-Agent": "Mozilla/5.0 (Windows; U; Win 9x 4.90; en-GB; rv:1.8.1.1) Gecko/20061204 Firefox/2.0.0.1"},
        {"User-Agent": "Mozilla/5.0 (X11; U; SunOS sun4u; en-US; rv:1.6) Gecko/20040503"},
    )
    return random.choice(ua_pool)
def get(url, t):
    """Download one chapter page from *url* and save its filtered text to path *t*.

    Keeps only 《》 brackets, CJK ideographs, full-width forms, and the
    ASCII characters , . ! — all markup and other characters are dropped.

    NOTE(review): duplicate definition — this second copy of the script
    shadows the identical `get` defined earlier in the file.
    """
    headers = h()
    # Bug fix: the rotated User-Agent was generated but never attached to
    # the request; pass it so the site actually receives it.
    req = request.Request(url, headers=headers)
    res = request.urlopen(req).read()
    soup = BeautifulSoup(res, 'lxml')
    c = soup.find_all("div", "content")
    # Extract only the characters we want to keep from the raw markup.
    chars = re.findall(
        "[\u300a\u300b]|[\u4e00-\u9fa5]|[\uFF00-\uFFEF]|[,.!]",
        str(c),
    )
    # ''.join replaces reduce(+): linear instead of quadratic, and unlike
    # reduce it does not raise TypeError when nothing matched.
    text = ''.join(chars)
    # Explicit UTF-8 so the CJK text round-trips regardless of the
    # platform's default locale encoding.
    with open(t, 'w', encoding='utf-8') as fh:
        fh.write(text)
# NOTE(review): duplicate of the download loop earlier in the file —
# this re-downloads nothing new since the files now exist on disk.
# Walk chapter titles and URLs in lockstep; fetch any chapter whose
# output file is not already on disk.
for title, link in zip(m, n):
    t = 'e://电子书//%s//%s.txt' % (name, title)
    if os.path.isfile(t):
        # Chapter already saved — skip it.
        print('0000000000000000')
    else:
        get(link, t)
        print('正在下载%s' % title)