#coding:utf8
import time
from urllib import request
from bs4 import BeautifulSoup
# Series ids to crawl.
# NOTE(review): the original iterates the literal tuple (100, 400, 15); if a
# sweep such as range(100, 400, 15) was intended, change it here - confirm.
SERIES_IDS = (100, 400, 15)
# Page numbers requested for every series (pages 1 and 2).
PAGES = range(1, 3)
# URL template: first placeholder is the series id, second the page number.
PAGE_URL = 'https://book.douban.com/series/%d?page=%d'
# File that collected titles are appended to.
OUTPUT_PATH = '/root/桌面/豆瓣/1.txt'
# Seconds to wait after each page request.
REQUEST_DELAY = 2


def format_title(title):
    """Return *title* wrapped in the ``<<...>>`` marker used in the output."""
    return "<<" + title + ">>"


def summary_lines(start_time, end_time, count):
    """Return the two summary lines printed at the end of a run.

    Args:
        start_time: ``time.time()`` value taken when the run started.
        end_time: ``time.time()`` value taken when the run finished.
        count: Number of book titles that were written.

    Returns:
        A list with the elapsed-time line and the total-count line.
    """
    # Elapsed time is end - start; the reverse order yields a negative value.
    duration_time = end_time - start_time
    return [
        '运行时间一共%.2f: ' % duration_time + '秒',
        '共抓到%d本书籍' % count,
    ]


def extract_titles(h2_list):
    """Return the formatted titles found in an iterable of ``<h2>`` nodes.

    Each node is expected to expose its first ``<a>`` child as ``.a`` and that
    child's attributes as a mapping in ``.attrs``. Headings without a link, or
    whose link has no ``title`` attribute, are skipped instead of raising.
    """
    titles = []
    for h2_node in h2_list:
        a_node = h2_node.a
        if a_node is None:
            continue
        title = a_node.attrs.get("title")
        if title is None:
            continue
        titles.append(format_title(title))
    return titles


def fetch_soup(url):
    """Download *url* and return it parsed by BeautifulSoup with lxml."""
    # The context manager closes the HTTP response once parsing is done.
    with request.urlopen(url) as response:
        return BeautifulSoup(response, 'lxml')


def main():
    """Crawl the configured series pages and append book titles to a file.

    Prints progress to stdout, writes one numbered title per line to
    ``OUTPUT_PATH`` and finally prints the elapsed time and the total count.
    """
    start_time = time.time()  # reference point for the elapsed-time report

    # Preview request kept from the original script: fetch one page and dump
    # the parsed document to stdout.
    url = 'https://book.douban.com/series/128?page=1'
    print(fetch_soup(url))

    count = 0  # number of books written so far
    # ``with`` guarantees the file is closed even if a request fails; the
    # explicit encoding keeps the Chinese titles independent of the locale.
    with open(OUTPUT_PATH, 'a', encoding='utf-8') as f:
        for page in PAGES:
            for series_id in SERIES_IDS:
                soup = fetch_soup(PAGE_URL % (series_id, page))
                print("=============" + "第%d页" % page + "==============")
                h2_list = soup.find_all('h2')
                print(h2_list)
                for title in extract_titles(h2_list):
                    count += 1
                    print("第%d本书籍" % count, title, file=f)
                # NOTE(review): the original indentation was lost; pausing
                # once per fetched page is the assumed placement - confirm.
                time.sleep(REQUEST_DELAY)

    for line in summary_lines(start_time, time.time(), count):
        print(line)


if __name__ == "__main__":
    main()
# Reposted from: https://blog.51cto.com/9694110/2045073