# TODO: end-to-end debugging is still pending; not yet debugged because the IP was blocked.
import requests
from lxml import etree
def urlBuild(startNum):
    """Return the Douban book Top 250 list URL for the given start offset.

    Args:
        startNum: Value placed in the ``start`` query parameter.

    Returns:
        The full page URL as a string.
    """
    base = "https://book.douban.com/top250"
    return "{}?start={}".format(base, startNum)
def urlBuilder():
    """Return the ten list-page URLs, for start offsets 0, 25, ..., 225.

    Returns:
        A list of URL strings, one per page, in ascending offset order.
    """
    return [urlBuild(offset) for offset in range(0, 250, 25)]
def contentBuild(urlSet):
    """Download every page in ``urlSet`` and collect book names and authors.

    Each page is fetched with ``htmlContentGet``, split into book rows with
    ``bookItemGet``, and each row is parsed with ``singleBookContentGet``.
    Every name/author pair is also printed as it is collected.

    Args:
        urlSet: Iterable of page URLs to scrape.

    Returns:
        A two-element list ``[names, authors]`` of parallel lists.
    """
    names = []
    authors = []
    for pageUrl in urlSet:
        pageHtml = htmlContentGet(pageUrl)
        for row in bookItemGet(pageHtml):
            record = singleBookContentGet(row)
            title, author = record[0], record[1]
            names.append(title)
            authors.append(author)
            print(title, author)
    return [names, authors]
def htmlContentGet(url, timeout=10):
    """Fetch ``url`` with an HTTP GET and return the response body as text.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, so one stalled
            connection would hang the whole crawl.

    Returns:
        The decoded response body (``Response.text``).

    Raises:
        requests.exceptions.Timeout: If the server does not respond in time.
    """
    response = requests.get(url, timeout=timeout)
    # NOTE(review): the HTTP status is not checked here, so an error page is
    # returned as ordinary text and simply yields no book rows downstream.
    return response.text
def bookItemGet(htmlText):
    """Parse an HTML string and return its ``<tr class="item">`` elements.

    Args:
        htmlText: HTML source of a list page.

    Returns:
        The list of matching elements produced by the XPath query.
    """
    return etree.HTML(htmlText).xpath('//tr[@class="item"]')
def singleBookContentGet(bookItem):
    """Extract the name and first author field from one book row.

    Args:
        bookItem: An element (anything exposing ``xpath``) for one
            ``<tr class="item">`` row.

    Returns:
        ``[name, author]``. ``name`` is the first text node of the title
        link with surrounding whitespace removed. ``author`` is the part of
        the info paragraph before the first ``/``, also stripped. A field
        whose XPath query matches nothing is returned as an empty string
        rather than raising ``IndexError``, so one malformed row does not
        abort the whole crawl.
    """
    nameNodes = bookItem.xpath('td[@valign="top"]/div[@class="pl2"]/a/text()')
    infoNodes = bookItem.xpath('td[@valign="top"]/p[@class="pl"]/text()')

    name = str(nameNodes[0]).strip() if nameNodes else ""
    # The info paragraph is '/'-separated; the text before the first '/'
    # keeps the padding around the separator, so strip it like the name.
    author = str(infoNodes[0]).split('/')[0].strip() if infoNodes else ""
    return [name, author]
def save():
    """Placeholder for persisting results; currently does nothing."""
    return None
if __name__ == '__main__':
    # Build the page URLs, scrape them all, then dump the collected lists.
    urlSet = urlBuilder()
    allBook = contentBuild(urlSet)
    # allBook is [names, authors]; print each list on its own line.
    for column in allBook:
        print(column)