# Scrape Dangdang (当当网) with XPath.
import requests
from lxml import etree
import time
'''
Scrape the Dangdang bestselling-books chart with XPath.
'''
# Common prefix of the paginated bestseller URLs; the page number is appended.
_BESTSELLER_URL_PREFIX = (
    'http://bang.dangdang.com/books/bestsellers/'
    '01.00.00.00.00.00-recent7-0-0-1-'
)

# (result key, XPath) pairs, in the order the keys appear in the printed dict.
# Each XPath selects the text nodes for one field across a whole page.
# The expressions use the standard '//' descendant abbreviation rather than
# the non-standard '///' spelling.
_FIELD_XPATHS = (
    ('_title', '//div[@class="name"]/a/text()'),
    ('_author', '//div[@class="publisher_info"]/a[@title]/text()'),
    ('_price', '//div[@class="price"]/p/span[@class="price_n"]/text()'),
    ('_star', '//div[@class="star"]/span[@class="tuijian"]/text()'),
    ('_date', '//div[@class="publisher_info"]/span/text()'),
)

# Seconds to wait for the server before giving up on a single request.
_REQUEST_TIMEOUT = 10


def main():
    """Fetch pages 1-25 of the bestseller list and print the scraped fields.

    For every page, each XPath in ``_FIELD_XPATHS`` is evaluated against the
    parsed HTML and the resulting list of strings is appended to the list
    stored under the matching key. Each value in the final dict is therefore
    a list holding one sub-list per page. A progress message is printed after
    each page, followed by a one-second pause, and the complete dict is
    printed once all pages are done.

    Raises:
        requests.HTTPError: if a page responds with an error status code.
        requests.RequestException: on connection failures or timeouts.
    """
    d = {key: [] for key, _ in _FIELD_XPATHS}
    # One session for all pages so the underlying connection can be reused.
    with requests.Session() as session:
        for i in range(1, 26):
            resp = session.get(
                _BESTSELLER_URL_PREFIX + str(i),
                timeout=_REQUEST_TIMEOUT,
            )
            # Fail loudly instead of parsing an error page into empty lists.
            resp.raise_for_status()
            res_xpath = etree.HTML(resp.text)
            for key, expr in _FIELD_XPATHS:
                d[key].append(res_xpath.xpath(expr))
            print('第%s页爬取完成!' % i)
            # Pause between pages to avoid hammering the server.
            time.sleep(1)
    print(d)
# Run the scraper only when this file is executed directly, not on import.
if __name__ == '__main__':
    main()