from lxml import etree
import time, random
import requests
# User-Agent string of a desktop Chrome 56 browser on Windows 10.
_USER_AGENT = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/56.0.2924.87 Safari/537.36"
)

# HTTP request headers handed to `requests.get` by `crow()`.
headers = {"User-Agent": _USER_AGENT}
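# Candidate HTTP proxies (currently disabled and not used anywhere below).
# NOTE: as written this is a set literal; `requests` expects a dict mapping
# URL scheme to proxy URL, e.g. {'http': 'http://217.23.73.172'}, and the
# last entry is missing its 'http://' prefix.
# proxies = {
#     'http://217.23.73.172',
#     'http://222.186.45.14',
#     '222.186.45.145'
# }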
def _first_text(node, path, default=""):
    """Return the first non-blank, whitespace-normalised text matched by *path*.

    *path* is evaluated with ``node.xpath``; *default* is returned when the
    path matches nothing or only whitespace.
    """
    for text in node.xpath(path):
        cleaned = " ".join(text.split())
        if cleaned:
            return cleaned
    return default


def crow(pages=10, max_delay=3.0):
    """Fetch the Douban Top 250 movie list and print every entry.

    For each movie the title, the info paragraph, the rating, the one-line
    quote and the vote-count text are printed, in that order. A movie that
    lacks one of these fields gets an empty line for it instead of aborting
    the run.

    Args:
        pages: Number of 25-entry result pages to fetch. The default of 10
            covers the 250 entries of the list.
        max_delay: Upper bound, in seconds, of the random pause inserted
            between two page requests.

    Returns:
        None. Results are written to stdout. On a network, HTTP or parse
        failure ``'Error~'`` plus the cause is printed and the crawl stops.
    """
    for page in range(pages):
        if page:
            # Throttle between requests (not between printed lines).
            time.sleep(random.random() * max_delay)

        url = 'https://movie.douban.com/top250?start=' + str(25 * page)

        # Simple fault tolerance: report the failure and stop crawling.
        try:
            response = requests.get(url, headers=headers, timeout=10)
            response.raise_for_status()
            tree = etree.HTML(response.text)
        except (requests.RequestException, etree.LxmlError) as exc:
            print('Error~', exc)
            return
        if tree is None:
            print('Error~', 'empty document for ' + url)
            return

        entries = tree.xpath('//ol[@class="grid_view"]/li')
        for offset, entry in enumerate(entries):
            # Every path below starts with "." so that it is evaluated
            # relative to this <li>; a leading "//" would search the whole
            # document and return the fields of all movies on the page.
            title = _first_text(
                entry, './/div[@class="hd"]/a/span[@class="title"]/text()')
            info_parts = [
                " ".join(text.split())
                for text in entry.xpath('.//div[@class="bd"]/p[1]/text()')
            ]
            infor = "\n".join(part for part in info_parts if part)
            rating_num = _first_text(
                entry,
                './/div[@class="bd"]/div/span[@class="rating_num"]/text()')
            quote = _first_text(
                entry, './/div[@class="bd"]/p/span[@class="inq"]/text()')
            numbers = _first_text(
                entry, './/div[@class="bd"]/div/span[4]/text()')

            print("\n~~~正在打印第%d条数据~~~\n" % (25 * page + offset))
            print(title)
            print('\n')
            print(infor)
            print(rating_num)
            print(quote)
            print(numbers)
if __name__ == "__main__":
    # Start the crawl only when this file is executed as a script,
    # not when it is imported as a module.
    crow()