"""Fetch the rank-sorted anime browser pages from bangumi.tv and print matches.

Each page is downloaded, scanned with a regular expression, and the list of
captured tuples is printed to stdout.
"""

import json
import re
import time

import requests
from requests.exceptions import RequestException

# Headers sent with every request (a desktop Chrome User-Agent string).
_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_3) '
                  'AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/65.0.3325.162 Safari/537.36'
}

# Seconds to wait for the server before giving up; without a timeout
# requests.get() can block indefinitely on a stalled connection.
_REQUEST_TIMEOUT = 10

# Group 1: text of the class="l" anchor inside an <h3>.
# Group 2: the digits that follow the "<small>Rank ...</small>" element.
# Raw string so that "\d" reaches the regex engine without relying on
# Python passing an invalid string escape through unchanged.
_ITEM_PATTERN = re.compile(
    r'<h3>.*?class="l">(.*?)</a>.*?<small>Rank.*?</small>(\d+)</span>',
    re.S,
)


def get_one_page(url):
    """Download *url* and return its body decoded as UTF-8.

    Returns:
        The decoded page text when the server answers with status 200,
        otherwise ``None``. ``None`` is also returned when ``requests``
        raises a ``RequestException`` (which includes timeouts).
    """
    try:
        response = requests.get(url, headers=_HEADERS, timeout=_REQUEST_TIMEOUT)
    except RequestException:
        return None
    if response.status_code != 200:
        return None
    return response.content.decode('utf-8')


def parse_one_page(html):
    """Print and return every pattern match found in *html*.

    Args:
        html: Page text, or ``None`` when the download failed.

    Returns:
        A list of ``(anchor_text, rank_digits)`` string tuples. When *html*
        is ``None`` nothing is printed and an empty list is returned, so a
        single failed download does not abort the whole run.
    """
    if html is None:
        return []
    items = _ITEM_PATTERN.findall(html)
    print(items)
    return items


def main(offset):
    """Fetch and parse the browser page whose page number is *offset*."""
    url = 'http://bangumi.tv/anime/browser?sort=rank&page=' + str(offset)
    html = get_one_page(url)
    parse_one_page(html)


if __name__ == '__main__':
    for i in range(1, 20):
        main(offset=i)
        # Pause between requests to avoid hammering the server.
        time.sleep(1)