import json
import urllib.parse
import urllib.request

# Endpoint queried by the script; the paging parameters are appended to it
# as a URL-encoded query string (note the trailing '&').
BASE_URL = 'https://movie.douban.com/j/chart/top_list?type=5&interval_id=100%3A90&action=&'

# Number of records requested per page.
PAGE_SIZE = 20

# Browser-style User-Agent sent with every request.
# NOTE(review): presumably needed so the server does not reject the default
# urllib agent -- confirm against the live endpoint.
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36',
}


def get_request(page):
    """Build the GET request object for one page of results.

    Args:
        page: 1-based page number; page ``n`` maps to
            ``start=(n - 1) * PAGE_SIZE`` and ``limit=PAGE_SIZE``.

    Returns:
        A ``urllib.request.Request`` whose URL is ``BASE_URL`` followed by
        the encoded ``start``/``limit`` parameters, carrying ``HEADERS``.
    """
    params = {
        'start': (page - 1) * PAGE_SIZE,
        'limit': PAGE_SIZE,
    }
    # This is a GET request, so the parameters must be part of the URL
    # itself rather than being sent as a request body.
    query = urllib.parse.urlencode(params)
    return urllib.request.Request(url=BASE_URL + query, headers=HEADERS)


def get_content(request):
    """Send *request* and return the response body decoded as UTF-8 text.

    The response is opened in a ``with`` block so the underlying
    connection is closed even if reading or decoding fails.
    """
    with urllib.request.urlopen(request) as response:
        return response.read().decode('utf-8')


def down_load(page, content):
    """Write *content* to ``douban<page>.json`` in the current directory.

    Args:
        page: Page number used to build the file name.
        content: Text to write; the file is encoded as UTF-8.
    """
    filename = 'douban' + str(page) + '.json'
    with open(filename, 'w', encoding='utf-8') as fp:
        fp.write(content)


def main():
    """Prompt for a page range and save each page's response to a file."""
    start_page = int(input('输入起始页>>'))
    # The original prompt repeated the "start page" text here; this value
    # is the (inclusive) end page, so the prompt now says so.
    end_page = int(input('输入结束页>>'))

    for page in range(start_page, end_page + 1):
        # Build the request object for this page.
        request = get_request(page)
        # Fetch the response body.
        content = get_content(request)
        # Save it to disk.
        down_load(page, content)


# Program entry point.
if __name__ == '__main__':
    main()
Day3 Python 爬虫基础:使用 AJAX 的 GET 请求获取豆瓣前 10 页数据(每页 20 条)
于 2023-05-14 09:21:17 首次发布