# day04
# 1. GET request: download the first ten pages of the Douban movie chart
#
# Endpoint (the paging parameters are appended to the query string):
#   https://movie.douban.com/j/chart/top_list?type=5&interval_id=100%3A90&action=&
#   start=40&limit=20
#
# Steps:
#   1. Build a customised request object
#   2. Fetch the response data
#   3. Save the data to disk
import urllib.request
import urllib.parse


def create_request(page):
    """Build the GET request for one page of the Douban movie chart.

    Args:
        page: 1-based page number. Each page holds 20 entries, so the
            ``start`` offset sent in the query string is ``(page - 1) * 20``.

    Returns:
        A ``urllib.request.Request`` whose URL carries the paging query
        string and whose headers contain a browser ``User-Agent``.
    """
    # This has to be a plain string: a trailing comma after the literal would
    # make it a one-element tuple and break the concatenation below.
    base_url = 'https://movie.douban.com/j/chart/top_list?type=5&interval_id=100%3A90&action=&'

    data = {
        'start': (page - 1) * 20,
        'limit': 20
    }
    # GET parameters travel in the URL, so they stay a str (no .encode()).
    data = urllib.parse.urlencode(data)
    print(data)

    url = base_url + data

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36'
    }

    request = urllib.request.Request(url=url, headers=headers)
    return request


def get_content(request):
    """Send ``request`` and return the response body decoded as UTF-8.

    Args:
        request: The ``urllib.request.Request`` to open.

    Returns:
        The response body as a ``str``.
    """
    # The context manager closes the underlying connection even if reading
    # or decoding fails.
    with urllib.request.urlopen(request) as response:
        content = response.read().decode('utf-8')
    return content


def down_load(page, content):
    """Write ``content`` to ``douban_<page>.json`` using UTF-8.

    Args:
        page: Page number used to build the output file name.
        content: Text to write to the file.
    """
    with open('douban_' + str(page) + '.json', 'w', encoding='utf-8') as fp:
        fp.write(content)


# Program entry point
if __name__ == '__main__':
    start_page = int(input('请输入起始的页码'))
    end_page = int(input('请输入结束的页码'))

    for page in range(start_page, end_page + 1):
        # Every page needs its own customised request object
        request = create_request(page)
        # Fetch the response data
        content = get_content(request)
        # Save it to disk
        down_load(page, content)
# 2. POST request to the KFC official website
# Endpoint:
#   http://www.kfc.com.cn/kfccda/ashx/GetStoreList.ashx?op=cname
# Form fields sent in the POST body:
#   cname: 合肥
#   pid:
#   pageIndex: 1
#   pageSize: 10
import urllib.request
import urllib.parse


def creat_request(page):
    """Build the POST request for one page of the KFC store list.

    Args:
        page: Page number sent as the ``pageIndex`` form field.

    Returns:
        A ``urllib.request.Request`` carrying the URL-encoded form as its
        body and a browser ``User-Agent`` header. Because ``data`` is set,
        urllib sends it as a POST.
    """
    base_url = 'http://www.kfc.com.cn/kfccda/ashx/GetStoreList.ashx?op=cname'

    data = {
        'cname': '合肥',
        'pid': '',
        'pageIndex': page,
        'pageSize': 10
    }

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36'
    }

    # A POST body must be bytes, so the url-encoded form is encoded as well.
    data = urllib.parse.urlencode(data).encode('utf-8')

    request = urllib.request.Request(url=base_url, data=data, headers=headers)
    return request


def get_content(request):
    """Send ``request`` and return the response body decoded as UTF-8.

    Args:
        request: The ``urllib.request.Request`` to open.

    Returns:
        The response body as a ``str``.
    """
    # The context manager closes the underlying connection even if reading
    # or decoding fails.
    with urllib.request.urlopen(request) as response:
        content = response.read().decode('utf-8')
    return content


def down_load(page, content):
    """Write ``content`` to ``kfc_<page>.json`` using UTF-8.

    Args:
        page: Page number used to build the output file name.
        content: Text to write to the file.
    """
    with open('kfc_' + str(page) + '.json', 'w', encoding='utf-8') as fp:
        fp.write(content)


if __name__ == '__main__':
    start_page = int(input('请输入起始页码'))
    end_page = int(input('请输入终止页码'))

    for page in range(start_page, end_page + 1):
        # Build the customised request object
        request = creat_request(page)
        # Fetch the page source
        content = get_content(request)
        # Save it to disk
        down_load(page, content)