# Download paginated results from the KFC China "GetStoreList" AJAX endpoint.
#
# Captured browser requests (page 1 and page 2 differ only in pageIndex):
#   POST http://www.kfc.com.cn/kfccda/ashx/GetStoreList.ashx?op=cname
#   form data:
#     cname: 北京
#     pid:
#     pageIndex: 1   (2 for the second page, and so on)
#     pageSize: 10

import urllib.request
import urllib.parse


def create_request(page):
    """Build the POST request object for one page of results.

    Args:
        page: 1-based page number, sent as the ``pageIndex`` form field.

    Returns:
        A ``urllib.request.Request`` carrying the url-encoded form body and a
        browser-like ``User-Agent`` header. Supplying ``data`` is what makes
        urllib issue a POST instead of a GET.
    """
    base_url = 'http://www.kfc.com.cn/kfccda/ashx/GetStoreList.ashx?op=cname'
    form = {
        'cname': '北京',
        'pid': '',
        'pageIndex': page,
        'pageSize': '10',
    }
    # POST bodies must be bytes, so url-encode the form and then encode it.
    body = urllib.parse.urlencode(form).encode('utf-8')
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36 SLBrowser/9.0.3.1311 SLBChan/103'
    }
    return urllib.request.Request(url=base_url, data=body, headers=headers)


def get_content(request, timeout=10):
    """Send *request* and return the response body decoded as UTF-8 text.

    Args:
        request: The ``urllib.request.Request`` to send.
        timeout: Seconds to wait on blocking network operations before
            giving up, so a stalled server cannot hang the script forever.

    Returns:
        The response body as a ``str``.

    Raises:
        urllib.error.URLError: If the request fails; this includes HTTP error
            statuses, which surface as its subclass ``HTTPError``.
    """
    # The context manager closes the connection even if read/decode raises.
    with urllib.request.urlopen(request, timeout=timeout) as response:
        return response.read().decode('utf-8')


def down_load(page, content):
    """Write *content* to ``kfc<page>.json`` in the current directory.

    Args:
        page: Page number used to build the output file name.
        content: Text to write; an existing file of the same name is
            overwritten.
    """
    with open('kfc' + str(page) + '.json', 'w', encoding='utf-8') as fp:
        fp.write(content)


def main():
    """Prompt for a page range and save every page in it to its own file."""
    start_page = int(input('起始:'))
    end_page = int(input('结束:'))
    # end_page is inclusive, hence the +1.
    for page in range(start_page, end_page + 1):
        # Build the customised request object.
        request = create_request(page)
        # Fetch the response body.
        content = get_content(request)
        # Save it to disk.
        down_load(page, content)


if __name__ == '__main__':
    main()
# 爬虫学习--63 urllib 的 ajax 的 post 请求:肯德基官网
# 于 2024-03-22 18:52:35 首次发布