# 04. requests实战之肯德基餐厅位置爬取
import requests
if __name__ == "__main__":
    # Script: ask for a city name, POST it as the `keyword` form field to the
    # KFC store-list endpoint, and save the raw response text to
    # "<city>.html" in the current directory.
    url = 'http://www.kfc.com.cn/kfccda/ashx/GetStoreList.ashx?op=keyword'
    city = input("enter a city:")
    # Form fields sent in the POST body; only `keyword` varies per run.
    data = {
        'cname': '',
        'pid': '',
        'keyword': city,
        'pageIndex': '1',
        'pageSize': '10',
    }
    # Send a desktop-browser User-Agent instead of the library default.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.67 Safari/537.36 Edg/87.0.664.55'
    }
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    response = requests.post(url=url, data=data, headers=headers, timeout=10)
    # Raise on 4xx/5xx so an error page is never saved and reported as success.
    response.raise_for_status()
    page_text = response.text
    fileName = city + '.html'
    with open(fileName, 'w', encoding='utf-8') as fp:
        fp.write(page_text)
    print(fileName, '保存成功!!!')
# 05. 爬取国家药品监督管理局化妆品生产许可信息管理系统服务平台
# 网址: http://scxk.nmpa.gov.cn:81/xk/
import requests
import json
if __name__ == "__main__":
    # Script: page through the licence list endpoint to collect company IDs,
    # fetch the detail record for each ID, and dump all detail records to
    # ./AllData.json.

    # Step 1: collect the ID values of the listed companies in bulk.
    url = "http://scxk.nmpa.gov.cn:81/xk/itownet/portalAction.do?method=getXkzsList"
    # Send a desktop-browser User-Agent instead of the library default.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.67 Safari/537.36 Edg/87.0.664.55'
    }
    id_list = []  # company ID values gathered from the list pages
    all_data_list = []  # detail records of every company
    # Request list pages 1 through 5, packing the form parameters per page.
    for page in range(1, 6):
        data = {
            'on': 'true',
            'page': page,
            'pageSize': '15',
            'productName': '',
            'conditionType': '1',
            'applyname': '',
            'applysn': '',
        }
        # Without a timeout, requests waits indefinitely on an unresponsive server.
        list_response = requests.post(url=url, data=data, headers=headers, timeout=10)
        # Raise on 4xx/5xx rather than failing later while parsing an error page.
        list_response.raise_for_status()
        json_ids = list_response.json()
        id_list.extend(dic['ID'] for dic in json_ids['list'])

    # Step 2: fetch the detail data of each company by its ID.
    post_url = 'http://scxk.nmpa.gov.cn:81/xk/itownet/portalAction.do?method=getXkzsById'
    for company_id in id_list:  # renamed from `id` to avoid shadowing the builtin
        data = {
            'id': company_id
        }
        detail_response = requests.post(url=post_url, data=data, headers=headers, timeout=10)
        detail_response.raise_for_status()
        all_data_list.append(detail_response.json())

    # Step 3: persist all_data_list; `with` guarantees the file is flushed and closed.
    with open('./AllData.json', 'w', encoding='utf-8') as fp:
        json.dump(all_data_list, fp=fp, ensure_ascii=False)
    print('Over!!')