Expected result
Scrape the full KFC store listing for every city in China (300+ cities) and save it to a CSV file that can be opened in Excel. Each row holds the search keyword (city name), the store name, city, detailed address, and the pro field returned by the API; a sketch for converting the finished CSV into a real .xlsx workbook follows the script below.
Code implementation
import requests
import csv
import json
def getData(page, key):
    """Fetch one page of store results for the city keyword `key` and append them to kfc_store.csv."""
    url = 'http://www.kfc.com.cn/kfccda/ashx/GetStoreList.ashx?op=keyword'
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36'}
    data = {
        'cname': '',
        'pid': '',
        'keyword': key,
        'pageIndex': page,
        'pageSize': '10'
    }
    res = requests.post(url=url, data=data, headers=headers)
    json_data = res.json()
    print(json_data)
    # 'Table1' holds the store records for the requested page
    for i in json_data['Table1']:
        storeName = i['storeName']
        cityName = i['cityName']
        addressDetail = i['addressDetail']
        pro = i['pro']
        print([storeName, cityName, addressDetail, pro])
        # utf-8-sig adds a BOM so Excel opens the Chinese text correctly
        with open('kfc_store.csv', mode='a', newline='', encoding='utf-8-sig') as f:
            writer = csv.writer(f, delimiter=',')
            writer.writerow([key, storeName, cityName, addressDetail, pro])
if __name__ == '__main__':
    # The nationwide city list comes from Lagou: the response of the API below was
    # copied from the browser into ci.txt (a sketch for fetching it directly with
    # requests is shown after this script).
    # URL = 'https://www.lagou.com/lbs/getAllCitySearchLabels.json'
    with open('ci.txt', encoding='utf-8') as f2:
        ts = f2.readline()
    print(ts)
    json_ts = json.loads(ts)
    # Cities are grouped by their first letter; flatten them into a single list
    cs = json_ts['content']['data']['allCitySearchLabels']
    place = []
    m = 0
    for c in cs:
        for i in cs[c]:
            m = m + 1
            place.append(i['name'])
    print(f'{m} cities in total')
    print(place)
    # place = ['北京', '上海', '广州', '深圳']  # uncomment to test with a few cities only
    for key in place:
        url = 'http://www.kfc.com.cn/kfccda/ashx/GetStoreList.ashx?op=keyword'
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36'}
        data = {
            'cname': '',
            'pid': '',
            'keyword': key,
            'pageIndex': 1,
            'pageSize': '10'
        }
        # The first request is only used to read the total number of stores for this city
        res = requests.post(url=url, data=data, headers=headers)
        json_data = res.json()
        # print(json_data)
        count = json_data['Table'][0]['rowcount']
        # 10 stores per page, so the number of pages is ceil(count / 10)
        pages = (count + 9) // 10
        print(f'=========== fetching data for {key} ===========')
        for n in range(1, pages + 1):
            print('=========== fetching page {} ==========='.format(n))
            getData(n, key)
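For reference, the fields the script reads imply that the KFC endpoint's JSON response has roughly the shape sketched below; the values are illustrative placeholders, not real data, and anything beyond the fields the code actually accesses is an assumption.

# Rough shape of json_data, inferred from the fields the script accesses;
# all values here are placeholders.
json_data = {
    'Table': [{'rowcount': 42}],    # total number of stores for the keyword
    'Table1': [                     # one record per store on the current page
        {'storeName': '...', 'cityName': '...', 'addressDetail': '...', 'pro': '...'},
    ]
}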
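The script assumes ci.txt already contains the single-line JSON copied from Lagou's city endpoint. A minimal sketch for downloading it with requests instead is shown below; the endpoint belongs to a third-party site and may block automated requests or change its format, so treat this as an optional convenience rather than a guaranteed API.

import json
import requests

def save_city_list(path='ci.txt'):
    """Download Lagou's city list and save it as a single line of JSON for the scraper above."""
    url = 'https://www.lagou.com/lbs/getAllCitySearchLabels.json'
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36'}
    res = requests.get(url, headers=headers, timeout=10)
    res.raise_for_status()
    # Keep everything on one line so f2.readline() in the main script still reads the full document
    with open(path, 'w', encoding='utf-8') as f:
        f.write(json.dumps(res.json(), ensure_ascii=False))

if __name__ == '__main__':
    save_city_list()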
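The expected result mentions Excel, while the script writes a plain CSV. If a real .xlsx workbook is wanted, one option (not part of the original code; it assumes pandas and openpyxl are installed) is to convert the finished CSV afterwards:

import pandas as pd

# Column names follow the order written by getData(); the scraper writes no header row itself.
columns = ['keyword', 'storeName', 'cityName', 'addressDetail', 'pro']
df = pd.read_csv('kfc_store.csv', names=columns, encoding='utf-8-sig')
df.to_excel('kfc_store.xlsx', index=False)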