前情提要:本文使用 geopy 库提供的 Nominatim 地理编码器来获取城市的经纬度
提醒:在进行爬取之前记得先安装 geopy 库(Nominatim 包含在 geopy 中)
可以使用
pip install geopy
也可以在 IDE(如 PyCharm)的 Settings 里搜索 geopy 并安装
然后就进入爬虫编写啦(这一版时而可以,时而不可以)
from geopy.geocoders import Nominatim
def get_coordinates(city_name):
    """Look up the latitude and longitude of a city through Nominatim.

    The query is ``city_name`` with ", 中国" appended, requested with
    ``language='zh-CN'`` and a 10-second timeout. Exceptions raised by the
    geocoder are not caught here.

    Args:
        city_name: Name of the city to geocode.

    Returns:
        A ``(latitude, longitude)`` tuple, or ``None`` when the geocoder
        returns no match.
    """
    geocoder = Nominatim(user_agent="my_geocoder")
    match = geocoder.geocode(city_name + ", 中国", language='zh-CN', timeout=10)
    # Guard clause: nothing found for this query.
    if not match:
        return None
    return match.latitude, match.longitude
def save_to_txt(city_coordinates_dict, file_path):
    """Write city coordinates to a UTF-8 text file, one city per line.

    Each line has the form ``<city>: <coordinates[0]>, <coordinates[1]>``.
    The file at ``file_path`` is opened in write mode, so existing content
    is replaced.

    Args:
        city_coordinates_dict: Mapping of city name to an indexable pair of
            coordinates.
        file_path: Destination path of the text file.
    """
    with open(file_path, 'w', encoding='utf-8') as out:
        out.writelines(
            f"{name}: {pair[0]}, {pair[1]}\n"
            for name, pair in city_coordinates_dict.items()
        )
def main():
    """Geocode every listed city and save the results to a text file.

    Each city is looked up with ``get_coordinates``; cities that resolve are
    printed and collected, the others are reported as failures. The collected
    coordinates are then written to ``china_province_capitals.txt``.
    """
    # Cities to geocode. The original comment calls this the list of Chinese
    # provincial capitals; each city appears once so that no city is
    # requested from the geocoder twice.
    china_province_capitals = [
        "北京", "天津", "上海", "重庆", "哈尔滨", "长春", "沈阳", "呼和浩特", "石家庄",
        "乌鲁木齐",
        "兰州", "西宁", "西安", "银川", "郑州", "济南", "太原", "合肥", "南京", "杭州",
        "福州", "南昌", "广州", "深圳", "南宁", "海口", "成都", "贵阳", "昆明", "拉萨",
        "武汉", "长沙", "香港", "澳门",
    ]
    city_coordinates_dict = {}
    for city in china_province_capitals:
        # This script only defines get_coordinates (no retry variant), so
        # that is the function that must be called here.
        coordinates = get_coordinates(city)
        if coordinates:
            city_coordinates_dict[city] = coordinates
            print(f"{city}的经纬度:{coordinates}")
        else:
            print(f"无法获取{city}的经纬度信息。")
    save_to_txt(city_coordinates_dict, 'china_province_capitals.txt')
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
(成功版,较为稳定)
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut
def get_coordinates_with_retry(city_name):
    """Look up a city's coordinates, retrying when the geocoder times out.

    The query is ``city_name`` with ", 中国" appended, requested with
    ``language='zh-CN'`` and a 10-second timeout. Only ``GeocoderTimedOut``
    is caught; any other exception propagates to the caller.

    Args:
        city_name: Name of the city to geocode.

    Returns:
        A ``(latitude, longitude)`` tuple, or ``None`` when the geocoder
        returns no match or every attempt timed out.
    """
    geolocator = Nominatim(user_agent="my_geocoder")
    # Maximum number of attempts before giving up.
    max_retries = 3
    for attempt in range(1, max_retries + 1):
        try:
            location = geolocator.geocode(city_name + ", 中国", language='zh-CN', timeout=10)
        except GeocoderTimedOut:
            print(f"获取 {city_name} 经纬度信息超时,正在重试 {attempt}/{max_retries}...")
            continue
        # A response arrived: return it without retrying, even if it is empty.
        if not location:
            return None
        return location.latitude, location.longitude
    # Every attempt timed out.
    print(f"无法获取 {city_name} 的经纬度信息。")
    return None
def save_to_txt(city_coordinates_dict, file_path):
    """Write city coordinates to a UTF-8 text file, one city per line.

    Each line has the form ``<city>: <coordinates[0]>, <coordinates[1]>``.
    The file at ``file_path`` is opened in write mode, so existing content
    is replaced.

    Args:
        city_coordinates_dict: Mapping of city name to an indexable pair of
            coordinates.
        file_path: Destination path of the text file.
    """
    with open(file_path, 'w', encoding='utf-8') as out:
        out.writelines(
            f"{name}: {pair[0]}, {pair[1]}\n"
            for name, pair in city_coordinates_dict.items()
        )
def main():
    """Geocode every listed city and save the results to a text file.

    Each city is looked up with ``get_coordinates_with_retry``; cities that
    resolve are printed and collected, the others are reported as failures.
    The collected coordinates are then written to
    ``china_province_capitals.txt``.
    """
    # Cities to geocode. The original comment calls this the list of Chinese
    # provincial capitals; each city appears once so that no city is
    # requested from the geocoder twice.
    china_province_capitals = [
        "北京", "天津", "上海", "重庆", "哈尔滨", "长春", "沈阳", "呼和浩特", "石家庄",
        "乌鲁木齐",
        "兰州", "西宁", "西安", "银川", "郑州", "济南", "太原", "合肥", "南京", "杭州",
        "福州", "南昌", "广州", "深圳", "南宁", "海口", "成都", "贵阳", "昆明", "拉萨",
        "武汉", "长沙", "香港", "澳门",
    ]
    city_coordinates_dict = {}
    for city in china_province_capitals:
        coordinates = get_coordinates_with_retry(city)
        if coordinates:
            city_coordinates_dict[city] = coordinates
            print(f"{city}的经纬度:{coordinates}")
        else:
            print(f"无法获取{city}的经纬度信息。")
    save_to_txt(city_coordinates_dict, 'china_province_capitals.txt')
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
成果展示:
(第一次发文章,记录的是实训过程中遇到的一些问题及其解决方法。如果其中有什么错误,欢迎大家指正)