import csv
import requests
from bs4 import BeautifulSoup
def get_label_name(label_code, timeout=10):
    """Map each eBird species code to the name shown in its page title.

    For every code the page ``https://ebird.org/species/<code>`` is fetched
    and the text of its ``<title>`` element is used as the name, with the
    trailing site suffix removed.

    Args:
        label_code: Iterable of species-code strings.
        timeout: Seconds to wait for each HTTP request before giving up.
            Without a timeout a single stalled connection would block the
            whole run indefinitely.

    Returns:
        dict mapping each code to the cleaned page title.

    Raises:
        ValueError: If a fetched page contains no ``<title>`` element.
        requests.RequestException: Propagated from ``requests.get``
            (connection errors, timeouts, ...).
    """
    # NOTE(review): the original code unconditionally dropped the last 8
    # characters of the title; that width matches ' - eBird'. Confirm the
    # site still uses this suffix.
    title_suffix = ' - eBird'

    label_name = {}
    for code in label_code:
        response = requests.get('https://ebird.org/species/' + code,
                                timeout=timeout)
        # Force UTF-8 so non-ASCII names are decoded consistently.
        response.encoding = 'utf-8'
        soup = BeautifulSoup(response.text, features='html.parser')

        title_tag = soup.find('title')
        if title_tag is None:
            # find() returns None when nothing matches; fail with context
            # instead of an opaque AttributeError on ``None.text``.
            raise ValueError(
                'no <title> found on eBird page for species code '
                '{!r}'.format(code))

        name = title_tag.text.strip()
        # Only strip the suffix when it is really there, so an unexpected
        # title is kept intact rather than truncated.
        if name.endswith(title_suffix):
            name = name[:-len(title_suffix)]
        label_name[code] = name
    return label_name
def get_label_code(csv_path, encoding='utf-8'):
    """Return the distinct first-column values of a CSV file, in file order.

    The first row is treated as a header and skipped. Rows that the csv
    reader yields as empty (blank lines) are ignored.

    Args:
        csv_path: Path of the CSV file to read.
        encoding: Text encoding used to decode the file.

    Returns:
        list of unique first-column strings, ordered by first appearance.
    """
    label_code = []
    seen = set()  # O(1) membership test instead of scanning the list
    # newline='' lets the csv module handle line endings itself, as the
    # csv documentation requires.
    with open(csv_path, 'r', newline='', encoding=encoding) as f:
        reader = csv.reader(f, delimiter=',')
        next(reader, None)  # skip the header row (no-op on an empty file)
        for row in reader:
            if not row:
                # Blank lines come back as [] and have no first column.
                continue
            code = row[0]
            if code not in seen:
                seen.add(code)
                label_code.append(code)
    return label_code
scv_path = './TTSdataset/bird_dataset/train_metadata.csv'
# Collect every distinct label code in the dataset.
label_code = get_label_code(scv_path)
# Look up the bird name for each label code. This requires fetching the
# web page https://ebird.org/species/<label code> for every code.
label_name_dict = get_label_name(label_code)
# Write the results to a CSV file.
# newline='' stops the csv writer from emitting blank rows on Windows, and an
# explicit encoding keeps non-ASCII bird names independent of the locale.
with open('./proce_data/bird_data/bird_name.csv', 'w',
          newline='', encoding='utf-8') as f:
    writer = csv.writer(f, delimiter=',', quotechar='\"')
    # Write the header row.
    writer.writerow(['primary_label', 'real_name'])
    for key, value in label_name_dict.items():
        writer.writerow([key, value])
# Code notes 1
# Latest recommended article published on 2024-10-14 09:08:48