代码记录1

import csv
import requests
from bs4 import BeautifulSoup


def get_label_name(label_code, timeout=10):
    """Look up a display name for each eBird species code.

    For every code, the page ``https://ebird.org/species/<code>`` is fetched
    and the stripped text of its ``<title>`` element, minus its last eight
    characters, is stored as the name.

    Args:
        label_code: Iterable of species-code strings.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        A dict mapping each code to the name extracted from its page.

    Raises:
        requests.RequestException: On a network failure, a timeout, or a
            non-success HTTP status.
        ValueError: If a fetched page contains no ``<title>`` element.
    """
    label_name = {}
    for code in label_code:
        # Without a timeout a stalled connection would block the run forever.
        req = requests.get('https://ebird.org/species/' + code,
                           timeout=timeout)
        # Fail loudly instead of recording an error page's title as a name.
        req.raise_for_status()
        req.encoding = 'utf-8'
        soup = BeautifulSoup(req.text, features='html.parser')
        title = soup.find('title')
        if title is None:
            raise ValueError(
                'no <title> element on eBird page for ' + repr(code))
        # Drop the trailing 8 characters of the title -- presumably the
        # site suffix " - eBird"; TODO confirm against a live page.
        label_name[code] = title.text.strip()[:-8]
    return label_name


def get_label_code(csv_path):
    """Return the distinct first-column values of a CSV file.

    The first row is treated as a header and skipped. Blank lines, which the
    csv reader yields as empty rows, are ignored. Values are returned in the
    order in which they first appear.

    Args:
        csv_path: Path of the comma-delimited file to read.

    Returns:
        A list of unique first-column strings in first-seen order; empty if
        the file has no data rows.
    """
    label_code = []
    seen = set()  # O(1) membership test instead of scanning the list
    # newline='' lets the csv module handle line endings itself.
    with open(csv_path, 'r', newline='') as f:
        reader = csv.reader(f, delimiter=',')
        next(reader, None)  # skip the header row (tolerates an empty file)
        for row in reader:
            if not row:
                # A blank line produces an empty row; row[0] would fail.
                continue
            code = row[0]
            if code not in seen:
                seen.add(code)
                label_code.append(code)
    return label_code

scv_path = './TTSdataset/bird_dataset/train_metadata.csv'
# Collect every distinct label code present in the dataset.
label_code = get_label_code(scv_path)

# Resolve each label code to a bird name; this requires fetching the web
# page https://ebird.org/species/ + the label code.
label_name_dict = get_label_name(label_code)

# Write the result to a CSV file.
# newline='' stops the csv writer's own line terminator from being translated
# again on Windows (which would produce blank rows); an explicit UTF-8
# encoding keeps any non-ASCII name from failing under a narrower locale
# default encoding.
with open('./proce_data/bird_data/bird_name.csv', 'w',
          newline='', encoding='utf-8') as f:
    writer = csv.writer(f, delimiter=',', quotechar='"')

    # Header row.
    writer.writerow(['primary_label', 'real_name'])

    # One (code, name) row per label.
    writer.writerows(label_name_dict.items())
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值