plda源码(二)

vocabulary.cc
存储单词与 id 之间的双向映射(单词 → id,以及 id → 单词)

// Bidirectional mapping between words and integer ids.
//
// Ids are assigned sequentially, starting at 0, in the order in which new
// words are first passed to GetOrCreateID(). The word for a given id is kept
// in `id_words` at the index equal to that id.
class Vocabulary {
 public:

    // Returns the id of `word`. If the word is not known yet, the next free
    // id is assigned to it. `created` is set to true when a new id was
    // assigned and to false when the word already existed.
    int GetOrCreateID(string word,bool &created);

    // Looks up the id of `word`. On success stores it in `id` and returns
    // true; returns false (leaving `id` untouched) when the word is unknown.
    bool GetID(string word, int &id) const;

    // Looks up the word that owns `id`. On success stores it in `word` and
    // returns true; returns false when no word has that id.
    bool GetWordByID(int id, string &word) const;

    // Reads a vocabulary file with one word per line, registering each line
    // through GetOrCreateID(). Returns false if the file cannot be opened or
    // if a line repeats a word that is already registered.
    bool Load(string filename);

    // Returns a mutable reference to the internal word -> id map.
    // NOTE(review): entries added through this reference are not mirrored
    // into the id -> word table or the id counter.
    map<string, int> & GetMap();

 private:
    map<string, int> word_ids;  // word -> id
    vector<string> id_words;    // id -> word; the index is the id
    // Next id to hand out. Must start at 0: without this initializer a
    // default-constructed Vocabulary would read an indeterminate value.
    int cur_id = 0;
};

// Returns the id associated with `word`, registering the word under the next
// free id when it has not been seen before. `created` reports whether a new
// entry was added (true) or an existing one was found (false).
int Vocabulary::GetOrCreateID(string word, bool &created) {
    const int candidate = cur_id;
    // insert() leaves the map untouched when the key already exists and
    // hands back an iterator to the existing entry.
    auto outcome = word_ids.insert(std::pair<string, int>(word, candidate));
    created = outcome.second;
    if (!created) {
        return outcome.first->second;
    }

    // New word: consume the id and record the reverse (id -> word) entry.
    ++cur_id;
    id_words.push_back(word);
    return candidate;
}

// Looks up `word` in the word -> id map. Returns true and writes the id to
// `id` when the word is present; returns false without modifying `id`
// otherwise.
bool Vocabulary::GetID(string word, int &id) const {
    const auto found = word_ids.find(word);
    if (found == word_ids.end()) {
        return false;
    }
    id = found->second;
    return true;
}

// Looks up the word registered under `id`. Returns true and writes the word
// to `word` when the id is valid; returns false without modifying `word`
// when the id is negative or no word has been assigned that id.
bool Vocabulary::GetWordByID(int id, string &word) const {
    // Reject negative ids explicitly: they would otherwise slip past an
    // upper-bound-only check and index the vector out of range. The bound is
    // taken from id_words itself, since that is the container being indexed.
    if (id < 0 || static_cast<vector<string>::size_type>(id) >= id_words.size()) {
        return false;
    }
    word = id_words[id];
    return true;
}

// Populates the vocabulary from `filename`, treating every line of the file
// as one word and registering it via GetOrCreateID(). Returns false, after
// logging to stderr, when the file cannot be opened or when a line repeats a
// word that is already registered; words read before the duplicate remain
// registered. Returns true once all lines have been consumed.
bool Vocabulary::Load(string filename) {
    std::ifstream infile(filename);
    if (infile.fail()) {
        std::cerr << "Vocabulary::Load open file error, file:" << filename
                  << "\n";
        return false;
    }

    for (string entry; getline(infile, entry);) {
        bool is_new;
        GetOrCreateID(entry, is_new);
        if (is_new) {
            continue;
        }
        // A repeated word aborts the load.
        std::cerr << "Vocabulary::Load ,duplicated word:" << entry << "\n";
        return false;
    }

    infile.close();
    return true;
}

map<string, int> & Vocabulary::GetMap(){
    return word_ids;
}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值