GloVe 的一个小例子

import string

import numpy as np

# Toy corpus used for the demonstration
corpus = [
    "I like machine learning.",
    "I enjoy deep learning.",
    "Machine learning is fascinating.",
    "Deep learning is interesting."
]


def tokenize(sentence):
    """Split *sentence* into lowercase word tokens.

    Leading and trailing punctuation is stripped from every
    whitespace-separated token, so "learning." and "learning" map to the
    same vocabulary entry. Tokens that consist only of punctuation are
    dropped.
    """
    stripped = (
        token.strip(string.punctuation) for token in sentence.lower().split()
    )
    return [token for token in stripped if token]


# Tokenize once so the vocabulary and the co-occurrence counts are
# guaranteed to see exactly the same tokens.
tokenized_corpus = [tokenize(sentence) for sentence in corpus]

# Build the word -> index and index -> word mappings; indices are assigned
# in order of first appearance.
word_to_idx = {}
for words in tokenized_corpus:
    for word in words:
        # len() is evaluated before the insertion, so a new word receives
        # the next free index and a known word keeps its existing one.
        word_to_idx.setdefault(word, len(word_to_idx))
idx_to_word = {idx: word for word, idx in word_to_idx.items()}

# Build the co-occurrence matrix: cooc_matrix[i, j] counts how often word j
# appears within window_size positions of word i in the same sentence.
vocab_size = len(word_to_idx)
cooc_matrix = np.zeros((vocab_size, vocab_size))
window_size = 2

for words in tokenized_corpus:
    word_ids = [word_to_idx[word] for word in words]
    for target_idx, target_id in enumerate(word_ids):
        # Clamp the context window to the sentence boundaries
        start_idx = max(0, target_idx - window_size)
        end_idx = min(len(word_ids), target_idx + window_size + 1)

        # Count every word in the window except the target position itself
        for context_idx in range(start_idx, end_idx):
            if context_idx != target_idx:
                cooc_matrix[target_id, word_ids[context_idx]] += 1

# Train GloVe-style embeddings with stochastic gradient descent.
# For every pair (i, j) with a nonzero co-occurrence count X[i, j] the
# squared error of
#     W[i] . U[j] + b[i] + b[j] - log(X[i, j])
# is reduced. NOTE: this is a simplified objective. Every nonzero pair has
# weight 1 (the GloVe weighting function f(X) is not applied) and one bias
# vector b is shared by the target and the context role.
embedding_size = 50
learning_rate = 0.01
num_epochs = 100

# Initialize parameters uniformly in [0, 1):
# W holds the target-word vectors, U the context-word vectors, b the biases.
W = np.random.rand(vocab_size, embedding_size)
U = np.random.rand(vocab_size, embedding_size)
b = np.random.rand(vocab_size)

# The set of nonzero pairs and their log counts never change between epochs,
# so compute them once. np.nonzero returns indices in row-major order, which
# is the order a nested loop over (target, context) would visit them.
target_ids, context_ids = np.nonzero(cooc_matrix > 0)
log_counts = np.log(cooc_matrix[target_ids, context_ids])

# Iterative training
for epoch in range(num_epochs):
    for target_id, context_id, log_count in zip(target_ids, context_ids, log_counts):
        # Prediction error for this pair
        diff = np.dot(W[target_id], U[context_id]) + b[target_id] + b[context_id] - log_count

        # Compute both gradients from the pre-update vectors. Updating
        # W[target_id] first and then reading it for the U step would mix
        # old and new parameters within a single gradient step.
        grad_w = diff * U[context_id]
        grad_u = diff * W[target_id]

        # Update parameters
        W[target_id] -= learning_rate * grad_w
        U[context_id] -= learning_rate * grad_u
        b[target_id] -= learning_rate * diff
        b[context_id] -= learning_rate * diff

# Print the learned vector (row of W) for every vocabulary word,
# in the order the words were added to the vocabulary.
for word in word_to_idx:
    idx = word_to_idx[word]
    print(f"{word}: {W[idx]}")


  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值