# Number of context words taken on EACH side of the target word.
CONTEXT_SIZE = 2  # 2 words to the left, 2 to the right
# Length of each learned word vector.
EMBEDDING_DIM = 100

# Toy corpus, tokenized by plain whitespace splitting (punctuation stays
# attached to the neighbouring word).
raw_text = """We are about to study the idea of a computational process.
Computational processes are abstract beings that inhabit computers.
As they evolve, processes manipulate other abstract things called data.
The evolution of a process is directed by a pattern of rules
called a program. People create programs to direct processes. In effect,
we conjure the spirits of the computer with our spells.""".split()

# By deriving a set from `raw_text`, we deduplicate the array
vocab = set(raw_text)
vocab_size = len(vocab)

# Enumerate the vocabulary in sorted order: iterating a set of strings can
# yield a different order on every interpreter run, which would make the
# word -> index mapping (and anything saved with it) irreproducible.
word_to_ix = {word: i for i, word in enumerate(sorted(vocab))}

# Build (context, target) training pairs. For every position that has a full
# window on both sides, the context is the CONTEXT_SIZE words before it
# followed by the CONTEXT_SIZE words after it; the target is the word itself.
data = [
    (
        raw_text[i - CONTEXT_SIZE:i] + raw_text[i + 1:i + CONTEXT_SIZE + 1],
        raw_text[i],
    )
    for i in range(CONTEXT_SIZE, len(raw_text) - CONTEXT_SIZE)
]
print(data[:5])
测试打印 data[:5] 为:
[(['We', 'are', 'to', 'study'], 'about'),
(['are', 'about', 'study', 'the'], 'to'),
(['about', 'to', 'the', 'idea'], 'study'),
(['to', 'study', 'idea', 'of'], 'the'),
(['study', 'the', 'of', 'a'], 'idea')]
构建模型:
class CBOW(nn.Module):
    """Continuous bag-of-words model that predicts a word from its context.

    The context word indices are embedded, the embeddings are concatenated
    into one vector, passed through a 128-unit ReLU hidden layer, and
    projected to a log-probability distribution over the vocabulary.
    """

    def __init__(self, vocab_size, n_dim, context_size):
        """Create the layers.

        Args:
            vocab_size: Number of distinct words; size of the embedding
                table and of the output distribution.
            n_dim: Dimension of each word embedding.
            context_size: Number of context words on EACH side of the
                target, so the model consumes ``2 * context_size`` indices.
        """
        super().__init__()
        self.embeddings = nn.Embedding(vocab_size, n_dim)
        # All 2 * context_size embeddings are concatenated before linear1.
        self.linear1 = nn.Linear(2 * context_size * n_dim, 128)
        self.linear2 = nn.Linear(128, vocab_size)

    def forward(self, inputs):
        """Return log-probabilities over the vocabulary.

        Args:
            inputs: Long tensor of word indices, either 1-D with
                ``2 * context_size`` entries (one example) or 2-D with
                shape ``(batch, 2 * context_size)``.

        Returns:
            Tensor of shape ``(1, vocab_size)`` for a 1-D input, or
            ``(batch, vocab_size)`` for a 2-D input.
        """
        embeds = self.embeddings(inputs)
        if inputs.dim() == 1:
            # Single example: flatten every embedding into one row.
            embeds = embeds.view(1, -1)
        else:
            # Batched input: keep the leading batch axis and flatten each
            # example's embeddings into its own row.
            embeds = embeds.view(inputs.size(0), -1)
        out = F.relu(self.linear1(embeds))
        out = self.linear2(out)
        log_probs = F.log_softmax(out, dim=1)
        return log_probs
# create your model and train. here are some functions to help you make
# the data ready for use by your module
def make_context_vector(context, word_to_ix):
    """Convert a list of context words into a tensor of vocabulary indices.

    Args:
        context: Iterable of words.
        word_to_ix: Mapping from word to its integer index.

    Returns:
        A 1-D ``torch.long`` tensor holding one index per word, in the same
        order as ``context``.

    Raises:
        KeyError: If a word in ``context`` is missing from ``word_to_ix``.
    """
    idxs = [word_to_ix[w] for w in context]
    return torch.tensor(idxs, dtype=torch.long)
print(make_context_vector(data[0][0], word_to_ix))  # example

# Decide once where the model and tensors live instead of querying CUDA
# availability for every training sample.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = CBOW(len(vocab), EMBEDDING_DIM, CONTEXT_SIZE).to(device)

losses = []  # summed loss of every epoch, in order
loss_function = nn.NLLLoss()  # the model already outputs log-probabilities
optimizer = optim.SGD(model.parameters(), lr=0.001)

# The index tensors are the same in every epoch, so build them (and move
# them to the device) a single time up front.
training_examples = [
    (
        make_context_vector(context, word_to_ix).to(device),
        torch.tensor([word_to_ix[target]], dtype=torch.long, device=device),
    )
    for context, target in data
]

for epoch in range(200):
    total_loss = 0
    # One optimizer step per (context, target) pair.
    for context_vector, target_ix in training_examples:
        optimizer.zero_grad()  # gradients accumulate unless cleared
        log_probs = model(context_vector)
        loss = loss_function(log_probs, target_ix)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print("epoch", epoch, " -->", total_loss)
    losses.append(total_loss)