```python
# encoding=utf-8
# Project: learn-pytorch
# Author: xingjunjie    github: @gavinxing
# Create Time: 29/07/2017 11:58 AM on PyCharm
# Basic template from http://pytorch.org/tutorials/beginner/nlp/word_embeddings_tutorial.html

import torch
import torch.nn as nn
import torch.autograd as autograd
import torch.optim as optim
import torch.nn.functional as F


class CBOW(nn.Module):

    def __init__(self, context_size=2, embedding_size=100, vocab_size=None):
        super(CBOW, self).__init__()
        self.embeddings = nn.Embedding(vocab_size, embedding_size)
        self.linear1 = nn.Linear(embedding_size, vocab_size)

    def forward(self, inputs):
        lookup_embeds = self.embeddings(inputs)
        embeds = lookup_embeds.sum(dim=0)
        out = self.linear1(embeds)
        out = F.log_softmax(out)
        return out


# create your model and train. here are some functions to help you make
# the data ready for use by your module


def make_context_vector(context, word_to_ix):
    idxs = [word_to_ix[w] for w in context]
    tensor = torch.LongTensor(idxs)
    return autograd.Variable(tensor)

# print(make_context_vector(data[0][0], word_to_ix))  # example


if __name__ == '__main__':
    CONTEXT_SIZE = 2  # 2 words to the left, 2 to the right
    EMBEDDING_SIZE = 10

    raw_text = """We are about to study the idea of a computational process.
    Computational processes are abstract beings that inhabit computers.
    As they evolve, processes manipulate other abstract things called data.
    The evolution of a process is directed by a pattern of rules
    called a program. People create programs to direct processes. In effect,
    we conjure the spirits of the computer with our spells.""".split()

    # By deriving a set from `raw_text`, we deduplicate the array
    vocab = set(raw_text)
    vocab_size = len(vocab)

    word_to_ix = {word: i for i, word in enumerate(vocab)}

    data = []
    for i in range(2, len(raw_text) - 2):
        context = [raw_text[i - 2], raw_text[i - 1],
                   raw_text[i + 1], raw_text[i + 2]]
        target = raw_text[i]
        data.append((context, target))

    loss_func = nn.CrossEntropyLoss()
    net = CBOW(CONTEXT_SIZE, embedding_size=EMBEDDING_SIZE, vocab_size=vocab_size)
    optimizer = optim.SGD(net.parameters(), lr=0.01)

    for epoch in range(100):
        total_loss = 0
        for context, target in data:
            context_var = make_context_vector(context, word_to_ix)
            net.zero_grad()
            log_probs = net(context_var)
            loss = loss_func(log_probs, autograd.Variable(
                torch.LongTensor([word_to_ix[target]])
            ))
            loss.backward()
            optimizer.step()
            total_loss += loss.data
        print(total_loss)
```
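Once the script finishes, the learned word vectors are simply the rows of the embedding layer's weight matrix. A minimal lookup sketch, assuming `net` and `word_to_ix` from the script above (the word `'process'` is just an arbitrary pick from `raw_text`):

```python
# Sketch: inspect a learned word vector after training has finished.
# Assumes `net` and `word_to_ix` from the script above are in scope.
word = 'process'                         # any word that occurs in raw_text
vec = net.embeddings.weight.data[word_to_ix[word]]
print(word, vec.size())                  # torch.Size([10]) with EMBEDDING_SIZE = 10
print(vec)
```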
Where do you update the embeddings? It seems to me that this is simply training to predict a word given the context, but I don't see where the embeddings are updated (or even what they would be updated with).
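A minimal check, assuming the `net` built in the script above: the embedding table is itself a parameter of the module, so the gradient of the middle-word prediction loss flows back into the rows that were looked up, and `optimizer.step()` moves them along with the linear layer's weights.

```python
# Sketch: the word vectors live in net.embeddings.weight, which is a trainable
# parameter, so optim.SGD(net.parameters(), ...) updates it together with the
# linear layer on every optimizer.step().
for name, param in net.named_parameters():
    print(name, tuple(param.size()))
# expected output (shapes only):
#   embeddings.weight (vocab_size, embedding_size)
#   linear1.weight    (vocab_size, embedding_size)
#   linear1.bias      (vocab_size,)
```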
Running the code gives "RuntimeError: dimension out of range (expected to be in range of [-1, 0], but got 1)". What could be the reason?
To make it work, in `CBOW.forward()` comment out the line `out = F.log_softmax(out)`. Also update the loss call to read `loss = loss_func(log_probs.view(-1,1), autograd.Variable(...))`.
For the loss call, `loss = loss_func(log_probs.view(1,-1), autograd.Variable(...))` works for me.
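Putting the two suggestions together, a sketch of the patched pieces (imports and everything else as in the gist above; `nn.CrossEntropyLoss` already applies `log_softmax` internally, and it expects a `(1, vocab_size)` input to go with the single-element target):

```python
# Sketch: patched forward() plus the matching loss call.
class CBOW(nn.Module):
    def __init__(self, context_size=2, embedding_size=100, vocab_size=None):
        super(CBOW, self).__init__()
        self.embeddings = nn.Embedding(vocab_size, embedding_size)
        self.linear1 = nn.Linear(embedding_size, vocab_size)

    def forward(self, inputs):
        lookup_embeds = self.embeddings(inputs)   # (2 * context, embedding_size)
        embeds = lookup_embeds.sum(dim=0)         # (embedding_size,)
        return self.linear1(embeds)               # raw scores; no log_softmax here

# ... and inside the training loop:
#     loss = loss_func(log_probs.view(1, -1),    # (1, vocab_size)
#                      autograd.Variable(torch.LongTensor([word_to_ix[target]])))
```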
Why is `context_size` unused?
Btw, `CONTEXT_SIZE` is unused.
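For what it's worth, a sketch of how `context_size` could be wired in (hypothetical, not what the gist author necessarily intended): either just sanity-check the window length, or concatenate the `2 * context_size` context vectors so the linear layer's input width actually depends on it.

```python
# Sketch: two hypothetical ways to actually use context_size.
class CBOW(nn.Module):
    def __init__(self, context_size=2, embedding_size=100, vocab_size=None):
        super(CBOW, self).__init__()
        self.context_size = context_size
        self.embeddings = nn.Embedding(vocab_size, embedding_size)
        # (a) keep the sum-of-embeddings formulation: same linear layer as before
        self.linear1 = nn.Linear(embedding_size, vocab_size)
        # (b) or concatenate the 2 * context_size context vectors instead:
        # self.linear1 = nn.Linear(2 * context_size * embedding_size, vocab_size)

    def forward(self, inputs):
        # sanity-check that the window matches what the model was built for
        assert inputs.size(0) == 2 * self.context_size
        lookup_embeds = self.embeddings(inputs)
        embeds = lookup_embeds.sum(dim=0)          # (a) sum the context vectors
        # embeds = lookup_embeds.view(1, -1)       # (b) concatenation variant
        return self.linear1(embeds)
```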