graykode#57 (refactor) update for PyTorch 1.0.0+ with refactored code
graykode committed Aug 14, 2020
1 parent 2cc3124 commit 5c4ed7f
Showing 12 changed files with 625 additions and 685 deletions.
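
The heart of this refactor is two idioms that became standard with PyTorch 1.0: the torch.autograd.Variable wrapper is dropped (it was merged into Tensor in 0.4), and hand-rolled nn.Parameter weight matrices driven by torch.mm are rewritten as nn.Linear(..., bias=False) modules. Below is a minimal sketch of that pattern, not part of the commit; the names and sizes are illustrative only.

# Sketch (assumption: PyTorch >= 1.0) of the refactoring pattern applied throughout this commit.
import torch
import torch.nn as nn

in_dim, out_dim, batch = 4, 3, 2
x = torch.randn(batch, in_dim)               # a plain Tensor; no Variable wrapper needed

# old style: explicit weight matrix plus matmul
W_old = nn.Parameter(torch.randn(in_dim, out_dim))
y_old = torch.mm(x, W_old)                   # [batch, out_dim]

# new style: the same map expressed as a bias-free linear layer
linear = nn.Linear(in_dim, out_dim, bias=False)
with torch.no_grad():
    linear.weight.copy_(W_old.t())           # nn.Linear stores weight as [out_dim, in_dim]
y_new = linear(x)                            # [batch, out_dim]

print(torch.allclose(y_old, y_new))          # True: identical computation, tidier module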
94 changes: 45 additions & 49 deletions 1-1.NNLM/NNLM.py
@@ -1,33 +1,17 @@
# %%
# code by Tae Hwan Jung @graykode
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable

dtype = torch.FloatTensor

sentences = [ "i like dog", "i love coffee", "i hate milk"]

word_list = " ".join(sentences).split()
word_list = list(set(word_list))
word_dict = {w: i for i, w in enumerate(word_list)}
number_dict = {i: w for i, w in enumerate(word_list)}
n_class = len(word_dict) # number of Vocabulary

# NNLM Parameter
n_step = 2 # n-1 in paper
n_hidden = 2 # h in paper
m = 2 # m in paper

def make_batch(sentences):
def make_batch():
input_batch = []
target_batch = []

for sen in sentences:
word = sen.split()
input = [word_dict[n] for n in word[:-1]]
target = word_dict[word[-1]]
word = sen.split() # space tokenizer
input = [word_dict[n] for n in word[:-1]] # create (1~n-1) as input
target = word_dict[word[-1]] # create (n) as target; we usually call this a 'causal language model'

input_batch.append(input)
target_batch.append(target)
@@ -39,44 +23,56 @@ class NNLM(nn.Module):
def __init__(self):
super(NNLM, self).__init__()
self.C = nn.Embedding(n_class, m)
self.H = nn.Parameter(torch.randn(n_step * m, n_hidden).type(dtype))
self.W = nn.Parameter(torch.randn(n_step * m, n_class).type(dtype))
self.d = nn.Parameter(torch.randn(n_hidden).type(dtype))
self.U = nn.Parameter(torch.randn(n_hidden, n_class).type(dtype))
self.b = nn.Parameter(torch.randn(n_class).type(dtype))
self.H = nn.Linear(n_step * m, n_hidden, bias=False)
self.d = nn.Parameter(torch.ones(n_hidden))
self.U = nn.Linear(n_hidden, n_class, bias=False)
self.W = nn.Linear(n_step * m, n_class, bias=False)
self.b = nn.Parameter(torch.ones(n_class))

def forward(self, X):
X = self.C(X)
X = self.C(X) # X : [batch_size, n_step, m]
X = X.view(-1, n_step * m) # [batch_size, n_step * m]
tanh = torch.tanh(self.d + torch.mm(X, self.H)) # [batch_size, n_hidden]
output = self.b + torch.mm(X, self.W) + torch.mm(tanh, self.U) # [batch_size, n_class]
tanh = torch.tanh(self.d + self.H(X)) # [batch_size, n_hidden]
output = self.b + self.W(X) + self.U(tanh) # [batch_size, n_class]
return output

model = NNLM()
if __name__ == '__main__':
n_step = 2 # number of steps, n-1 in paper
n_hidden = 2 # number of hidden size, h in paper
m = 2 # embedding size, m in paper

sentences = ["i like dog", "i love coffee", "i hate milk"]

word_list = " ".join(sentences).split()
word_list = list(set(word_list))
word_dict = {w: i for i, w in enumerate(word_list)}
number_dict = {i: w for i, w in enumerate(word_list)}
n_class = len(word_dict) # number of Vocabulary

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
model = NNLM()

input_batch, target_batch = make_batch(sentences)
input_batch = Variable(torch.LongTensor(input_batch))
target_batch = Variable(torch.LongTensor(target_batch))
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training
for epoch in range(5000):
input_batch, target_batch = make_batch()
input_batch = torch.LongTensor(input_batch)
target_batch = torch.LongTensor(target_batch)

optimizer.zero_grad()
output = model(input_batch)
# Training
for epoch in range(5000):
optimizer.zero_grad()
output = model(input_batch)

# output : [batch_size, n_class], target_batch : [batch_size] (LongTensor, not one-hot)
loss = criterion(output, target_batch)
if (epoch + 1)%1000 == 0:
print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))
# output : [batch_size, n_class], target_batch : [batch_size]
loss = criterion(output, target_batch)
if (epoch + 1) % 1000 == 0:
print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))

loss.backward()
optimizer.step()
loss.backward()
optimizer.step()

# Predict
predict = model(input_batch).data.max(1, keepdim=True)[1]
# Predict
predict = model(input_batch).data.max(1, keepdim=True)[1]

# Test
print([sen.split()[:2] for sen in sentences], '->', [number_dict[n.item()] for n in predict.squeeze()])
# Test
print([sen.split()[:2] for sen in sentences], '->', [number_dict[n.item()] for n in predict.squeeze()])
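
For reference, the refactored forward pass still computes the original Bengio et al. (2003) NNLM output, y = b + X W + U tanh(d + X H), only expressed through nn.Linear modules. The following is an illustrative shape walk-through under the toy settings above (n_step=2, m=2, n_hidden=2, and n_class=7 for these three sentences); it is a sketch, not code from the commit.

# Illustrative sketch (not from the commit): shape flow of the refactored NNLM forward pass.
import torch
import torch.nn as nn

n_step, m, n_hidden, n_class, batch_size = 2, 2, 2, 7, 3
C = nn.Embedding(n_class, m)
H = nn.Linear(n_step * m, n_hidden, bias=False)
d = nn.Parameter(torch.ones(n_hidden))
U = nn.Linear(n_hidden, n_class, bias=False)
W = nn.Linear(n_step * m, n_class, bias=False)
b = nn.Parameter(torch.ones(n_class))

X = torch.randint(0, n_class, (batch_size, n_step))   # word indices for the (n-1)-word contexts
emb = C(X).view(-1, n_step * m)                        # [batch_size, n_step * m]
hidden = torch.tanh(d + H(emb))                        # [batch_size, n_hidden]
output = b + W(emb) + U(hidden)                        # [batch_size, n_class]
print(output.shape)                                    # torch.Size([3, 7])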
129 changes: 59 additions & 70 deletions 1-2.Word2Vec/Word2Vec-Skipgram(Softmax).py
@@ -1,93 +1,82 @@
'''
code by Tae Hwan Jung(Jeff Jung) @graykode
'''
# %%
# code by Tae Hwan Jung @graykode
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
import matplotlib.pyplot as plt

dtype = torch.FloatTensor

# 3 Words Sentence
sentences = [ "i like dog", "i like cat", "i like animal",
"dog cat animal", "apple cat dog like", "dog fish milk like",
"dog cat eyes like", "i like apple", "apple i hate",
"apple i movie book music like", "cat dog hate", "cat dog like"]

word_sequence = " ".join(sentences).split()
word_list = " ".join(sentences).split()
word_list = list(set(word_list))
word_dict = {w: i for i, w in enumerate(word_list)}

# Word2Vec Parameter
batch_size = 20 # To show 2 dim embedding graph
embedding_size = 2 # To show 2 dim embedding graph
voc_size = len(word_list)

def random_batch(data, size):
def random_batch():
random_inputs = []
random_labels = []
random_index = np.random.choice(range(len(data)), size, replace=False)
random_index = np.random.choice(range(len(skip_grams)), batch_size, replace=False)

for i in random_index:
random_inputs.append(np.eye(voc_size)[data[i][0]]) # target
random_labels.append(data[i][1]) # context word
random_inputs.append(np.eye(voc_size)[skip_grams[i][0]]) # target
random_labels.append(skip_grams[i][1]) # context word

return random_inputs, random_labels

# Make skip gram of one size window
skip_grams = []
for i in range(1, len(word_sequence) - 1):
target = word_dict[word_sequence[i]]
context = [word_dict[word_sequence[i - 1]], word_dict[word_sequence[i + 1]]]

for w in context:
skip_grams.append([target, w])

# Model
class Word2Vec(nn.Module):
def __init__(self):
super(Word2Vec, self).__init__()

# W and WT are not in a transpose relationship
self.W = nn.Parameter(-2 * torch.rand(voc_size, embedding_size) + 1).type(dtype) # voc_size > embedding_size Weight
self.WT = nn.Parameter(-2 * torch.rand(embedding_size, voc_size) + 1).type(dtype) # embedding_size > voc_size Weight
self.W = nn.Linear(voc_size, embedding_size, bias=False) # voc_size > embedding_size Weight
self.WT = nn.Linear(embedding_size, voc_size, bias=False) # embedding_size > voc_size Weight

def forward(self, X):
# X : [batch_size, voc_size]
hidden_layer = torch.matmul(X, self.W) # hidden_layer : [batch_size, embedding_size]
output_layer = torch.matmul(hidden_layer, self.WT) # output_layer : [batch_size, voc_size]
hidden_layer = self.W(X) # hidden_layer : [batch_size, embedding_size]
output_layer = self.WT(hidden_layer) # output_layer : [batch_size, voc_size]
return output_layer

model = Word2Vec()

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training
for epoch in range(5000):

input_batch, target_batch = random_batch(skip_grams, batch_size)

input_batch = Variable(torch.Tensor(input_batch))
target_batch = Variable(torch.LongTensor(target_batch))

optimizer.zero_grad()
output = model(input_batch)

# output : [batch_size, voc_size], target_batch : [batch_size] (LongTensor, not one-hot)
loss = criterion(output, target_batch)
if (epoch + 1)%1000 == 0:
print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))

loss.backward()
optimizer.step()

for i, label in enumerate(word_list):
W, WT = model.parameters()
x,y = float(W[i][0]), float(W[i][1])
plt.scatter(x, y)
plt.annotate(label, xy=(x, y), xytext=(5, 2), textcoords='offset points', ha='right', va='bottom')
plt.show()
if __name__ == '__main__':
batch_size = 2 # mini-batch size
embedding_size = 2 # embedding size

sentences = ["apple banana fruit", "banana orange fruit", "orange banana fruit",
"dog cat animal", "cat monkey animal", "monkey dog animal"]

word_sequence = " ".join(sentences).split()
word_list = " ".join(sentences).split()
word_list = list(set(word_list))
word_dict = {w: i for i, w in enumerate(word_list)}
voc_size = len(word_list)

# Make skip gram of one size window
skip_grams = []
for i in range(1, len(word_sequence) - 1):
target = word_dict[word_sequence[i]]
context = [word_dict[word_sequence[i - 1]], word_dict[word_sequence[i + 1]]]
for w in context:
skip_grams.append([target, w])

model = Word2Vec()

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training
for epoch in range(5000):
input_batch, target_batch = random_batch()
input_batch = torch.Tensor(input_batch)
target_batch = torch.LongTensor(target_batch)

optimizer.zero_grad()
output = model(input_batch)

# output : [batch_size, voc_size], target_batch : [batch_size] (LongTensor, not one-hot)
loss = criterion(output, target_batch)
if (epoch + 1) % 1000 == 0:
print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))

loss.backward()
optimizer.step()

for i, label in enumerate(word_list):
W, WT = model.parameters()
x, y = W[0][i].item(), W[1][i].item()
plt.scatter(x, y)
plt.annotate(label, xy=(x, y), xytext=(5, 2), textcoords='offset points', ha='right', va='bottom')
plt.show()
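
One detail worth noting in the plotting loop: the old code stored the embedding as a raw Parameter of shape [voc_size, embedding_size] and read row i (W[i][0], W[i][1]); nn.Linear keeps its weight as [out_features, in_features], i.e. [embedding_size, voc_size], so the refactored code reads column i instead (W[0][i], W[1][i]). A short illustrative sketch, not part of the commit:

# Sketch (illustrative only): why the plot indexing flips after the switch to nn.Linear.
import torch.nn as nn

voc_size, embedding_size = 8, 2
W = nn.Linear(voc_size, embedding_size, bias=False)
print(W.weight.shape)                                  # torch.Size([2, 8]) == [embedding_size, voc_size]

i = 3                                                  # any word index
x, y = W.weight[0][i].item(), W.weight[1][i].item()    # new code: the embedding of word i is column i
# old code kept a [voc_size, embedding_size] Parameter and read row i instead:
# x, y = float(W_param[i][0]), float(W_param[i][1])    # W_param is hypothetical here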