From 6e171b914ed28eb04dc9176916a99e9a996a7951 Mon Sep 17 00:00:00 2001
From: graykode
Date: Mon, 8 Jul 2019 21:40:01 +0900
Subject: [PATCH] fix #30 vocab_size to seq_len

---
 .../Transformer(Greedy_decoder)-Torch.py      |  4 ++--
 .../Transformer(Greedy_decoder)_Torch.ipynb   | 23 +++++++++++++++----
 5-1.Transformer/Transformer-Torch.py          |  4 ++--
 5-1.Transformer/Transformer_Torch.ipynb       | 18 +++++++++++----
 4 files changed, 37 insertions(+), 12 deletions(-)

diff --git a/5-1.Transformer/Transformer(Greedy_decoder)-Torch.py b/5-1.Transformer/Transformer(Greedy_decoder)-Torch.py
index 36f1c6c..2c69037 100644
--- a/5-1.Transformer/Transformer(Greedy_decoder)-Torch.py
+++ b/5-1.Transformer/Transformer(Greedy_decoder)-Torch.py
@@ -138,7 +138,7 @@ class Encoder(nn.Module):
     def __init__(self):
         super(Encoder, self).__init__()
         self.src_emb = nn.Embedding(src_vocab_size, d_model)
-        self.pos_emb = nn.Embedding.from_pretrained(get_sinusoid_encoding_table(src_vocab_size, d_model),freeze=True)
+        self.pos_emb = nn.Embedding.from_pretrained(get_sinusoid_encoding_table(src_len+1, d_model),freeze=True)
         self.layers = nn.ModuleList([EncoderLayer() for _ in range(n_layers)])
 
     def forward(self, enc_inputs): # enc_inputs : [batch_size x source_len]
@@ -154,7 +154,7 @@ class Decoder(nn.Module):
     def __init__(self):
         super(Decoder, self).__init__()
         self.tgt_emb = nn.Embedding(tgt_vocab_size, d_model)
-        self.pos_emb = nn.Embedding.from_pretrained(get_sinusoid_encoding_table(tgt_vocab_size, d_model),freeze=True)
+        self.pos_emb = nn.Embedding.from_pretrained(get_sinusoid_encoding_table(tgt_len+1, d_model),freeze=True)
         self.layers = nn.ModuleList([DecoderLayer() for _ in range(n_layers)])
 
     def forward(self, dec_inputs, enc_inputs, enc_outputs): # dec_inputs : [batch_size x target_len]
diff --git a/5-1.Transformer/Transformer(Greedy_decoder)_Torch.ipynb b/5-1.Transformer/Transformer(Greedy_decoder)_Torch.ipynb
index 6f1c818..87db144 100644
--- a/5-1.Transformer/Transformer(Greedy_decoder)_Torch.ipynb
+++ b/5-1.Transformer/Transformer(Greedy_decoder)_Torch.ipynb
@@ -93,6 +93,11 @@
     }
    ],
    "source": [
+    "'''\n",
+    "  code by Tae Hwan Jung(Jeff Jung) @graykode, Derek Miller @dmmiller612\n",
+    "  Reference : https://github.com/jadore801120/attention-is-all-you-need-pytorch\n",
+    "              https://github.com/JayParks/transformer\n",
+    "'''\n",
     "'''\n",
     "  code by Tae Hwan Jung(Jeff Jung) @graykode, Derek Miller @dmmiller612\n",
     "  Reference : https://github.com/jadore801120/attention-is-all-you-need-pytorch\n",
@@ -233,7 +238,7 @@
     "    def __init__(self):\n",
     "        super(Encoder, self).__init__()\n",
     "        self.src_emb = nn.Embedding(src_vocab_size, d_model)\n",
-    "        self.pos_emb = nn.Embedding.from_pretrained(get_sinusoid_encoding_table(src_vocab_size, d_model),freeze=True)\n",
+    "        self.pos_emb = nn.Embedding.from_pretrained(get_sinusoid_encoding_table(src_len+1, d_model),freeze=True)\n",
     "        self.layers = nn.ModuleList([EncoderLayer() for _ in range(n_layers)])\n",
     "\n",
     "    def forward(self, enc_inputs): # enc_inputs : [batch_size x source_len]\n",
@@ -249,7 +254,7 @@
     "    def __init__(self):\n",
     "        super(Decoder, self).__init__()\n",
     "        self.tgt_emb = nn.Embedding(tgt_vocab_size, d_model)\n",
-    "        self.pos_emb = nn.Embedding.from_pretrained(get_sinusoid_encoding_table(tgt_vocab_size, d_model),freeze=True)\n",
+    "        self.pos_emb = nn.Embedding.from_pretrained(get_sinusoid_encoding_table(tgt_len+1, d_model),freeze=True)\n",
     "        self.layers = nn.ModuleList([DecoderLayer() for _ in range(n_layers)])\n",
     "\n",
     "    def forward(self, dec_inputs, enc_inputs, enc_outputs): # dec_inputs : [batch_size x target_len]\n",
@@ -351,10 +356,20 @@
   },
   "kernelspec": {
     "name": "python3",
+    "language": "python",
     "display_name": "Python 3"
   },
-  "accelerator": "GPU"
+  "accelerator": "GPU",
+  "pycharm": {
+    "stem_cell": {
+      "cell_type": "raw",
+      "source": [],
+      "metadata": {
+        "collapsed": false
+      }
+    }
+  }
  },
  "nbformat": 4,
  "nbformat_minor": 0
-}
+}
\ No newline at end of file
diff --git a/5-1.Transformer/Transformer-Torch.py b/5-1.Transformer/Transformer-Torch.py
index 59fc381..1c66708 100644
--- a/5-1.Transformer/Transformer-Torch.py
+++ b/5-1.Transformer/Transformer-Torch.py
@@ -137,7 +137,7 @@ class Encoder(nn.Module):
     def __init__(self):
         super(Encoder, self).__init__()
         self.src_emb = nn.Embedding(src_vocab_size, d_model)
-        self.pos_emb = nn.Embedding.from_pretrained(get_sinusoid_encoding_table(src_vocab_size, d_model),freeze=True)
+        self.pos_emb = nn.Embedding.from_pretrained(get_sinusoid_encoding_table(src_len+1, d_model),freeze=True)
         self.layers = nn.ModuleList([EncoderLayer() for _ in range(n_layers)])
 
     def forward(self, enc_inputs): # enc_inputs : [batch_size x source_len]
@@ -153,7 +153,7 @@ class Decoder(nn.Module):
     def __init__(self):
         super(Decoder, self).__init__()
         self.tgt_emb = nn.Embedding(tgt_vocab_size, d_model)
-        self.pos_emb = nn.Embedding.from_pretrained(get_sinusoid_encoding_table(tgt_vocab_size, d_model),freeze=True)
+        self.pos_emb = nn.Embedding.from_pretrained(get_sinusoid_encoding_table(tgt_len+1, d_model),freeze=True)
         self.layers = nn.ModuleList([DecoderLayer() for _ in range(n_layers)])
 
     def forward(self, dec_inputs, enc_inputs, enc_outputs): # dec_inputs : [batch_size x target_len]
diff --git a/5-1.Transformer/Transformer_Torch.ipynb b/5-1.Transformer/Transformer_Torch.ipynb
index ab0e613..51e86e1 100644
--- a/5-1.Transformer/Transformer_Torch.ipynb
+++ b/5-1.Transformer/Transformer_Torch.ipynb
@@ -250,7 +250,7 @@
     "    def __init__(self):\n",
     "        super(Encoder, self).__init__()\n",
     "        self.src_emb = nn.Embedding(src_vocab_size, d_model)\n",
-    "        self.pos_emb = nn.Embedding.from_pretrained(get_sinusoid_encoding_table(src_vocab_size, d_model),freeze=True)\n",
+    "        self.pos_emb = nn.Embedding.from_pretrained(get_sinusoid_encoding_table(src_len+1, d_model),freeze=True)\n",
     "        self.layers = nn.ModuleList([EncoderLayer() for _ in range(n_layers)])\n",
     "\n",
     "    def forward(self, enc_inputs): # enc_inputs : [batch_size x source_len]\n",
@@ -266,7 +266,7 @@
     "    def __init__(self):\n",
     "        super(Decoder, self).__init__()\n",
     "        self.tgt_emb = nn.Embedding(tgt_vocab_size, d_model)\n",
-    "        self.pos_emb = nn.Embedding.from_pretrained(get_sinusoid_encoding_table(tgt_vocab_size, d_model),freeze=True)\n",
+    "        self.pos_emb = nn.Embedding.from_pretrained(get_sinusoid_encoding_table(tgt_len+1, d_model),freeze=True)\n",
     "        self.layers = nn.ModuleList([DecoderLayer() for _ in range(n_layers)])\n",
     "\n",
     "    def forward(self, dec_inputs, enc_inputs, enc_outputs): # dec_inputs : [batch_size x target_len]\n",
@@ -345,10 +345,20 @@
   },
   "kernelspec": {
     "name": "python3",
+    "language": "python",
     "display_name": "Python 3"
   },
-  "accelerator": "GPU"
+  "accelerator": "GPU",
+  "pycharm": {
+    "stem_cell": {
+      "cell_type": "raw",
+      "source": [],
+      "metadata": {
+        "collapsed": false
+      }
+    }
+  }
  },
  "nbformat": 4,
  "nbformat_minor": 0
-}
+}
\ No newline at end of file
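
Why the change works: the frozen positional table built by nn.Embedding.from_pretrained is looked up with position indices (0..seq_len), never with token ids, so it only needs src_len+1 / tgt_len+1 rows rather than vocab_size rows; the +1 leaves one extra row, e.g. for a padding position. The sketch below illustrates this sizing. The body of get_sinusoid_encoding_table and the example values (src_len=5, d_model=512) are assumptions for illustration, not necessarily the repository's exact implementation.

import numpy as np
import torch
import torch.nn as nn

def get_sinusoid_encoding_table(n_position, d_model):
    # row p, column i holds p / 10000^(2*(i//2)/d_model); sine on even columns, cosine on odd ones
    table = np.array([[pos / np.power(10000, 2 * (i // 2) / d_model)
                       for i in range(d_model)] for pos in range(n_position)])
    table[:, 0::2] = np.sin(table[:, 0::2])
    table[:, 1::2] = np.cos(table[:, 1::2])
    return torch.FloatTensor(table)  # shape: [n_position, d_model]

src_len, d_model = 5, 512  # example values (assumed), e.g. a 5-token source sentence
# Sized by sequence length, not vocabulary size: only positions 0..src_len are ever looked up,
# so a vocab_size-row table wastes memory and wrongly couples positions to the vocabulary.
pos_emb = nn.Embedding.from_pretrained(get_sinusoid_encoding_table(src_len + 1, d_model), freeze=True)
positions = torch.arange(src_len).unsqueeze(0)  # [1, src_len] position indices for one batch
print(pos_emb(positions).shape)                 # torch.Size([1, 5, 512])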