diff --git a/src/__pycache__/config.cpython-38.pyc b/src/__pycache__/config.cpython-38.pyc new file mode 100644 index 0000000..fb63045 Binary files /dev/null and b/src/__pycache__/config.cpython-38.pyc differ diff --git a/src/__pycache__/loader.cpython-38.pyc b/src/__pycache__/loader.cpython-38.pyc new file mode 100644 index 0000000..593fa7c Binary files /dev/null and b/src/__pycache__/loader.cpython-38.pyc differ diff --git a/src/config.py b/src/config.py index 191c77c..12f01d9 100644 --- a/src/config.py +++ b/src/config.py @@ -9,5 +9,5 @@ vocab_size = top_k + 1 features_shape = 2048 attention_features_shape = 64 -data_path = '/home/levanpon1009/work/project/xray-report/data' +data_path = '../../xraydata/data' EPOCHS = 20 diff --git a/src/loader.py b/src/loader.py index 3687613..a1df92a 100644 --- a/src/loader.py +++ b/src/loader.py @@ -5,13 +5,13 @@ from tensorflow.keras.applications.inception_resnet_v2 import preprocess_input from sklearn.model_selection import train_test_split -from src import config +import config def load_csv(data_root): contents = pd.read_csv(os.path.join(data_root, 'data.csv')) all_text = contents['findings'].map(lambda x: ' ' + x + ' ').astype(str).to_numpy() - all_images = contents['filename'].map(lambda x: os.path.join(data_root, 'images', x)).astype(str).to_numpy() + all_images = contents['filename'].map(lambda x: os.path.join(data_root, 'images/images_normalized', x)).astype(str).to_numpy() train_images, valid_images, train_texts, valid_texts = train_test_split(all_images, all_text, test_size=0.2, random_state=42) @@ -47,7 +47,9 @@ def load_data(data_path): train_images, valid_images, train_texts, valid_texts, all_text = load_csv(data_path) tokenizer = tf.keras.preprocessing.text.Tokenizer(num_words=config.top_k, - oov_token="") + oov_token="", + filters='!"#$\t\n', + lower= True) tokenizer.fit_on_texts(all_text) tokenizer.word_index[''] = 0 diff --git a/src/logs/gradient_tape/20200930-H0749/test/events.out.tfevents.1601471269.minh-B365-M-AORUS-ELITE.8517.9893.v2 b/src/logs/gradient_tape/20200930-H0749/test/events.out.tfevents.1601471269.minh-B365-M-AORUS-ELITE.8517.9893.v2 new file mode 100644 index 0000000..5585300 Binary files /dev/null and b/src/logs/gradient_tape/20200930-H0749/test/events.out.tfevents.1601471269.minh-B365-M-AORUS-ELITE.8517.9893.v2 differ diff --git a/src/logs/gradient_tape/20200930-H0749/train/events.out.tfevents.1601471269.minh-B365-M-AORUS-ELITE.8517.9885.v2 b/src/logs/gradient_tape/20200930-H0749/train/events.out.tfevents.1601471269.minh-B365-M-AORUS-ELITE.8517.9885.v2 new file mode 100644 index 0000000..5585300 Binary files /dev/null and b/src/logs/gradient_tape/20200930-H0749/train/events.out.tfevents.1601471269.minh-B365-M-AORUS-ELITE.8517.9885.v2 differ diff --git a/src/logs/gradient_tape/20200930-H0817/test/events.out.tfevents.1601471297.minh-B365-M-AORUS-ELITE.8610.9893.v2 b/src/logs/gradient_tape/20200930-H0817/test/events.out.tfevents.1601471297.minh-B365-M-AORUS-ELITE.8610.9893.v2 new file mode 100644 index 0000000..158763a Binary files /dev/null and b/src/logs/gradient_tape/20200930-H0817/test/events.out.tfevents.1601471297.minh-B365-M-AORUS-ELITE.8610.9893.v2 differ diff --git a/src/logs/gradient_tape/20200930-H0817/train/events.out.tfevents.1601471297.minh-B365-M-AORUS-ELITE.8610.9885.v2 b/src/logs/gradient_tape/20200930-H0817/train/events.out.tfevents.1601471297.minh-B365-M-AORUS-ELITE.8610.9885.v2 new file mode 100644 index 0000000..158763a Binary files /dev/null and b/src/logs/gradient_tape/20200930-H0817/train/events.out.tfevents.1601471297.minh-B365-M-AORUS-ELITE.8610.9885.v2 differ diff --git a/src/logs/gradient_tape/20200930-H0955/test/events.out.tfevents.1601471395.minh-B365-M-AORUS-ELITE.8771.9893.v2 b/src/logs/gradient_tape/20200930-H0955/test/events.out.tfevents.1601471395.minh-B365-M-AORUS-ELITE.8771.9893.v2 new file mode 100644 index 0000000..999cad9 Binary files /dev/null and b/src/logs/gradient_tape/20200930-H0955/test/events.out.tfevents.1601471395.minh-B365-M-AORUS-ELITE.8771.9893.v2 differ diff --git a/src/logs/gradient_tape/20200930-H0955/train/events.out.tfevents.1601471395.minh-B365-M-AORUS-ELITE.8771.9885.v2 b/src/logs/gradient_tape/20200930-H0955/train/events.out.tfevents.1601471395.minh-B365-M-AORUS-ELITE.8771.9885.v2 new file mode 100644 index 0000000..999cad9 Binary files /dev/null and b/src/logs/gradient_tape/20200930-H0955/train/events.out.tfevents.1601471395.minh-B365-M-AORUS-ELITE.8771.9885.v2 differ diff --git a/src/logs/gradient_tape/20200930-H1159/test/events.out.tfevents.1601478719.minh-B365-M-AORUS-ELITE.5694.9893.v2 b/src/logs/gradient_tape/20200930-H1159/test/events.out.tfevents.1601478719.minh-B365-M-AORUS-ELITE.5694.9893.v2 new file mode 100644 index 0000000..95fa6a0 Binary files /dev/null and b/src/logs/gradient_tape/20200930-H1159/test/events.out.tfevents.1601478719.minh-B365-M-AORUS-ELITE.5694.9893.v2 differ diff --git a/src/logs/gradient_tape/20200930-H1159/train/events.out.tfevents.1601478719.minh-B365-M-AORUS-ELITE.5694.9885.v2 b/src/logs/gradient_tape/20200930-H1159/train/events.out.tfevents.1601478719.minh-B365-M-AORUS-ELITE.5694.9885.v2 new file mode 100644 index 0000000..95fa6a0 Binary files /dev/null and b/src/logs/gradient_tape/20200930-H1159/train/events.out.tfevents.1601478719.minh-B365-M-AORUS-ELITE.5694.9885.v2 differ diff --git a/src/logs/gradient_tape/20200930-H2928/test/events.out.tfevents.1601468968.minh-B365-M-AORUS-ELITE.6210.9893.v2 b/src/logs/gradient_tape/20200930-H2928/test/events.out.tfevents.1601468968.minh-B365-M-AORUS-ELITE.6210.9893.v2 new file mode 100644 index 0000000..6e5fd15 Binary files /dev/null and b/src/logs/gradient_tape/20200930-H2928/test/events.out.tfevents.1601468968.minh-B365-M-AORUS-ELITE.6210.9893.v2 differ diff --git a/src/logs/gradient_tape/20200930-H2928/train/events.out.tfevents.1601468968.minh-B365-M-AORUS-ELITE.6210.9885.v2 b/src/logs/gradient_tape/20200930-H2928/train/events.out.tfevents.1601468968.minh-B365-M-AORUS-ELITE.6210.9885.v2 new file mode 100644 index 0000000..6e5fd15 Binary files /dev/null and b/src/logs/gradient_tape/20200930-H2928/train/events.out.tfevents.1601468968.minh-B365-M-AORUS-ELITE.6210.9885.v2 differ diff --git a/src/logs/gradient_tape/20200930-H3044/test/events.out.tfevents.1601469044.minh-B365-M-AORUS-ELITE.6429.9893.v2 b/src/logs/gradient_tape/20200930-H3044/test/events.out.tfevents.1601469044.minh-B365-M-AORUS-ELITE.6429.9893.v2 new file mode 100644 index 0000000..a206534 Binary files /dev/null and b/src/logs/gradient_tape/20200930-H3044/test/events.out.tfevents.1601469044.minh-B365-M-AORUS-ELITE.6429.9893.v2 differ diff --git a/src/logs/gradient_tape/20200930-H3044/train/events.out.tfevents.1601469044.minh-B365-M-AORUS-ELITE.6429.9885.v2 b/src/logs/gradient_tape/20200930-H3044/train/events.out.tfevents.1601469044.minh-B365-M-AORUS-ELITE.6429.9885.v2 new file mode 100644 index 0000000..a206534 Binary files /dev/null and b/src/logs/gradient_tape/20200930-H3044/train/events.out.tfevents.1601469044.minh-B365-M-AORUS-ELITE.6429.9885.v2 differ diff --git a/src/logs/gradient_tape/20200930-H3100/test/events.out.tfevents.1601469060.minh-B365-M-AORUS-ELITE.6495.9893.v2 b/src/logs/gradient_tape/20200930-H3100/test/events.out.tfevents.1601469060.minh-B365-M-AORUS-ELITE.6495.9893.v2 new file mode 100644 index 0000000..7b89fc9 Binary files /dev/null and b/src/logs/gradient_tape/20200930-H3100/test/events.out.tfevents.1601469060.minh-B365-M-AORUS-ELITE.6495.9893.v2 differ diff --git a/src/logs/gradient_tape/20200930-H3100/train/events.out.tfevents.1601469060.minh-B365-M-AORUS-ELITE.6495.9885.v2 b/src/logs/gradient_tape/20200930-H3100/train/events.out.tfevents.1601469060.minh-B365-M-AORUS-ELITE.6495.9885.v2 new file mode 100644 index 0000000..7b89fc9 Binary files /dev/null and b/src/logs/gradient_tape/20200930-H3100/train/events.out.tfevents.1601469060.minh-B365-M-AORUS-ELITE.6495.9885.v2 differ diff --git a/src/logs/gradient_tape/20200930-H3123/test/events.out.tfevents.1601469083.minh-B365-M-AORUS-ELITE.6566.9893.v2 b/src/logs/gradient_tape/20200930-H3123/test/events.out.tfevents.1601469083.minh-B365-M-AORUS-ELITE.6566.9893.v2 new file mode 100644 index 0000000..639eb57 Binary files /dev/null and b/src/logs/gradient_tape/20200930-H3123/test/events.out.tfevents.1601469083.minh-B365-M-AORUS-ELITE.6566.9893.v2 differ diff --git a/src/logs/gradient_tape/20200930-H3123/train/events.out.tfevents.1601469083.minh-B365-M-AORUS-ELITE.6566.9885.v2 b/src/logs/gradient_tape/20200930-H3123/train/events.out.tfevents.1601469083.minh-B365-M-AORUS-ELITE.6566.9885.v2 new file mode 100644 index 0000000..639eb57 Binary files /dev/null and b/src/logs/gradient_tape/20200930-H3123/train/events.out.tfevents.1601469083.minh-B365-M-AORUS-ELITE.6566.9885.v2 differ diff --git a/src/logs/gradient_tape/20200930-H4018/test/events.out.tfevents.1601469618.minh-B365-M-AORUS-ELITE.7018.9893.v2 b/src/logs/gradient_tape/20200930-H4018/test/events.out.tfevents.1601469618.minh-B365-M-AORUS-ELITE.7018.9893.v2 new file mode 100644 index 0000000..45b3a48 Binary files /dev/null and b/src/logs/gradient_tape/20200930-H4018/test/events.out.tfevents.1601469618.minh-B365-M-AORUS-ELITE.7018.9893.v2 differ diff --git a/src/logs/gradient_tape/20200930-H4018/train/events.out.tfevents.1601469618.minh-B365-M-AORUS-ELITE.7018.9885.v2 b/src/logs/gradient_tape/20200930-H4018/train/events.out.tfevents.1601469618.minh-B365-M-AORUS-ELITE.7018.9885.v2 new file mode 100644 index 0000000..45b3a48 Binary files /dev/null and b/src/logs/gradient_tape/20200930-H4018/train/events.out.tfevents.1601469618.minh-B365-M-AORUS-ELITE.7018.9885.v2 differ diff --git a/src/logs/gradient_tape/20200930-H4033/test/events.out.tfevents.1601469633.minh-B365-M-AORUS-ELITE.7084.9893.v2 b/src/logs/gradient_tape/20200930-H4033/test/events.out.tfevents.1601469633.minh-B365-M-AORUS-ELITE.7084.9893.v2 new file mode 100644 index 0000000..7b9862f Binary files /dev/null and b/src/logs/gradient_tape/20200930-H4033/test/events.out.tfevents.1601469633.minh-B365-M-AORUS-ELITE.7084.9893.v2 differ diff --git a/src/logs/gradient_tape/20200930-H4033/train/events.out.tfevents.1601469633.minh-B365-M-AORUS-ELITE.7084.9885.v2 b/src/logs/gradient_tape/20200930-H4033/train/events.out.tfevents.1601469633.minh-B365-M-AORUS-ELITE.7084.9885.v2 new file mode 100644 index 0000000..7b9862f Binary files /dev/null and b/src/logs/gradient_tape/20200930-H4033/train/events.out.tfevents.1601469633.minh-B365-M-AORUS-ELITE.7084.9885.v2 differ diff --git a/src/logs/gradient_tape/20200930-H4056/test/events.out.tfevents.1601469656.minh-B365-M-AORUS-ELITE.7162.9893.v2 b/src/logs/gradient_tape/20200930-H4056/test/events.out.tfevents.1601469656.minh-B365-M-AORUS-ELITE.7162.9893.v2 new file mode 100644 index 0000000..06136f5 Binary files /dev/null and b/src/logs/gradient_tape/20200930-H4056/test/events.out.tfevents.1601469656.minh-B365-M-AORUS-ELITE.7162.9893.v2 differ diff --git a/src/logs/gradient_tape/20200930-H4056/train/events.out.tfevents.1601469656.minh-B365-M-AORUS-ELITE.7162.9885.v2 b/src/logs/gradient_tape/20200930-H4056/train/events.out.tfevents.1601469656.minh-B365-M-AORUS-ELITE.7162.9885.v2 new file mode 100644 index 0000000..06136f5 Binary files /dev/null and b/src/logs/gradient_tape/20200930-H4056/train/events.out.tfevents.1601469656.minh-B365-M-AORUS-ELITE.7162.9885.v2 differ diff --git a/src/logs/gradient_tape/20200930-H4240/test/events.out.tfevents.1601469760.minh-B365-M-AORUS-ELITE.7294.9893.v2 b/src/logs/gradient_tape/20200930-H4240/test/events.out.tfevents.1601469760.minh-B365-M-AORUS-ELITE.7294.9893.v2 new file mode 100644 index 0000000..59acb98 Binary files /dev/null and b/src/logs/gradient_tape/20200930-H4240/test/events.out.tfevents.1601469760.minh-B365-M-AORUS-ELITE.7294.9893.v2 differ diff --git a/src/logs/gradient_tape/20200930-H4240/train/events.out.tfevents.1601469760.minh-B365-M-AORUS-ELITE.7294.9885.v2 b/src/logs/gradient_tape/20200930-H4240/train/events.out.tfevents.1601469760.minh-B365-M-AORUS-ELITE.7294.9885.v2 new file mode 100644 index 0000000..59acb98 Binary files /dev/null and b/src/logs/gradient_tape/20200930-H4240/train/events.out.tfevents.1601469760.minh-B365-M-AORUS-ELITE.7294.9885.v2 differ diff --git a/src/logs/gradient_tape/20200930-H4422/test/events.out.tfevents.1601477062.minh-B365-M-AORUS-ELITE.11214.9893.v2 b/src/logs/gradient_tape/20200930-H4422/test/events.out.tfevents.1601477062.minh-B365-M-AORUS-ELITE.11214.9893.v2 new file mode 100644 index 0000000..c585e92 Binary files /dev/null and b/src/logs/gradient_tape/20200930-H4422/test/events.out.tfevents.1601477062.minh-B365-M-AORUS-ELITE.11214.9893.v2 differ diff --git a/src/logs/gradient_tape/20200930-H4422/train/events.out.tfevents.1601477062.minh-B365-M-AORUS-ELITE.11214.9885.v2 b/src/logs/gradient_tape/20200930-H4422/train/events.out.tfevents.1601477062.minh-B365-M-AORUS-ELITE.11214.9885.v2 new file mode 100644 index 0000000..c585e92 Binary files /dev/null and b/src/logs/gradient_tape/20200930-H4422/train/events.out.tfevents.1601477062.minh-B365-M-AORUS-ELITE.11214.9885.v2 differ diff --git a/src/logs/gradient_tape/20200930-H4457/test/events.out.tfevents.1601477097.minh-B365-M-AORUS-ELITE.11292.9893.v2 b/src/logs/gradient_tape/20200930-H4457/test/events.out.tfevents.1601477097.minh-B365-M-AORUS-ELITE.11292.9893.v2 new file mode 100644 index 0000000..500946d Binary files /dev/null and b/src/logs/gradient_tape/20200930-H4457/test/events.out.tfevents.1601477097.minh-B365-M-AORUS-ELITE.11292.9893.v2 differ diff --git a/src/logs/gradient_tape/20200930-H4457/train/events.out.tfevents.1601477097.minh-B365-M-AORUS-ELITE.11292.9885.v2 b/src/logs/gradient_tape/20200930-H4457/train/events.out.tfevents.1601477097.minh-B365-M-AORUS-ELITE.11292.9885.v2 new file mode 100644 index 0000000..500946d Binary files /dev/null and b/src/logs/gradient_tape/20200930-H4457/train/events.out.tfevents.1601477097.minh-B365-M-AORUS-ELITE.11292.9885.v2 differ diff --git a/src/logs/gradient_tape/20200930-H4605/test/events.out.tfevents.1601477165.minh-B365-M-AORUS-ELITE.11366.9893.v2 b/src/logs/gradient_tape/20200930-H4605/test/events.out.tfevents.1601477165.minh-B365-M-AORUS-ELITE.11366.9893.v2 new file mode 100644 index 0000000..cb9d2a9 Binary files /dev/null and b/src/logs/gradient_tape/20200930-H4605/test/events.out.tfevents.1601477165.minh-B365-M-AORUS-ELITE.11366.9893.v2 differ diff --git a/src/logs/gradient_tape/20200930-H4605/train/events.out.tfevents.1601477165.minh-B365-M-AORUS-ELITE.11366.9885.v2 b/src/logs/gradient_tape/20200930-H4605/train/events.out.tfevents.1601477165.minh-B365-M-AORUS-ELITE.11366.9885.v2 new file mode 100644 index 0000000..cb9d2a9 Binary files /dev/null and b/src/logs/gradient_tape/20200930-H4605/train/events.out.tfevents.1601477165.minh-B365-M-AORUS-ELITE.11366.9885.v2 differ diff --git a/src/logs/gradient_tape/20200930-H4726/test/events.out.tfevents.1601470046.minh-B365-M-AORUS-ELITE.7607.9893.v2 b/src/logs/gradient_tape/20200930-H4726/test/events.out.tfevents.1601470046.minh-B365-M-AORUS-ELITE.7607.9893.v2 new file mode 100644 index 0000000..89d330a Binary files /dev/null and b/src/logs/gradient_tape/20200930-H4726/test/events.out.tfevents.1601470046.minh-B365-M-AORUS-ELITE.7607.9893.v2 differ diff --git a/src/logs/gradient_tape/20200930-H4726/train/events.out.tfevents.1601470046.minh-B365-M-AORUS-ELITE.7607.9885.v2 b/src/logs/gradient_tape/20200930-H4726/train/events.out.tfevents.1601470046.minh-B365-M-AORUS-ELITE.7607.9885.v2 new file mode 100644 index 0000000..89d330a Binary files /dev/null and b/src/logs/gradient_tape/20200930-H4726/train/events.out.tfevents.1601470046.minh-B365-M-AORUS-ELITE.7607.9885.v2 differ diff --git a/src/logs/gradient_tape/20200930-H5041/test/events.out.tfevents.1601477441.minh-B365-M-AORUS-ELITE.2383.9893.v2 b/src/logs/gradient_tape/20200930-H5041/test/events.out.tfevents.1601477441.minh-B365-M-AORUS-ELITE.2383.9893.v2 new file mode 100644 index 0000000..e24ced1 Binary files /dev/null and b/src/logs/gradient_tape/20200930-H5041/test/events.out.tfevents.1601477441.minh-B365-M-AORUS-ELITE.2383.9893.v2 differ diff --git a/src/logs/gradient_tape/20200930-H5041/train/events.out.tfevents.1601477441.minh-B365-M-AORUS-ELITE.2383.9885.v2 b/src/logs/gradient_tape/20200930-H5041/train/events.out.tfevents.1601477441.minh-B365-M-AORUS-ELITE.2383.9885.v2 new file mode 100644 index 0000000..e24ced1 Binary files /dev/null and b/src/logs/gradient_tape/20200930-H5041/train/events.out.tfevents.1601477441.minh-B365-M-AORUS-ELITE.2383.9885.v2 differ diff --git a/src/models/__pycache__/__init__.cpython-38.pyc b/src/models/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..54a9ab6 Binary files /dev/null and b/src/models/__pycache__/__init__.cpython-38.pyc differ diff --git a/src/models/__pycache__/decoder.cpython-38.pyc b/src/models/__pycache__/decoder.cpython-38.pyc new file mode 100644 index 0000000..e5d80e8 Binary files /dev/null and b/src/models/__pycache__/decoder.cpython-38.pyc differ diff --git a/src/models/__pycache__/encoder.cpython-38.pyc b/src/models/__pycache__/encoder.cpython-38.pyc new file mode 100644 index 0000000..5b2c127 Binary files /dev/null and b/src/models/__pycache__/encoder.cpython-38.pyc differ diff --git a/src/models/__pycache__/multiheadattn.cpython-38.pyc b/src/models/__pycache__/multiheadattn.cpython-38.pyc new file mode 100644 index 0000000..6c70476 Binary files /dev/null and b/src/models/__pycache__/multiheadattn.cpython-38.pyc differ diff --git a/src/models/__pycache__/transformer.cpython-38.pyc b/src/models/__pycache__/transformer.cpython-38.pyc new file mode 100644 index 0000000..461a92a Binary files /dev/null and b/src/models/__pycache__/transformer.cpython-38.pyc differ diff --git a/src/models/decoder.py b/src/models/decoder.py index 07df82e..08762e3 100644 --- a/src/models/decoder.py +++ b/src/models/decoder.py @@ -1,5 +1,5 @@ import tensorflow as tf - +from models.transformer import MultiHeadAttention class BahdanauAttention(tf.keras.Model): def __init__(self, units): @@ -46,11 +46,11 @@ def __init__(self, embedding_dim, units, vocab_size): self.fc1 = tf.keras.layers.Dense(self.units) self.fc2 = tf.keras.layers.Dense(vocab_size) - self.attention = BahdanauAttention(self.units) + self.attention = MultiHeadAttention(self.units, num_heads= 8) def call(self, x, features, hidden): # defining attention as a separate model - context_vector, attention_weights = self.attention(features, hidden) + context_vector, attention_weights = self.MultiHeadAttention(features, hidden) # x shape after passing through embedding == (batch_size, 1, embedding_dim) x = self.embedding(x) @@ -74,3 +74,48 @@ def call(self, x, features, hidden): def reset_state(self, batch_size): return tf.zeros((batch_size, self.units)) + +class MultiheadDecoder(tf.keras.Model): + def __init__(self, embedding_dim, units, vocab_size): + super(MultiheadDecoder, self).__init__() + self.units = units + + self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim) + self.gru = tf.keras.layers.GRU(self.units, + return_sequences=True, + return_state=True, + recurrent_initializer='glorot_uniform') + self.fc1 = tf.keras.layers.Dense(self.units) + self.fc2 = tf.keras.layers.Dense(vocab_size) + + self.multiheadattention = MultiHeadAttention(self.units, num_heads= 8) + self.attention = BahdanauAttention(self.units) + + def call(self, x, features, hidden): + # defining attention as a separate model + + features, _ = self.multiheadattention(features, features, features) + context_vector, attention_weights = self.attention(features, hidden) + + # x shape after passing through embedding == (batch_size, 1, embedding_dim) + x = self.embedding(x) + + # x shape after concatenation == (batch_size, 1, embedding_dim + hidden_size) + x = tf.concat([tf.expand_dims(context_vector, 1), x], axis=-1) + + # passing the concatenated vector to the GRU + output, state = self.gru(x) + + # shape == (batch_size, max_length, hidden_size) + x = self.fc1(output) + + # x shape == (batch_size * max_length, hidden_size) + x = tf.reshape(x, (-1, x.shape[2])) + + # output shape == (batch_size * max_length, vocab) + x = self.fc2(x) + + return x, state, attention_weights + + def reset_state(self, batch_size): + return tf.zeros((batch_size, self.units)) \ No newline at end of file diff --git a/src/models/encoder.py b/src/models/encoder.py index 3743d0f..7c8cfe3 100644 --- a/src/models/encoder.py +++ b/src/models/encoder.py @@ -1,7 +1,7 @@ import tensorflow as tf from tensorflow.keras.applications import EfficientNetB3 -from src import config +import config class Encoder(tf.keras.Model): diff --git a/src/models/multihead_attn.py b/src/models/multihead_attn.py deleted file mode 100644 index 11b3690..0000000 --- a/src/models/multihead_attn.py +++ /dev/null @@ -1,49 +0,0 @@ -from keras - -class MultiHeadAttention(tf.keras.layers.Layer): - def __init__(self, d_model, num_heads): - super(MultiHeadAttention, self).__init__() - self.num_heads = num_heads - self.d_model = d_model - - assert d_model % self.num_heads == 0 - - self.depth = d_model // self.num_heads - - self.wq = tf.keras.layers.Dense(d_model) - self.wk = tf.keras.layers.Dense(d_model) - self.wv = tf.keras.layers.Dense(d_model) - - self.dense = tf.keras.layers.Dense(d_model) - - def split_heads(self, x, batch_size): - """Split the last dimension into (num_heads, depth). - Transpose the result such that the shape is (batch_size, num_heads, seq_len, depth) - """ - x = tf.reshape(x, (batch_size, -1, self.num_heads, self.depth)) - return tf.transpose(x, perm=[0, 2, 1, 3]) - - def call(self, v, k, q, mask): - batch_size = tf.shape(q)[0] - - q = self.wq(q) # (batch_size, seq_len, d_model) - k = self.wk(k) # (batch_size, seq_len, d_model) - v = self.wv(v) # (batch_size, seq_len, d_model) - - q = self.split_heads(q, batch_size) # (batch_size, num_heads, seq_len_q, depth) - k = self.split_heads(k, batch_size) # (batch_size, num_heads, seq_len_k, depth) - v = self.split_heads(v, batch_size) # (batch_size, num_heads, seq_len_v, depth) - - # scaled_attention.shape == (batch_size, num_heads, seq_len_q, depth) - # attention_weights.shape == (batch_size, num_heads, seq_len_q, seq_len_k) - scaled_attention, attention_weights = scaled_dot_product_attention( - q, k, v, mask) - - scaled_attention = tf.transpose(scaled_attention, perm=[0, 2, 1, 3]) # (batch_size, seq_len_q, num_heads, depth) - - concat_attention = tf.reshape(scaled_attention, - (batch_size, -1, self.d_model)) # (batch_size, seq_len_q, d_model) - - output = self.dense(concat_attention) # (batch_size, seq_len_q, d_model) - - return output, attention_weights \ No newline at end of file diff --git a/src/models/transformer.py b/src/models/transformer.py new file mode 100644 index 0000000..95b0e69 --- /dev/null +++ b/src/models/transformer.py @@ -0,0 +1,141 @@ +import tensorflow as tf + +def scaled_dot_product_attention(q, k, v, mask): + """Calculate the attention weights. + q, k, v must have matching leading dimensions. + k, v must have matching penultimate dimension, i.e.: seq_len_k = seq_len_v. + The mask has different shapes depending on its type(padding or look ahead) + but it must be broadcastable for addition. + + Args: + q: query shape == (..., seq_len_q, depth) + k: key shape == (..., seq_len_k, depth) + v: value shape == (..., seq_len_v, depth_v) + mask: Float tensor with shape broadcastable + to (..., seq_len_q, seq_len_k). Defaults to None. + + Returns: + output, attention_weights + """ + + matmul_qk = tf.matmul(q, k, transpose_b=True) # (..., seq_len_q, seq_len_k) + + # scale matmul_qk + dk = tf.cast(tf.shape(k)[-1], tf.float32) + scaled_attention_logits = matmul_qk / tf.math.sqrt(dk) + + # add the mask to the scaled tensor. + if mask is not None: + scaled_attention_logits += (mask * -1e9) + + # softmax is normalized on the last axis (seq_len_k) so that the scores + # add up to 1. + attention_weights = tf.nn.softmax(scaled_attention_logits, axis=-1) # (..., seq_len_q, seq_len_k) + + output = tf.matmul(attention_weights, v) # (..., seq_len_q, depth_v) + + return output, attention_weights + + +class MultiHeadAttention(tf.keras.layers.Layer): + def __init__(self, d_model, num_heads): + super(MultiHeadAttention, self).__init__() + self.num_heads = num_heads + self.d_model = d_model + + assert d_model % self.num_heads == 0 + + self.depth = d_model // self.num_heads + + self.wq = tf.keras.layers.Dense(d_model) + self.wk = tf.keras.layers.Dense(d_model) + self.wv = tf.keras.layers.Dense(d_model) + + self.dense = tf.keras.layers.Dense(d_model) + + def split_heads(self, x, batch_size): + """Split the last dimension into (num_heads, depth). + Transpose the result such that the shape is (batch_size, num_heads, seq_len, depth) + """ + x = tf.reshape(x, (batch_size, -1, self.num_heads, self.depth)) + return tf.transpose(x, perm=[0, 2, 1, 3]) + + def call(self, v, k, q, mask = None): + batch_size = tf.shape(q)[0] + + q = self.wq(q) # (batch_size, seq_len, d_model) + k = self.wk(k) # (batch_size, seq_len, d_model) + v = self.wv(v) # (batch_size, seq_len, d_model) + + q = self.split_heads(q, batch_size) # (batch_size, num_heads, seq_len_q, depth) + k = self.split_heads(k, batch_size) # (batch_size, num_heads, seq_len_k, depth) + v = self.split_heads(v, batch_size) # (batch_size, num_heads, seq_len_v, depth) + + # scaled_attention.shape == (batch_size, num_heads, seq_len_q, depth) + # attention_weights.shape == (batch_size, num_heads, seq_len_q, seq_len_k) + scaled_attention, attention_weights = scaled_dot_product_attention( + q, k, v, mask) + + scaled_attention = tf.transpose(scaled_attention, perm=[0, 2, 1, 3]) # (batch_size, seq_len_q, num_heads, depth) + + concat_attention = tf.reshape(scaled_attention, + (batch_size, -1, self.d_model)) # (batch_size, seq_len_q, d_model) + + output = self.dense(concat_attention) # (batch_size, seq_len_q, d_model) + + return output, attention_weights + + def reset_state(self, batch_size): + return tf.zeros((batch_size, self.units)) + +def create_padding_mask(seq): + seq = tf.cast(tf.math.equal(seq, 0), tf.float32) + + # add extra dimensions to add the padding + # to the attention logits. + return seq[:, tf.newaxis, tf.newaxis, :] # (batch_size, 1, 1, seq_len) + +class FullyConnected(tf.keras.layers.Layer): + def __init__(self, dmodel, dff = 2048): + self.ffn = tf.keras.Sequential([ + tf.keras.layers.Dense(dff, activation='relu'), # (batch_size, seq_len, dff) + tf.keras.layers.Dense(d_model) # (batch_size, seq_len, d_model) + ]) + def call(self, x ): + return self.ffn(x) + + +class TransformerLayer(tf.keras.layers.Layer): + def __init__(self, d_model, num_heads, dff, rate=0.1, with_external = False): + super(TransformerLayer, self).__init__() + + self.mha = MultiHeadAttention(d_model, num_heads) + self.ffn = FullyConnected(d_model, dff) + + self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6) + self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6) + + self.dropout1 = tf.keras.layers.Dropout(rate) + self.dropout2 = tf.keras.layers.Dropout(rate) + + self.with_external = with_external + # if self.with_external: + + def call(self, x, training, mask): + attn_output, _ = self.mha(x, x, x, mask) # (batch_size, input_seq_len, d_model) + attn_output = self.dropout1(attn_output, training=training) + out1 = self.layernorm1(x + attn_output) # (batch_size, input_seq_len, d_model) + + ffn_output = self.ffn(out1) # (batch_size, input_seq_len, d_model) + ffn_output = self.dropout2(ffn_output, training=training) + out2 = self.layernorm2(out1 + ffn_output) # (batch_size, input_seq_len, d_model) + + return out2 + +class Transformer(tf.keras.layers.Layer): + def __init__(self, num_layers, d_model, num_heads, dff, rate=0.1): + self.enc_layers = [TransformerLayer(d_model, num_heads, dff, rate) + for _ in range(num_layers)] + self.enc_layers = [TransformerLayer(d_model, num_heads, dff, rate, with_external=Transformer) + for _ in range(num_layers)] + diff --git a/src/train.py b/src/train.py index 6a8f6df..b9b74f1 100644 --- a/src/train.py +++ b/src/train.py @@ -4,18 +4,18 @@ import tensorflow as tf from tensorflow.keras.utils import Progbar -from src import config -from src.loader import load_data -from src.models.encoder import Encoder -from src.models.decoder import Decoder - +import config +from loader import load_data +from models.encoder import Encoder +from models.decoder import Decoder, MultiheadDecoder if __name__ == '__main__': train_ds, valid_ds, max_length_train, max_length_valid, tokenizer = load_data(config.data_path) encoder = Encoder(config.embedding_dim) - decoder = Decoder(config.embedding_dim, config.units, config.vocab_size) + # decoder = Decoder(config.embedding_dim, config.units, config.vocab_size) + decoder = MultiheadDecoder(config.embedding_dim, config.units, config.vocab_size) optimizer = tf.keras.optimizers.Adam() loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction='none') @@ -47,14 +47,16 @@ def train_step(img_tensor, target): # because the captions are not related from image to image hidden = decoder.reset_state(batch_size=target.shape[0]) dec_input = tf.expand_dims([tokenizer.word_index['']] * target.shape[0], 1) - + # target length x units with tf.GradientTape() as tape: + # 81 x 256 features = encoder(img_tensor) + print('encode shape :', features.shape) for i in range(1, target.shape[1]): # passing the features through the decoder predictions, hidden, _ = decoder(dec_input, features, hidden) - + loss += loss_function(target[:, i], predictions) # using teacher forcing @@ -70,20 +72,52 @@ def train_step(img_tensor, target): return loss, total_loss + def evaluate_step(img_tensor, target): + loss = 0 + + # initializing the hidden state for each batch + # because the captions are not related from image to image + hidden = decoder.reset_state(batch_size=target.shape[0]) + dec_input = tf.expand_dims([tokenizer.word_index['']] * target.shape[0], 1) + + features = encoder(img_tensor) - EPOCHS = 20 + for i in range(1, target.shape[1]): + # passing the features through the decoder + predictions, hidden, _ = decoder(dec_input, features, hidden) + + loss += loss_function(target[:, i], predictions) + + # using teacher forcing + dec_input = tf.expand_dims(target[:, i], 1) + + total_loss = (loss / int(target.shape[1])) + + return loss, total_loss + + EPOCHS = config.EPOCHS for epoch in range(0, EPOCHS): start = time.time() total_loss = 0 pb_i = Progbar(max_length_train, stateful_metrics=['loss']) + # Training + print('[TRAIN]') for (batch, (img_tensor, target)) in enumerate(train_ds): batch_loss, t_loss = train_step(img_tensor, target) total_loss += t_loss - pb_i.add(config.BATCH_SIZE, values=[('loss', total_loss)]) - + pb_i.add(config.BATCH_SIZE, values=[('total loss', total_loss)]) + pb_i.add(config.BATCH_SIZE, values=[('batch loss', batch_loss)]) + + # Evaluate + print('[EVALUATE]') + for (batch, (img_tensor, target)) in enumerate(valid_ds): + batch_loss, t_loss = evaluate_step(img_tensor, target) + total_loss += t_loss + pb_i.add(config.BATCH_SIZE, values=[('total loss', total_loss)]) + pb_i.add(config.BATCH_SIZE, values=[('batch loss', batch_loss)]) ckpt_manager.save()