forked from bfelbo/DeepMoji
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmodel_def.py
251 lines (204 loc) · 10.3 KB
/
model_def.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
""" Model definition functions and weight loading.
"""
from __future__ import print_function, division
from keras.models import Model, Sequential
from keras.layers.merge import concatenate
from keras.layers import Input, Bidirectional, Embedding, Dense, Dropout, SpatialDropout1D, LSTM, Activation
from keras.regularizers import L1L2
from attlayer import AttentionWeightedAverage
from global_variables import NB_TOKENS, NB_EMOJI_CLASSES
import numpy as np
from copy import deepcopy
from os.path import exists
import h5py
def deepmoji_feature_encoding(maxlen, weight_path):
""" Loads the pretrained DeepMoji model for extracting features
from the penultimate feature layer. In this way, it transforms
the text into its emotional encoding.
# Arguments:
maxlen: Maximum length of a sentence (given in tokens).
weight_path: Path to model weights to be loaded.
# Returns:
Pretrained model for encoding text into feature vectors.
"""
model = deepmoji_architecture(nb_classes=None, nb_tokens=NB_TOKENS,
maxlen=maxlen, feature_output=True)
load_specific_weights(model, weight_path, exclude_names=['softmax'])
return model
def deepmoji_emojis(maxlen, weight_path):
""" Loads the pretrained DeepMoji model for extracting features
from the penultimate feature layer. In this way, it transforms
the text into its emotional encoding.
# Arguments:
maxlen: Maximum length of a sentence (given in tokens).
weight_path: Path to model weights to be loaded.
# Returns:
Pretrained model for encoding text into feature vectors.
"""
model = deepmoji_architecture(nb_classes=NB_EMOJI_CLASSES,
nb_tokens=NB_TOKENS, maxlen=maxlen)
model.load_weights(weight_path, by_name=False)
return model
def deepmoji_transfer(nb_classes, maxlen, weight_path=None, extend_embedding=0,
embed_dropout_rate=0.25, final_dropout_rate=0.5,
embed_l2=1E-6):
""" Loads the pretrained DeepMoji model for finetuning/transfer learning.
Does not load weights for the softmax layer.
Note that if you are planning to use class average F1 for evaluation,
nb_classes should be set to 2 instead of the actual number of classes
in the dataset, since binary classification will be performed on each
class individually.
Note that for the 'new' method, weight_path should be left as None.
# Arguments:
nb_classes: Number of classes in the dataset.
maxlen: Maximum length of a sentence (given in tokens).
weight_path: Path to model weights to be loaded.
extend_embedding: Number of tokens that have been added to the
vocabulary on top of NB_TOKENS. If this number is larger than 0,
the embedding layer's dimensions are adjusted accordingly, with the
additional weights being set to random values.
embed_dropout_rate: Dropout rate for the embedding layer.
final_dropout_rate: Dropout rate for the final Softmax layer.
embed_l2: L2 regularization for the embedding layerl.
# Returns:
Model with the given parameters.
"""
model = deepmoji_architecture(nb_classes=nb_classes,
nb_tokens=NB_TOKENS + extend_embedding,
maxlen=maxlen, embed_dropout_rate=embed_dropout_rate,
final_dropout_rate=final_dropout_rate, embed_l2=embed_l2)
if weight_path is not None:
load_specific_weights(model, weight_path,
exclude_names=['softmax'],
extend_embedding=extend_embedding)
return model
def deepmoji_architecture(nb_classes, nb_tokens, maxlen, feature_output=False, embed_dropout_rate=0, final_dropout_rate=0, embed_l2=1E-6):
"""
Returns the DeepMoji architecture uninitialized and
without using the pretrained model weights.
# Arguments:
nb_classes: Number of classes in the dataset.
nb_tokens: Number of tokens in the dataset (i.e. vocabulary size).
maxlen: Maximum length of a token.
feature_output: If True the model returns the penultimate
feature vector rather than Softmax probabilities
(defaults to False).
embed_dropout_rate: Dropout rate for the embedding layer.
final_dropout_rate: Dropout rate for the final Softmax layer.
embed_l2: L2 regularization for the embedding layerl.
# Returns:
Model with the given parameters.
"""
# define embedding layer that turns word tokens into vectors
# an activation function is used to bound the values of the embedding
model_input = Input(shape=(maxlen,), dtype='int32')
embed_reg = L1L2(l2=embed_l2) if embed_l2 != 0 else None
embed = Embedding(input_dim=nb_tokens,
output_dim=256,
mask_zero=True,
input_length=maxlen,
embeddings_regularizer=embed_reg,
name='embedding')
x = embed(model_input)
x = Activation('tanh')(x)
# entire embedding channels are dropped out instead of the
# normal Keras embedding dropout, which drops all channels for entire words
# many of the datasets contain so few words that losing one or more words can alter the emotions completely
if embed_dropout_rate != 0:
embed_drop = SpatialDropout1D(embed_dropout_rate, name='embed_drop')
x = embed_drop(x)
# skip-connection from embedding to output eases gradient-flow and allows access to lower-level features
# ordering of the way the merge is done is important for consistency with the pretrained model
lstm_0_output = Bidirectional(LSTM(512, return_sequences=True), name="bi_lstm_0")(x)
lstm_1_output = Bidirectional(LSTM(512, return_sequences=True), name="bi_lstm_1")(lstm_0_output)
x = concatenate([lstm_1_output, lstm_0_output, x])
x = AttentionWeightedAverage(name='attlayer')(x)
if feature_output == False:
# output class probabilities
if final_dropout_rate != 0:
x = Dropout(final_dropout_rate)(x)
if nb_classes > 2:
outputs = [Dense(nb_classes, activation='softmax', name='softmax')(x)]
else:
outputs = [Dense(1, activation='sigmoid', name='softmax')(x)]
model = Model(inputs=[model_input], outputs=outputs, name="DeepMoji")
else:
# output penultimate feature vector
model = Model(inputs=[model_input], outputs=x, name="DeepMoji")
return model
def load_specific_weights(model, weight_path, exclude_names=[], extend_embedding=0, verbose=True):
""" Loads model weights from the given file path, excluding any
given layers.
# Arguments:
model: Model whose weights should be loaded.
weight_path: Path to file containing model weights.
exclude_names: List of layer names whose weights should not be loaded.
extend_embedding: Number of new words being added to vocabulary.
verbose: Verbosity flag.
# Raises:
ValueError if the file at weight_path does not exist.
"""
if not exists(weight_path):
raise ValueError('ERROR (load_weights): The weights file at {} does '
'not exist. Refer to the README for instructions.'
.format(weight_path))
if extend_embedding and 'embedding' in exclude_names:
raise ValueError('ERROR (load_weights): Cannot extend a vocabulary '
'without loading the embedding weights.')
# Copy only weights from the temporary model that are wanted
# for the specific task (e.g. the Softmax is often ignored)
layer_weights = get_weights_from_hdf5(weight_path)
for i, w in enumerate(layer_weights):
l_name = w[0]
weight_names = w[1]
weight_values = w[2]
if l_name in exclude_names:
if verbose:
print('Ignoring weights for {}'.format(l_name))
continue
try:
model_l = model.get_layer(name=l_name)
except ValueError:
raise ValueError("Weights had layer {},".format(l_name)
+ " but could not find this layer in model.")
if verbose:
print('Loading weights for {}'.format(l_name))
# extend embedding layer to allow new randomly initialized words
# if requested. Otherwise, just load the weights for the layer.
if type(model_l) is Embedding and extend_embedding > 0:
comb_weights = append_to_embedding(weight_values,
model_l.get_weights())
model_l.set_weights(comb_weights)
if verbose:
print('Extended vocabulary for embedding layer ' +
'from {} to {} tokens.'.format(
NB_TOKENS, NB_TOKENS + extend_embedding))
else:
model_l.set_weights(weight_values)
def append_to_embedding(pretrain_weights, random_init_weights):
""" Uses pretrained weights for the tokens already in the vocabulary.
Remaining weights will be left with the random initialization. """
pretrain_weights = deepcopy(pretrain_weights)
if type(pretrain_weights) == list:
pretrain_weights = pretrain_weights[0]
if type(random_init_weights) == list:
random_init_weights = random_init_weights[0]
nb_old_tokens = np.shape(pretrain_weights)[0]
random_init_weights[:nb_old_tokens] = pretrain_weights
# must be returned as a list to be properly inserted into Keras model
return [random_init_weights]
def get_weights_from_hdf5(filepath):
""" Loads the weights from a saved Keras model into numpy arrays.
The weights are saved using Keras 2.0 so we don't need all the
conversion functionality for handling old weights.
"""
with h5py.File(filepath, mode='r') as f:
layer_names = [n.decode('utf8') for n in f.attrs['layer_names']]
layer_weights = []
for k, l_name in enumerate(layer_names):
g = f[l_name]
weight_names = [n.decode('utf8') for n in g.attrs['weight_names']]
weight_values = [g[weight_name][:] for weight_name in weight_names]
if len(weight_values):
layer_weights.append([l_name, weight_names, weight_values])
return layer_weights