@@ -122,9 +122,12 @@ def train_lang_model(model_path: int,
                      trn_indexed: List[int],
                      val_indexed: List[int],
                      vocab_size: int,
+                     lr: float,
                      n_cycle: int = 2,
-                     em_sz: int = 1200,
-                     nh: int = 1200,
+                     cycle_len: int = 3,
+                     cycle_mult: int = 1,
+                     em_sz: int = 400,
+                     nh: int = 400,
                      nl: int = 3,
                      bptt: int = 20,
                      wd: float = 1e-7,
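The signature change above surfaces the learning rate and the SGDR cycle hyperparameters (n_cycle, cycle_len, cycle_mult) as arguments instead of hardcoding them inside the function, and shrinks the embedding and hidden sizes from 1200 to 400. A minimal sketch of a call under the new signature; the values are illustrative (lr matches what the old code hardcoded), and trn_indexed, val_indexed, and vocab_size are assumed to come from a tokenization step not shown in this diff:

    # Illustrative call only; trn_indexed, val_indexed and vocab_size are
    # assumed to come from an earlier preprocessing step not shown here,
    # and the path is hypothetical.
    train_lang_model(model_path='data/langmodel',
                     trn_indexed=trn_indexed,
                     val_indexed=val_indexed,
                     vocab_size=vocab_size,
                     lr=1e-3 / 2,      # the value the old code hardcoded as lrs
                     n_cycle=2,
                     cycle_len=3,
                     cycle_mult=1,
                     em_sz=400,
                     nh=400)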
@@ -185,11 +188,14 @@ def train_lang_model(model_path: int,
                         dropoute=drops[3],
                         dropouth=drops[4])
 
-    # learning rate is hardcoded, I already ran learning rate finder on this problem.
-    lrs = 1e-3 / 2
-
     # borrowed these parameters from fastai
-    learner.fit(lrs, 2, wds=wd, cycle_len=3, use_clr=(32, 10), best_save_name='langmodel_best')
+    learner.fit(lr,
+                n_cycle=n_cycle,
+                wds=wd,
+                cycle_len=cycle_len,
+                use_clr=(32, 10),
+                cycle_mult=cycle_mult,
+                best_save_name='langmodel_best')
 
     # eval sets model to inference mode (turns off dropout, etc.)
     model = learner.model.eval()
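For reference, under fastai 0.7's fit semantics cycle i lasts cycle_len * cycle_mult**i epochs, so the total training length is cycle_len * sum(cycle_mult**i for i in range(n_cycle)). A quick sanity check (a sketch, assuming those semantics) that the new defaults reproduce the six epochs the old hardcoded call ran as 2 cycles of 3 epochs:

    # Total epochs implied by the SGDR cycle parameters, assuming fastai 0.7
    # semantics: cycle i lasts cycle_len * cycle_mult**i epochs.
    def total_epochs(n_cycle: int, cycle_len: int, cycle_mult: int) -> int:
        return cycle_len * sum(cycle_mult ** i for i in range(n_cycle))

    print(total_epochs(n_cycle=2, cycle_len=3, cycle_mult=1))  # 6, matches the old fit call
    print(total_epochs(n_cycle=2, cycle_len=3, cycle_mult=2))  # 9: cycles of 3 then 6 epochs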
@@ -270,10 +276,14 @@ def get_emb_batch(lang_model, np_array, bs, dest_dir):
         y_mean = get_mean_emb(raw_emb=y, idx_arr=x.data.cpu().numpy())
         # get the last hidden state in the sequence. Returns arr of size (bs, encoder_dim)
         y_last = y[:, -1, :]
+        # get the max across timesteps. Returns arr of size (bs, encoder_dim)
+        y_max = y.max(1)
 
         # collect predictions
         np.save(destPath / f'lang_model_mean_emb_{i}.npy', y_mean)
         np.save(destPath / f'lang_model_last_emb_{i}.npy', y_last)
+        np.save(destPath / f'lang_model_max_emb_{i}.npy', y_max)
+        np.save(destPath / f'lang_model_pool_emb_{i}.npy', np.concatenate([y_mean, y_max, y_last], axis=1))
 
-    logging.warning(f'Saved {2 * len(data_chunked)} files to {str(destPath.absolute())}')
+    logging.warning(f'Saved {4 * len(data_chunked)} files to {str(destPath.absolute())}')
 
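The hunk above adds max pooling over timesteps and a concat-pooled embedding alongside the existing mean and last-state embeddings. A small shape sketch, with random data standing in for the encoder output y (the real y_mean comes from get_mean_emb, which presumably masks padding via idx_arr), showing why the concat-pooled array is three times the encoder width:

    import numpy as np

    bs, seq_len, encoder_dim = 2, 5, 400          # illustrative sizes
    y = np.random.rand(bs, seq_len, encoder_dim)  # stand-in for the encoder output

    y_mean = y.mean(axis=1)   # (bs, encoder_dim): average over timesteps
    y_last = y[:, -1, :]      # (bs, encoder_dim): last hidden state
    y_max = y.max(axis=1)     # (bs, encoder_dim): elementwise max over timesteps

    pooled = np.concatenate([y_mean, y_max, y_last], axis=1)
    print(pooled.shape)       # (2, 1200) == (bs, 3 * encoder_dim)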