Commit
f8f037f
·
verified ·
1 Parent(s): b12fe34

Add ncbi-MedCPT-Cross-Encoder-E3-S1-Mpairwise-FullDataTrue

Browse files

Upload fine-tuned rerankers for BioASQ 14B

Co-authored-by: André Ribeiro <andrepedro2004@hotmail.com>
Co-authored-by: Rúben Garrido <rubengarrido@ua.pt>

ncbi-MedCPT-Cross-Encoder-E3-S1-Mpairwise-FullDataTrue/config.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_cross_attention": false,
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": null,
8
+ "classifier_dropout": null,
9
+ "dtype": "bfloat16",
10
+ "eos_token_id": null,
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.1,
13
+ "hidden_size": 768,
14
+ "id2label": {
15
+ "0": "LABEL_0"
16
+ },
17
+ "initializer_range": 0.02,
18
+ "intermediate_size": 3072,
19
+ "is_decoder": false,
20
+ "label2id": {
21
+ "LABEL_0": 0
22
+ },
23
+ "layer_norm_eps": 1e-12,
24
+ "max_position_embeddings": 512,
25
+ "model_type": "bert",
26
+ "num_attention_heads": 12,
27
+ "num_hidden_layers": 12,
28
+ "pad_token_id": 0,
29
+ "position_embedding_type": "absolute",
30
+ "tie_word_embeddings": true,
31
+ "transformers_version": "5.2.0",
32
+ "type_vocab_size": 2,
33
+ "use_cache": false,
34
+ "vocab_size": 30522
35
+ }
ncbi-MedCPT-Cross-Encoder-E3-S1-Mpairwise-FullDataTrue/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90549382928613848bdd7c9d47503f09a50ca0d6e24fe03cafe742e9120d3b40
3
+ size 218989610
ncbi-MedCPT-Cross-Encoder-E3-S1-Mpairwise-FullDataTrue/ranx_results.json ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": "ncbi-MedCPT-Cross-Encoder",
3
+ "val_files": [
4
+ "../../data/val_data/13B3_golden.json",
5
+ "../../data/val_data/13B1_golden.json",
6
+ "../../data/val_data/13B2_golden.json",
7
+ "../../data/val_data/13B4_golden.json"
8
+ ],
9
+ "total": {
10
+ "ndcg@5": 0.7132871671763956,
11
+ "mrr": 0.8323138422035482,
12
+ "recall@10": 0.4122187536008182,
13
+ "recall@100": 0.9284123733146155,
14
+ "map@10": 0.3563436457873443,
15
+ "map-bioasq@10": 0.5765806197478992
16
+ },
17
+ "13B3_golden.json": {
18
+ "ndcg@5": 0.7127843511629302,
19
+ "mrr": 0.8637394957983193,
20
+ "recall@10": 0.41900119687192505,
21
+ "recall@100": 0.9418191480335674,
22
+ "map@10": 0.3607746912158015,
23
+ "map-bioasq@10": 0.5709368580765639
24
+ },
25
+ "13B1_golden.json": {
26
+ "ndcg@5": 0.6550588631081015,
27
+ "mrr": 0.7716293183940242,
28
+ "recall@10": 0.4762836657884654,
29
+ "recall@100": 0.9489863204885913,
30
+ "map@10": 0.3986474389958226,
31
+ "map-bioasq@10": 0.5349774354186119
32
+ },
33
+ "13B2_golden.json": {
34
+ "ndcg@5": 0.7890921829276484,
35
+ "mrr": 0.863249299719888,
36
+ "recall@10": 0.5038523144064057,
37
+ "recall@100": 0.9389523155472866,
38
+ "map@10": 0.46121303813544406,
39
+ "map-bioasq@10": 0.6805634920634921
40
+ },
41
+ "13B4_golden.json": {
42
+ "ndcg@5": 0.6962132715069025,
43
+ "mrr": 0.8306372549019607,
44
+ "recall@10": 0.24973783733647686,
45
+ "recall@100": 0.8838917091890159,
46
+ "map@10": 0.20473941480230906,
47
+ "map-bioasq@10": 0.5198446934329286
48
+ }
49
+ }
ncbi-MedCPT-Cross-Encoder-E3-S1-Mpairwise-FullDataTrue/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
ncbi-MedCPT-Cross-Encoder-E3-S1-Mpairwise-FullDataTrue/tokenizer_config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backend": "tokenizers",
3
+ "clean_up_tokenization_spaces": true,
4
+ "cls_token": "[CLS]",
5
+ "do_basic_tokenize": true,
6
+ "do_lower_case": true,
7
+ "extra_special_tokens": [
8
+ "[PAD]",
9
+ "[UNK]",
10
+ "[CLS]",
11
+ "[SEP]",
12
+ "[MASK]"
13
+ ],
14
+ "is_local": false,
15
+ "mask_token": "[MASK]",
16
+ "max_length": 512,
17
+ "model_max_length": 1000000000000000019884624838656,
18
+ "never_split": null,
19
+ "pad_to_multiple_of": null,
20
+ "pad_token": "[PAD]",
21
+ "pad_token_type_id": 0,
22
+ "padding_side": "right",
23
+ "sep_token": "[SEP]",
24
+ "stride": 0,
25
+ "strip_accents": null,
26
+ "tokenize_chinese_chars": true,
27
+ "tokenizer_class": "BertTokenizer",
28
+ "truncation_side": "right",
29
+ "truncation_strategy": "longest_first",
30
+ "unk_token": "[UNK]"
31
+ }
ncbi-MedCPT-Cross-Encoder-E3-S1-Mpairwise-FullDataTrue/trainer_state.json ADDED
@@ -0,0 +1,989 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 3.0,
6
+ "eval_steps": 500,
7
+ "global_step": 6678,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.022461814914645103,
14
+ "grad_norm": 35.5,
15
+ "learning_rate": 1.9853249475890986e-05,
16
+ "loss": 0.94648193359375,
17
+ "step": 50
18
+ },
19
+ {
20
+ "epoch": 0.044923629829290206,
21
+ "grad_norm": 88.5,
22
+ "learning_rate": 1.9703504043126685e-05,
23
+ "loss": 0.630087890625,
24
+ "step": 100
25
+ },
26
+ {
27
+ "epoch": 0.0673854447439353,
28
+ "grad_norm": 11.1875,
29
+ "learning_rate": 1.9553758610362385e-05,
30
+ "loss": 0.4842578125,
31
+ "step": 150
32
+ },
33
+ {
34
+ "epoch": 0.08984725965858041,
35
+ "grad_norm": 12.125,
36
+ "learning_rate": 1.9404013177598084e-05,
37
+ "loss": 0.47801971435546875,
38
+ "step": 200
39
+ },
40
+ {
41
+ "epoch": 0.11230907457322552,
42
+ "grad_norm": 8.3125,
43
+ "learning_rate": 1.9254267744833783e-05,
44
+ "loss": 0.46963134765625,
45
+ "step": 250
46
+ },
47
+ {
48
+ "epoch": 0.1347708894878706,
49
+ "grad_norm": 11.375,
50
+ "learning_rate": 1.9104522312069486e-05,
51
+ "loss": 0.42921875,
52
+ "step": 300
53
+ },
54
+ {
55
+ "epoch": 0.15723270440251572,
56
+ "grad_norm": 8.5625,
57
+ "learning_rate": 1.8954776879305185e-05,
58
+ "loss": 0.47671875,
59
+ "step": 350
60
+ },
61
+ {
62
+ "epoch": 0.17969451931716082,
63
+ "grad_norm": 13.3125,
64
+ "learning_rate": 1.880503144654088e-05,
65
+ "loss": 0.4298046875,
66
+ "step": 400
67
+ },
68
+ {
69
+ "epoch": 0.20215633423180593,
70
+ "grad_norm": 8.3125,
71
+ "learning_rate": 1.865528601377658e-05,
72
+ "loss": 0.4160546875,
73
+ "step": 450
74
+ },
75
+ {
76
+ "epoch": 0.22461814914645103,
77
+ "grad_norm": 9.0,
78
+ "learning_rate": 1.850554058101228e-05,
79
+ "loss": 0.4132421875,
80
+ "step": 500
81
+ },
82
+ {
83
+ "epoch": 0.24707996406109614,
84
+ "grad_norm": 10.5,
85
+ "learning_rate": 1.835579514824798e-05,
86
+ "loss": 0.42578125,
87
+ "step": 550
88
+ },
89
+ {
90
+ "epoch": 0.2695417789757412,
91
+ "grad_norm": 8.0625,
92
+ "learning_rate": 1.820604971548368e-05,
93
+ "loss": 0.3969140625,
94
+ "step": 600
95
+ },
96
+ {
97
+ "epoch": 0.2920035938903863,
98
+ "grad_norm": 7.3125,
99
+ "learning_rate": 1.805630428271938e-05,
100
+ "loss": 0.4116796875,
101
+ "step": 650
102
+ },
103
+ {
104
+ "epoch": 0.31446540880503143,
105
+ "grad_norm": 10.125,
106
+ "learning_rate": 1.790655884995508e-05,
107
+ "loss": 0.4092578125,
108
+ "step": 700
109
+ },
110
+ {
111
+ "epoch": 0.33692722371967654,
112
+ "grad_norm": 9.625,
113
+ "learning_rate": 1.7756813417190775e-05,
114
+ "loss": 0.3807421875,
115
+ "step": 750
116
+ },
117
+ {
118
+ "epoch": 0.35938903863432164,
119
+ "grad_norm": 8.375,
120
+ "learning_rate": 1.7607067984426475e-05,
121
+ "loss": 0.404609375,
122
+ "step": 800
123
+ },
124
+ {
125
+ "epoch": 0.38185085354896675,
126
+ "grad_norm": 11.125,
127
+ "learning_rate": 1.7457322551662174e-05,
128
+ "loss": 0.41688232421875,
129
+ "step": 850
130
+ },
131
+ {
132
+ "epoch": 0.40431266846361186,
133
+ "grad_norm": 22.125,
134
+ "learning_rate": 1.7307577118897876e-05,
135
+ "loss": 0.371875,
136
+ "step": 900
137
+ },
138
+ {
139
+ "epoch": 0.42677448337825696,
140
+ "grad_norm": 9.375,
141
+ "learning_rate": 1.7157831686133576e-05,
142
+ "loss": 0.37583984375,
143
+ "step": 950
144
+ },
145
+ {
146
+ "epoch": 0.44923629829290207,
147
+ "grad_norm": 9.0,
148
+ "learning_rate": 1.7008086253369275e-05,
149
+ "loss": 0.3871875,
150
+ "step": 1000
151
+ },
152
+ {
153
+ "epoch": 0.4716981132075472,
154
+ "grad_norm": 11.5625,
155
+ "learning_rate": 1.6858340820604974e-05,
156
+ "loss": 0.38783203125,
157
+ "step": 1050
158
+ },
159
+ {
160
+ "epoch": 0.4941599281221923,
161
+ "grad_norm": 9.8125,
162
+ "learning_rate": 1.670859538784067e-05,
163
+ "loss": 0.3853125,
164
+ "step": 1100
165
+ },
166
+ {
167
+ "epoch": 0.5166217430368374,
168
+ "grad_norm": 6.96875,
169
+ "learning_rate": 1.6558849955076373e-05,
170
+ "loss": 0.4151171875,
171
+ "step": 1150
172
+ },
173
+ {
174
+ "epoch": 0.5390835579514824,
175
+ "grad_norm": 11.75,
176
+ "learning_rate": 1.6409104522312072e-05,
177
+ "loss": 0.3448046875,
178
+ "step": 1200
179
+ },
180
+ {
181
+ "epoch": 0.5615453728661276,
182
+ "grad_norm": 9.75,
183
+ "learning_rate": 1.625935908954777e-05,
184
+ "loss": 0.3898046875,
185
+ "step": 1250
186
+ },
187
+ {
188
+ "epoch": 0.5840071877807727,
189
+ "grad_norm": 10.875,
190
+ "learning_rate": 1.610961365678347e-05,
191
+ "loss": 0.3674609375,
192
+ "step": 1300
193
+ },
194
+ {
195
+ "epoch": 0.6064690026954178,
196
+ "grad_norm": 13.25,
197
+ "learning_rate": 1.595986822401917e-05,
198
+ "loss": 0.32759765625,
199
+ "step": 1350
200
+ },
201
+ {
202
+ "epoch": 0.6289308176100629,
203
+ "grad_norm": 23.0,
204
+ "learning_rate": 1.581012279125487e-05,
205
+ "loss": 0.38048828125,
206
+ "step": 1400
207
+ },
208
+ {
209
+ "epoch": 0.651392632524708,
210
+ "grad_norm": 14.875,
211
+ "learning_rate": 1.5660377358490568e-05,
212
+ "loss": 0.36399658203125,
213
+ "step": 1450
214
+ },
215
+ {
216
+ "epoch": 0.6738544474393531,
217
+ "grad_norm": 10.75,
218
+ "learning_rate": 1.5510631925726267e-05,
219
+ "loss": 0.3512890625,
220
+ "step": 1500
221
+ },
222
+ {
223
+ "epoch": 0.6963162623539982,
224
+ "grad_norm": 11.6875,
225
+ "learning_rate": 1.5360886492961966e-05,
226
+ "loss": 0.3547265625,
227
+ "step": 1550
228
+ },
229
+ {
230
+ "epoch": 0.7187780772686433,
231
+ "grad_norm": 12.875,
232
+ "learning_rate": 1.5211141060197666e-05,
233
+ "loss": 0.34892578125,
234
+ "step": 1600
235
+ },
236
+ {
237
+ "epoch": 0.7412398921832885,
238
+ "grad_norm": 10.8125,
239
+ "learning_rate": 1.5061395627433365e-05,
240
+ "loss": 0.38994140625,
241
+ "step": 1650
242
+ },
243
+ {
244
+ "epoch": 0.7637017070979335,
245
+ "grad_norm": 12.25,
246
+ "learning_rate": 1.4911650194669062e-05,
247
+ "loss": 0.3660469055175781,
248
+ "step": 1700
249
+ },
250
+ {
251
+ "epoch": 0.7861635220125787,
252
+ "grad_norm": 14.0,
253
+ "learning_rate": 1.4761904761904763e-05,
254
+ "loss": 0.4043359375,
255
+ "step": 1750
256
+ },
257
+ {
258
+ "epoch": 0.8086253369272237,
259
+ "grad_norm": 11.3125,
260
+ "learning_rate": 1.4612159329140462e-05,
261
+ "loss": 0.3288671875,
262
+ "step": 1800
263
+ },
264
+ {
265
+ "epoch": 0.8310871518418689,
266
+ "grad_norm": 9.5,
267
+ "learning_rate": 1.4462413896376162e-05,
268
+ "loss": 0.347109375,
269
+ "step": 1850
270
+ },
271
+ {
272
+ "epoch": 0.8535489667565139,
273
+ "grad_norm": 13.75,
274
+ "learning_rate": 1.4312668463611861e-05,
275
+ "loss": 0.3082421875,
276
+ "step": 1900
277
+ },
278
+ {
279
+ "epoch": 0.876010781671159,
280
+ "grad_norm": 8.375,
281
+ "learning_rate": 1.416292303084756e-05,
282
+ "loss": 0.3761328125,
283
+ "step": 1950
284
+ },
285
+ {
286
+ "epoch": 0.8984725965858041,
287
+ "grad_norm": 7.5,
288
+ "learning_rate": 1.401317759808326e-05,
289
+ "loss": 0.3083203125,
290
+ "step": 2000
291
+ },
292
+ {
293
+ "epoch": 0.9209344115004492,
294
+ "grad_norm": 13.0,
295
+ "learning_rate": 1.386343216531896e-05,
296
+ "loss": 0.3319140625,
297
+ "step": 2050
298
+ },
299
+ {
300
+ "epoch": 0.9433962264150944,
301
+ "grad_norm": 15.625,
302
+ "learning_rate": 1.3713686732554658e-05,
303
+ "loss": 0.374296875,
304
+ "step": 2100
305
+ },
306
+ {
307
+ "epoch": 0.9658580413297394,
308
+ "grad_norm": 14.0625,
309
+ "learning_rate": 1.3563941299790357e-05,
310
+ "loss": 0.4065625,
311
+ "step": 2150
312
+ },
313
+ {
314
+ "epoch": 0.9883198562443846,
315
+ "grad_norm": 11.625,
316
+ "learning_rate": 1.3414195867026056e-05,
317
+ "loss": 0.381357421875,
318
+ "step": 2200
319
+ },
320
+ {
321
+ "epoch": 1.0,
322
+ "eval_loss": 0.4728180468082428,
323
+ "eval_runtime": 2.9031,
324
+ "eval_samples_per_second": 116.427,
325
+ "eval_steps_per_second": 3.789,
326
+ "step": 2226
327
+ },
328
+ {
329
+ "epoch": 1.0107816711590296,
330
+ "grad_norm": 13.625,
331
+ "learning_rate": 1.3264450434261755e-05,
332
+ "loss": 0.32091796875,
333
+ "step": 2250
334
+ },
335
+ {
336
+ "epoch": 1.0332434860736748,
337
+ "grad_norm": 10.375,
338
+ "learning_rate": 1.3114705001497456e-05,
339
+ "loss": 0.35169921875,
340
+ "step": 2300
341
+ },
342
+ {
343
+ "epoch": 1.05570530098832,
344
+ "grad_norm": 14.5625,
345
+ "learning_rate": 1.2964959568733156e-05,
346
+ "loss": 0.355419921875,
347
+ "step": 2350
348
+ },
349
+ {
350
+ "epoch": 1.0781671159029649,
351
+ "grad_norm": 11.875,
352
+ "learning_rate": 1.2815214135968855e-05,
353
+ "loss": 0.376484375,
354
+ "step": 2400
355
+ },
356
+ {
357
+ "epoch": 1.10062893081761,
358
+ "grad_norm": 14.75,
359
+ "learning_rate": 1.2665468703204552e-05,
360
+ "loss": 0.3365234375,
361
+ "step": 2450
362
+ },
363
+ {
364
+ "epoch": 1.1230907457322552,
365
+ "grad_norm": 184.0,
366
+ "learning_rate": 1.2515723270440252e-05,
367
+ "loss": 0.35140625,
368
+ "step": 2500
369
+ },
370
+ {
371
+ "epoch": 1.1455525606469004,
372
+ "grad_norm": 12.1875,
373
+ "learning_rate": 1.236597783767595e-05,
374
+ "loss": 0.368740234375,
375
+ "step": 2550
376
+ },
377
+ {
378
+ "epoch": 1.1680143755615453,
379
+ "grad_norm": 10.75,
380
+ "learning_rate": 1.2216232404911652e-05,
381
+ "loss": 0.34755859375,
382
+ "step": 2600
383
+ },
384
+ {
385
+ "epoch": 1.1904761904761905,
386
+ "grad_norm": 12.25,
387
+ "learning_rate": 1.2066486972147351e-05,
388
+ "loss": 0.358671875,
389
+ "step": 2650
390
+ },
391
+ {
392
+ "epoch": 1.2129380053908356,
393
+ "grad_norm": 12.6875,
394
+ "learning_rate": 1.191674153938305e-05,
395
+ "loss": 0.32927734375,
396
+ "step": 2700
397
+ },
398
+ {
399
+ "epoch": 1.2353998203054806,
400
+ "grad_norm": 11.1875,
401
+ "learning_rate": 1.176699610661875e-05,
402
+ "loss": 0.3071875,
403
+ "step": 2750
404
+ },
405
+ {
406
+ "epoch": 1.2578616352201257,
407
+ "grad_norm": 10.5,
408
+ "learning_rate": 1.1617250673854449e-05,
409
+ "loss": 0.315390625,
410
+ "step": 2800
411
+ },
412
+ {
413
+ "epoch": 1.280323450134771,
414
+ "grad_norm": 8.125,
415
+ "learning_rate": 1.1467505241090146e-05,
416
+ "loss": 0.33751953125,
417
+ "step": 2850
418
+ },
419
+ {
420
+ "epoch": 1.302785265049416,
421
+ "grad_norm": 10.25,
422
+ "learning_rate": 1.1317759808325847e-05,
423
+ "loss": 0.39599609375,
424
+ "step": 2900
425
+ },
426
+ {
427
+ "epoch": 1.3252470799640612,
428
+ "grad_norm": 10.3125,
429
+ "learning_rate": 1.1168014375561546e-05,
430
+ "loss": 0.3711328125,
431
+ "step": 2950
432
+ },
433
+ {
434
+ "epoch": 1.3477088948787062,
435
+ "grad_norm": 13.875,
436
+ "learning_rate": 1.1018268942797245e-05,
437
+ "loss": 0.370859375,
438
+ "step": 3000
439
+ },
440
+ {
441
+ "epoch": 1.3701707097933513,
442
+ "grad_norm": 8.875,
443
+ "learning_rate": 1.0868523510032945e-05,
444
+ "loss": 0.317587890625,
445
+ "step": 3050
446
+ },
447
+ {
448
+ "epoch": 1.3926325247079965,
449
+ "grad_norm": 8.3125,
450
+ "learning_rate": 1.0718778077268644e-05,
451
+ "loss": 0.365966796875,
452
+ "step": 3100
453
+ },
454
+ {
455
+ "epoch": 1.4150943396226414,
456
+ "grad_norm": 8.0,
457
+ "learning_rate": 1.0569032644504345e-05,
458
+ "loss": 0.3309375,
459
+ "step": 3150
460
+ },
461
+ {
462
+ "epoch": 1.4375561545372866,
463
+ "grad_norm": 11.5625,
464
+ "learning_rate": 1.0419287211740044e-05,
465
+ "loss": 0.3394921875,
466
+ "step": 3200
467
+ },
468
+ {
469
+ "epoch": 1.4600179694519317,
470
+ "grad_norm": 13.5625,
471
+ "learning_rate": 1.0269541778975742e-05,
472
+ "loss": 0.33572265625,
473
+ "step": 3250
474
+ },
475
+ {
476
+ "epoch": 1.482479784366577,
477
+ "grad_norm": 13.1875,
478
+ "learning_rate": 1.011979634621144e-05,
479
+ "loss": 0.296796875,
480
+ "step": 3300
481
+ },
482
+ {
483
+ "epoch": 1.504941599281222,
484
+ "grad_norm": 12.6875,
485
+ "learning_rate": 9.97005091344714e-06,
486
+ "loss": 0.370859375,
487
+ "step": 3350
488
+ },
489
+ {
490
+ "epoch": 1.527403414195867,
491
+ "grad_norm": 23.5,
492
+ "learning_rate": 9.820305480682841e-06,
493
+ "loss": 0.32857421875,
494
+ "step": 3400
495
+ },
496
+ {
497
+ "epoch": 1.5498652291105122,
498
+ "grad_norm": 17.5,
499
+ "learning_rate": 9.670560047918538e-06,
500
+ "loss": 0.31537109375,
501
+ "step": 3450
502
+ },
503
+ {
504
+ "epoch": 1.5723270440251573,
505
+ "grad_norm": 8.25,
506
+ "learning_rate": 9.52081461515424e-06,
507
+ "loss": 0.30197265625,
508
+ "step": 3500
509
+ },
510
+ {
511
+ "epoch": 1.5947888589398023,
512
+ "grad_norm": 6.4375,
513
+ "learning_rate": 9.371069182389939e-06,
514
+ "loss": 0.30685546875,
515
+ "step": 3550
516
+ },
517
+ {
518
+ "epoch": 1.6172506738544474,
519
+ "grad_norm": 14.0,
520
+ "learning_rate": 9.221323749625636e-06,
521
+ "loss": 0.333203125,
522
+ "step": 3600
523
+ },
524
+ {
525
+ "epoch": 1.6397124887690926,
526
+ "grad_norm": 11.5,
527
+ "learning_rate": 9.071578316861337e-06,
528
+ "loss": 0.34734375,
529
+ "step": 3650
530
+ },
531
+ {
532
+ "epoch": 1.6621743036837375,
533
+ "grad_norm": 10.625,
534
+ "learning_rate": 8.921832884097036e-06,
535
+ "loss": 0.344521484375,
536
+ "step": 3700
537
+ },
538
+ {
539
+ "epoch": 1.684636118598383,
540
+ "grad_norm": 11.875,
541
+ "learning_rate": 8.772087451332735e-06,
542
+ "loss": 0.36986328125,
543
+ "step": 3750
544
+ },
545
+ {
546
+ "epoch": 1.7070979335130279,
547
+ "grad_norm": 10.8125,
548
+ "learning_rate": 8.622342018568435e-06,
549
+ "loss": 0.33515625,
550
+ "step": 3800
551
+ },
552
+ {
553
+ "epoch": 1.7295597484276728,
554
+ "grad_norm": 6.5625,
555
+ "learning_rate": 8.472596585804134e-06,
556
+ "loss": 0.35638671875,
557
+ "step": 3850
558
+ },
559
+ {
560
+ "epoch": 1.7520215633423182,
561
+ "grad_norm": 8.1875,
562
+ "learning_rate": 8.322851153039833e-06,
563
+ "loss": 0.3527734375,
564
+ "step": 3900
565
+ },
566
+ {
567
+ "epoch": 1.7744833782569631,
568
+ "grad_norm": 11.3125,
569
+ "learning_rate": 8.173105720275532e-06,
570
+ "loss": 0.36638671875,
571
+ "step": 3950
572
+ },
573
+ {
574
+ "epoch": 1.7969451931716083,
575
+ "grad_norm": 12.75,
576
+ "learning_rate": 8.023360287511232e-06,
577
+ "loss": 0.3379296875,
578
+ "step": 4000
579
+ },
580
+ {
581
+ "epoch": 1.8194070080862534,
582
+ "grad_norm": 7.96875,
583
+ "learning_rate": 7.87361485474693e-06,
584
+ "loss": 0.3413671875,
585
+ "step": 4050
586
+ },
587
+ {
588
+ "epoch": 1.8418688230008984,
589
+ "grad_norm": 10.875,
590
+ "learning_rate": 7.72386942198263e-06,
591
+ "loss": 0.342890625,
592
+ "step": 4100
593
+ },
594
+ {
595
+ "epoch": 1.8643306379155435,
596
+ "grad_norm": 12.75,
597
+ "learning_rate": 7.574123989218329e-06,
598
+ "loss": 0.3216015625,
599
+ "step": 4150
600
+ },
601
+ {
602
+ "epoch": 1.8867924528301887,
603
+ "grad_norm": 90.0,
604
+ "learning_rate": 7.4243785564540284e-06,
605
+ "loss": 0.341484375,
606
+ "step": 4200
607
+ },
608
+ {
609
+ "epoch": 1.9092542677448336,
610
+ "grad_norm": 6.28125,
611
+ "learning_rate": 7.2746331236897285e-06,
612
+ "loss": 0.33556640625,
613
+ "step": 4250
614
+ },
615
+ {
616
+ "epoch": 1.931716082659479,
617
+ "grad_norm": 12.9375,
618
+ "learning_rate": 7.124887690925427e-06,
619
+ "loss": 0.36302734375,
620
+ "step": 4300
621
+ },
622
+ {
623
+ "epoch": 1.954177897574124,
624
+ "grad_norm": 14.4375,
625
+ "learning_rate": 6.975142258161126e-06,
626
+ "loss": 0.3556640625,
627
+ "step": 4350
628
+ },
629
+ {
630
+ "epoch": 1.9766397124887691,
631
+ "grad_norm": 10.75,
632
+ "learning_rate": 6.825396825396826e-06,
633
+ "loss": 0.271953125,
634
+ "step": 4400
635
+ },
636
+ {
637
+ "epoch": 1.9991015274034143,
638
+ "grad_norm": 12.375,
639
+ "learning_rate": 6.675651392632525e-06,
640
+ "loss": 0.338671875,
641
+ "step": 4450
642
+ },
643
+ {
644
+ "epoch": 2.0,
645
+ "eval_loss": 0.4453125,
646
+ "eval_runtime": 2.9981,
647
+ "eval_samples_per_second": 112.736,
648
+ "eval_steps_per_second": 3.669,
649
+ "step": 4452
650
+ },
651
+ {
652
+ "epoch": 2.0215633423180592,
653
+ "grad_norm": 12.4375,
654
+ "learning_rate": 6.5259059598682255e-06,
655
+ "loss": 0.36619140625,
656
+ "step": 4500
657
+ },
658
+ {
659
+ "epoch": 2.0440251572327046,
660
+ "grad_norm": 17.25,
661
+ "learning_rate": 6.376160527103924e-06,
662
+ "loss": 0.38908203125,
663
+ "step": 4550
664
+ },
665
+ {
666
+ "epoch": 2.0664869721473496,
667
+ "grad_norm": 8.625,
668
+ "learning_rate": 6.226415094339623e-06,
669
+ "loss": 0.35123046875,
670
+ "step": 4600
671
+ },
672
+ {
673
+ "epoch": 2.0889487870619945,
674
+ "grad_norm": 13.0625,
675
+ "learning_rate": 6.076669661575323e-06,
676
+ "loss": 0.36943359375,
677
+ "step": 4650
678
+ },
679
+ {
680
+ "epoch": 2.11141060197664,
681
+ "grad_norm": 12.8125,
682
+ "learning_rate": 5.9269242288110215e-06,
683
+ "loss": 0.35146484375,
684
+ "step": 4700
685
+ },
686
+ {
687
+ "epoch": 2.133872416891285,
688
+ "grad_norm": 10.9375,
689
+ "learning_rate": 5.777178796046721e-06,
690
+ "loss": 0.3725390625,
691
+ "step": 4750
692
+ },
693
+ {
694
+ "epoch": 2.1563342318059298,
695
+ "grad_norm": 13.75,
696
+ "learning_rate": 5.627433363282421e-06,
697
+ "loss": 0.36087890625,
698
+ "step": 4800
699
+ },
700
+ {
701
+ "epoch": 2.178796046720575,
702
+ "grad_norm": 14.0625,
703
+ "learning_rate": 5.47768793051812e-06,
704
+ "loss": 0.36873046875,
705
+ "step": 4850
706
+ },
707
+ {
708
+ "epoch": 2.20125786163522,
709
+ "grad_norm": 15.1875,
710
+ "learning_rate": 5.327942497753818e-06,
711
+ "loss": 0.35255859375,
712
+ "step": 4900
713
+ },
714
+ {
715
+ "epoch": 2.223719676549865,
716
+ "grad_norm": 11.5625,
717
+ "learning_rate": 5.1781970649895185e-06,
718
+ "loss": 0.3751953125,
719
+ "step": 4950
720
+ },
721
+ {
722
+ "epoch": 2.2461814914645104,
723
+ "grad_norm": 14.0,
724
+ "learning_rate": 5.028451632225218e-06,
725
+ "loss": 0.384609375,
726
+ "step": 5000
727
+ },
728
+ {
729
+ "epoch": 2.2686433063791553,
730
+ "grad_norm": 18.5,
731
+ "learning_rate": 4.878706199460917e-06,
732
+ "loss": 0.37974609375,
733
+ "step": 5050
734
+ },
735
+ {
736
+ "epoch": 2.2911051212938007,
737
+ "grad_norm": 13.875,
738
+ "learning_rate": 4.728960766696616e-06,
739
+ "loss": 0.35677734375,
740
+ "step": 5100
741
+ },
742
+ {
743
+ "epoch": 2.3135669362084457,
744
+ "grad_norm": 14.9375,
745
+ "learning_rate": 4.579215333932315e-06,
746
+ "loss": 0.33296875,
747
+ "step": 5150
748
+ },
749
+ {
750
+ "epoch": 2.3360287511230906,
751
+ "grad_norm": 16.25,
752
+ "learning_rate": 4.429469901168015e-06,
753
+ "loss": 0.3341796875,
754
+ "step": 5200
755
+ },
756
+ {
757
+ "epoch": 2.358490566037736,
758
+ "grad_norm": 12.4375,
759
+ "learning_rate": 4.279724468403715e-06,
760
+ "loss": 0.3711328125,
761
+ "step": 5250
762
+ },
763
+ {
764
+ "epoch": 2.380952380952381,
765
+ "grad_norm": 13.5625,
766
+ "learning_rate": 4.129979035639413e-06,
767
+ "loss": 0.3920703125,
768
+ "step": 5300
769
+ },
770
+ {
771
+ "epoch": 2.403414195867026,
772
+ "grad_norm": 10.25,
773
+ "learning_rate": 3.980233602875112e-06,
774
+ "loss": 0.32033203125,
775
+ "step": 5350
776
+ },
777
+ {
778
+ "epoch": 2.4258760107816713,
779
+ "grad_norm": 10.0625,
780
+ "learning_rate": 3.830488170110812e-06,
781
+ "loss": 0.345234375,
782
+ "step": 5400
783
+ },
784
+ {
785
+ "epoch": 2.448337825696316,
786
+ "grad_norm": 15.8125,
787
+ "learning_rate": 3.680742737346511e-06,
788
+ "loss": 0.40408203125,
789
+ "step": 5450
790
+ },
791
+ {
792
+ "epoch": 2.470799640610961,
793
+ "grad_norm": 12.8125,
794
+ "learning_rate": 3.5309973045822103e-06,
795
+ "loss": 0.345,
796
+ "step": 5500
797
+ },
798
+ {
799
+ "epoch": 2.4932614555256065,
800
+ "grad_norm": 15.0,
801
+ "learning_rate": 3.38125187181791e-06,
802
+ "loss": 0.3587109375,
803
+ "step": 5550
804
+ },
805
+ {
806
+ "epoch": 2.5157232704402515,
807
+ "grad_norm": 13.75,
808
+ "learning_rate": 3.231506439053609e-06,
809
+ "loss": 0.37056640625,
810
+ "step": 5600
811
+ },
812
+ {
813
+ "epoch": 2.538185085354897,
814
+ "grad_norm": 9.0,
815
+ "learning_rate": 3.0817610062893084e-06,
816
+ "loss": 0.352724609375,
817
+ "step": 5650
818
+ },
819
+ {
820
+ "epoch": 2.560646900269542,
821
+ "grad_norm": 12.5625,
822
+ "learning_rate": 2.9320155735250076e-06,
823
+ "loss": 0.3362890625,
824
+ "step": 5700
825
+ },
826
+ {
827
+ "epoch": 2.5831087151841867,
828
+ "grad_norm": 13.4375,
829
+ "learning_rate": 2.7822701407607073e-06,
830
+ "loss": 0.3505078125,
831
+ "step": 5750
832
+ },
833
+ {
834
+ "epoch": 2.605570530098832,
835
+ "grad_norm": 14.0,
836
+ "learning_rate": 2.632524707996406e-06,
837
+ "loss": 0.401796875,
838
+ "step": 5800
839
+ },
840
+ {
841
+ "epoch": 2.628032345013477,
842
+ "grad_norm": 12.9375,
843
+ "learning_rate": 2.4827792752321057e-06,
844
+ "loss": 0.3735546875,
845
+ "step": 5850
846
+ },
847
+ {
848
+ "epoch": 2.6504941599281224,
849
+ "grad_norm": 11.375,
850
+ "learning_rate": 2.333033842467805e-06,
851
+ "loss": 0.3434375,
852
+ "step": 5900
853
+ },
854
+ {
855
+ "epoch": 2.6729559748427674,
856
+ "grad_norm": 10.9375,
857
+ "learning_rate": 2.183288409703504e-06,
858
+ "loss": 0.34935546875,
859
+ "step": 5950
860
+ },
861
+ {
862
+ "epoch": 2.6954177897574123,
863
+ "grad_norm": 11.0,
864
+ "learning_rate": 2.0335429769392034e-06,
865
+ "loss": 0.3768359375,
866
+ "step": 6000
867
+ },
868
+ {
869
+ "epoch": 2.7178796046720572,
870
+ "grad_norm": 18.0,
871
+ "learning_rate": 1.8837975441749028e-06,
872
+ "loss": 0.35380859375,
873
+ "step": 6050
874
+ },
875
+ {
876
+ "epoch": 2.7403414195867026,
877
+ "grad_norm": 12.125,
878
+ "learning_rate": 1.734052111410602e-06,
879
+ "loss": 0.3534765625,
880
+ "step": 6100
881
+ },
882
+ {
883
+ "epoch": 2.7628032345013476,
884
+ "grad_norm": 10.375,
885
+ "learning_rate": 1.5843066786463015e-06,
886
+ "loss": 0.382421875,
887
+ "step": 6150
888
+ },
889
+ {
890
+ "epoch": 2.785265049415993,
891
+ "grad_norm": 10.75,
892
+ "learning_rate": 1.4345612458820007e-06,
893
+ "loss": 0.31984375,
894
+ "step": 6200
895
+ },
896
+ {
897
+ "epoch": 2.807726864330638,
898
+ "grad_norm": 13.3125,
899
+ "learning_rate": 1.2848158131177e-06,
900
+ "loss": 0.37189453125,
901
+ "step": 6250
902
+ },
903
+ {
904
+ "epoch": 2.830188679245283,
905
+ "grad_norm": 13.875,
906
+ "learning_rate": 1.1350703803533992e-06,
907
+ "loss": 0.37873046875,
908
+ "step": 6300
909
+ },
910
+ {
911
+ "epoch": 2.852650494159928,
912
+ "grad_norm": 8.8125,
913
+ "learning_rate": 9.853249475890986e-07,
914
+ "loss": 0.344921875,
915
+ "step": 6350
916
+ },
917
+ {
918
+ "epoch": 2.875112309074573,
919
+ "grad_norm": 11.625,
920
+ "learning_rate": 8.355795148247979e-07,
921
+ "loss": 0.3953515625,
922
+ "step": 6400
923
+ },
924
+ {
925
+ "epoch": 2.8975741239892185,
926
+ "grad_norm": 15.6875,
927
+ "learning_rate": 6.858340820604972e-07,
928
+ "loss": 0.34615234375,
929
+ "step": 6450
930
+ },
931
+ {
932
+ "epoch": 2.9200359389038635,
933
+ "grad_norm": 11.8125,
934
+ "learning_rate": 5.360886492961965e-07,
935
+ "loss": 0.3701953125,
936
+ "step": 6500
937
+ },
938
+ {
939
+ "epoch": 2.9424977538185084,
940
+ "grad_norm": 184.0,
941
+ "learning_rate": 3.863432165318958e-07,
942
+ "loss": 0.3726953125,
943
+ "step": 6550
944
+ },
945
+ {
946
+ "epoch": 2.964959568733154,
947
+ "grad_norm": 6.84375,
948
+ "learning_rate": 2.365977837675951e-07,
949
+ "loss": 0.327265625,
950
+ "step": 6600
951
+ },
952
+ {
953
+ "epoch": 2.9874213836477987,
954
+ "grad_norm": 15.625,
955
+ "learning_rate": 8.685235100329441e-08,
956
+ "loss": 0.36375,
957
+ "step": 6650
958
+ },
959
+ {
960
+ "epoch": 3.0,
961
+ "eval_loss": 0.443925678730011,
962
+ "eval_runtime": 2.6416,
963
+ "eval_samples_per_second": 127.953,
964
+ "eval_steps_per_second": 4.164,
965
+ "step": 6678
966
+ }
967
+ ],
968
+ "logging_steps": 50,
969
+ "max_steps": 6678,
970
+ "num_input_tokens_seen": 0,
971
+ "num_train_epochs": 3,
972
+ "save_steps": 500,
973
+ "stateful_callbacks": {
974
+ "TrainerControl": {
975
+ "args": {
976
+ "should_epoch_stop": false,
977
+ "should_evaluate": false,
978
+ "should_log": false,
979
+ "should_save": true,
980
+ "should_training_stop": true
981
+ },
982
+ "attributes": {}
983
+ }
984
+ },
985
+ "total_flos": 0.0,
986
+ "train_batch_size": 16,
987
+ "trial_name": null,
988
+ "trial_params": null
989
+ }
ncbi-MedCPT-Cross-Encoder-E3-S1-Mpairwise-FullDataTrue/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90ac4a19c60de832e6829401bdbbab0d9ec1dc5d343ad75f0ee82a619c288532
3
+ size 5329