Add ncbi-MedCPT-Cross-Encoder-E3-S1-Mpairwise-FullDataTrue

Browse files

Upload fine-tuned rerankers for BioASQ 14B

Co-authored-by: André Ribeiro <andrepedro2004@hotmail.com>
Co-authored-by: Rúben Garrido <rubengarrido@ua.pt>

Files changed (7) hide show

ncbi-MedCPT-Cross-Encoder-E3-S1-Mpairwise-FullDataTrue/config.json +35 -0
ncbi-MedCPT-Cross-Encoder-E3-S1-Mpairwise-FullDataTrue/model.safetensors +3 -0
ncbi-MedCPT-Cross-Encoder-E3-S1-Mpairwise-FullDataTrue/ranx_results.json +49 -0
ncbi-MedCPT-Cross-Encoder-E3-S1-Mpairwise-FullDataTrue/tokenizer.json +0 -0
ncbi-MedCPT-Cross-Encoder-E3-S1-Mpairwise-FullDataTrue/tokenizer_config.json +31 -0
ncbi-MedCPT-Cross-Encoder-E3-S1-Mpairwise-FullDataTrue/trainer_state.json +989 -0
ncbi-MedCPT-Cross-Encoder-E3-S1-Mpairwise-FullDataTrue/training_args.bin +3 -0

ncbi-MedCPT-Cross-Encoder-E3-S1-Mpairwise-FullDataTrue/config.json ADDED Viewed

	@@ -0,0 +1,35 @@

+{
+  "add_cross_attention": false,
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "bos_token_id": null,
+  "classifier_dropout": null,
+  "dtype": "bfloat16",
+  "eos_token_id": null,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "LABEL_0"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "is_decoder": false,
+  "label2id": {
+    "LABEL_0": 0
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "tie_word_embeddings": true,
+  "transformers_version": "5.2.0",
+  "type_vocab_size": 2,
+  "use_cache": false,
+  "vocab_size": 30522
+}

ncbi-MedCPT-Cross-Encoder-E3-S1-Mpairwise-FullDataTrue/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:90549382928613848bdd7c9d47503f09a50ca0d6e24fe03cafe742e9120d3b40
+size 218989610

ncbi-MedCPT-Cross-Encoder-E3-S1-Mpairwise-FullDataTrue/ranx_results.json ADDED Viewed

	@@ -0,0 +1,49 @@

+{
+    "model": "ncbi-MedCPT-Cross-Encoder",
+    "val_files": [
+        "../../data/val_data/13B3_golden.json",
+        "../../data/val_data/13B1_golden.json",
+        "../../data/val_data/13B2_golden.json",
+        "../../data/val_data/13B4_golden.json"
+    ],
+    "total": {
+        "ndcg@5": 0.7132871671763956,
+        "mrr": 0.8323138422035482,
+        "recall@10": 0.4122187536008182,
+        "recall@100": 0.9284123733146155,
+        "map@10": 0.3563436457873443,
+        "map-bioasq@10": 0.5765806197478992
+    },
+    "13B3_golden.json": {
+        "ndcg@5": 0.7127843511629302,
+        "mrr": 0.8637394957983193,
+        "recall@10": 0.41900119687192505,
+        "recall@100": 0.9418191480335674,
+        "map@10": 0.3607746912158015,
+        "map-bioasq@10": 0.5709368580765639
+    },
+    "13B1_golden.json": {
+        "ndcg@5": 0.6550588631081015,
+        "mrr": 0.7716293183940242,
+        "recall@10": 0.4762836657884654,
+        "recall@100": 0.9489863204885913,
+        "map@10": 0.3986474389958226,
+        "map-bioasq@10": 0.5349774354186119
+    },
+    "13B2_golden.json": {
+        "ndcg@5": 0.7890921829276484,
+        "mrr": 0.863249299719888,
+        "recall@10": 0.5038523144064057,
+        "recall@100": 0.9389523155472866,
+        "map@10": 0.46121303813544406,
+        "map-bioasq@10": 0.6805634920634921
+    },
+    "13B4_golden.json": {
+        "ndcg@5": 0.6962132715069025,
+        "mrr": 0.8306372549019607,
+        "recall@10": 0.24973783733647686,
+        "recall@100": 0.8838917091890159,
+        "map@10": 0.20473941480230906,
+        "map-bioasq@10": 0.5198446934329286
+    }
+}

ncbi-MedCPT-Cross-Encoder-E3-S1-Mpairwise-FullDataTrue/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

ncbi-MedCPT-Cross-Encoder-E3-S1-Mpairwise-FullDataTrue/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "backend": "tokenizers",
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "extra_special_tokens": [
+    "[PAD]",
+    "[UNK]",
+    "[CLS]",
+    "[SEP]",
+    "[MASK]"
+  ],
+  "is_local": false,
+  "mask_token": "[MASK]",
+  "max_length": 512,
+  "model_max_length": 1000000000000000019884624838656,
+  "never_split": null,
+  "pad_to_multiple_of": null,
+  "pad_token": "[PAD]",
+  "pad_token_type_id": 0,
+  "padding_side": "right",
+  "sep_token": "[SEP]",
+  "stride": 0,
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first",
+  "unk_token": "[UNK]"
+}

ncbi-MedCPT-Cross-Encoder-E3-S1-Mpairwise-FullDataTrue/trainer_state.json ADDED Viewed

	@@ -0,0 +1,989 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 6678,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.022461814914645103,
+      "grad_norm": 35.5,
+      "learning_rate": 1.9853249475890986e-05,
+      "loss": 0.94648193359375,
+      "step": 50
+    },
+    {
+      "epoch": 0.044923629829290206,
+      "grad_norm": 88.5,
+      "learning_rate": 1.9703504043126685e-05,
+      "loss": 0.630087890625,
+      "step": 100
+    },
+    {
+      "epoch": 0.0673854447439353,
+      "grad_norm": 11.1875,
+      "learning_rate": 1.9553758610362385e-05,
+      "loss": 0.4842578125,
+      "step": 150
+    },
+    {
+      "epoch": 0.08984725965858041,
+      "grad_norm": 12.125,
+      "learning_rate": 1.9404013177598084e-05,
+      "loss": 0.47801971435546875,
+      "step": 200
+    },
+    {
+      "epoch": 0.11230907457322552,
+      "grad_norm": 8.3125,
+      "learning_rate": 1.9254267744833783e-05,
+      "loss": 0.46963134765625,
+      "step": 250
+    },
+    {
+      "epoch": 0.1347708894878706,
+      "grad_norm": 11.375,
+      "learning_rate": 1.9104522312069486e-05,
+      "loss": 0.42921875,
+      "step": 300
+    },
+    {
+      "epoch": 0.15723270440251572,
+      "grad_norm": 8.5625,
+      "learning_rate": 1.8954776879305185e-05,
+      "loss": 0.47671875,
+      "step": 350
+    },
+    {
+      "epoch": 0.17969451931716082,
+      "grad_norm": 13.3125,
+      "learning_rate": 1.880503144654088e-05,
+      "loss": 0.4298046875,
+      "step": 400
+    },
+    {
+      "epoch": 0.20215633423180593,
+      "grad_norm": 8.3125,
+      "learning_rate": 1.865528601377658e-05,
+      "loss": 0.4160546875,
+      "step": 450
+    },
+    {
+      "epoch": 0.22461814914645103,
+      "grad_norm": 9.0,
+      "learning_rate": 1.850554058101228e-05,
+      "loss": 0.4132421875,
+      "step": 500
+    },
+    {
+      "epoch": 0.24707996406109614,
+      "grad_norm": 10.5,
+      "learning_rate": 1.835579514824798e-05,
+      "loss": 0.42578125,
+      "step": 550
+    },
+    {
+      "epoch": 0.2695417789757412,
+      "grad_norm": 8.0625,
+      "learning_rate": 1.820604971548368e-05,
+      "loss": 0.3969140625,
+      "step": 600
+    },
+    {
+      "epoch": 0.2920035938903863,
+      "grad_norm": 7.3125,
+      "learning_rate": 1.805630428271938e-05,
+      "loss": 0.4116796875,
+      "step": 650
+    },
+    {
+      "epoch": 0.31446540880503143,
+      "grad_norm": 10.125,
+      "learning_rate": 1.790655884995508e-05,
+      "loss": 0.4092578125,
+      "step": 700
+    },
+    {
+      "epoch": 0.33692722371967654,
+      "grad_norm": 9.625,
+      "learning_rate": 1.7756813417190775e-05,
+      "loss": 0.3807421875,
+      "step": 750
+    },
+    {
+      "epoch": 0.35938903863432164,
+      "grad_norm": 8.375,
+      "learning_rate": 1.7607067984426475e-05,
+      "loss": 0.404609375,
+      "step": 800
+    },
+    {
+      "epoch": 0.38185085354896675,
+      "grad_norm": 11.125,
+      "learning_rate": 1.7457322551662174e-05,
+      "loss": 0.41688232421875,
+      "step": 850
+    },
+    {
+      "epoch": 0.40431266846361186,
+      "grad_norm": 22.125,
+      "learning_rate": 1.7307577118897876e-05,
+      "loss": 0.371875,
+      "step": 900
+    },
+    {
+      "epoch": 0.42677448337825696,
+      "grad_norm": 9.375,
+      "learning_rate": 1.7157831686133576e-05,
+      "loss": 0.37583984375,
+      "step": 950
+    },
+    {
+      "epoch": 0.44923629829290207,
+      "grad_norm": 9.0,
+      "learning_rate": 1.7008086253369275e-05,
+      "loss": 0.3871875,
+      "step": 1000
+    },
+    {
+      "epoch": 0.4716981132075472,
+      "grad_norm": 11.5625,
+      "learning_rate": 1.6858340820604974e-05,
+      "loss": 0.38783203125,
+      "step": 1050
+    },
+    {
+      "epoch": 0.4941599281221923,
+      "grad_norm": 9.8125,
+      "learning_rate": 1.670859538784067e-05,
+      "loss": 0.3853125,
+      "step": 1100
+    },
+    {
+      "epoch": 0.5166217430368374,
+      "grad_norm": 6.96875,
+      "learning_rate": 1.6558849955076373e-05,
+      "loss": 0.4151171875,
+      "step": 1150
+    },
+    {
+      "epoch": 0.5390835579514824,
+      "grad_norm": 11.75,
+      "learning_rate": 1.6409104522312072e-05,
+      "loss": 0.3448046875,
+      "step": 1200
+    },
+    {
+      "epoch": 0.5615453728661276,
+      "grad_norm": 9.75,
+      "learning_rate": 1.625935908954777e-05,
+      "loss": 0.3898046875,
+      "step": 1250
+    },
+    {
+      "epoch": 0.5840071877807727,
+      "grad_norm": 10.875,
+      "learning_rate": 1.610961365678347e-05,
+      "loss": 0.3674609375,
+      "step": 1300
+    },
+    {
+      "epoch": 0.6064690026954178,
+      "grad_norm": 13.25,
+      "learning_rate": 1.595986822401917e-05,
+      "loss": 0.32759765625,
+      "step": 1350
+    },
+    {
+      "epoch": 0.6289308176100629,
+      "grad_norm": 23.0,
+      "learning_rate": 1.581012279125487e-05,
+      "loss": 0.38048828125,
+      "step": 1400
+    },
+    {
+      "epoch": 0.651392632524708,
+      "grad_norm": 14.875,
+      "learning_rate": 1.5660377358490568e-05,
+      "loss": 0.36399658203125,
+      "step": 1450
+    },
+    {
+      "epoch": 0.6738544474393531,
+      "grad_norm": 10.75,
+      "learning_rate": 1.5510631925726267e-05,
+      "loss": 0.3512890625,
+      "step": 1500
+    },
+    {
+      "epoch": 0.6963162623539982,
+      "grad_norm": 11.6875,
+      "learning_rate": 1.5360886492961966e-05,
+      "loss": 0.3547265625,
+      "step": 1550
+    },
+    {
+      "epoch": 0.7187780772686433,
+      "grad_norm": 12.875,
+      "learning_rate": 1.5211141060197666e-05,
+      "loss": 0.34892578125,
+      "step": 1600
+    },
+    {
+      "epoch": 0.7412398921832885,
+      "grad_norm": 10.8125,
+      "learning_rate": 1.5061395627433365e-05,
+      "loss": 0.38994140625,
+      "step": 1650
+    },
+    {
+      "epoch": 0.7637017070979335,
+      "grad_norm": 12.25,
+      "learning_rate": 1.4911650194669062e-05,
+      "loss": 0.3660469055175781,
+      "step": 1700
+    },
+    {
+      "epoch": 0.7861635220125787,
+      "grad_norm": 14.0,
+      "learning_rate": 1.4761904761904763e-05,
+      "loss": 0.4043359375,
+      "step": 1750
+    },
+    {
+      "epoch": 0.8086253369272237,
+      "grad_norm": 11.3125,
+      "learning_rate": 1.4612159329140462e-05,
+      "loss": 0.3288671875,
+      "step": 1800
+    },
+    {
+      "epoch": 0.8310871518418689,
+      "grad_norm": 9.5,
+      "learning_rate": 1.4462413896376162e-05,
+      "loss": 0.347109375,
+      "step": 1850
+    },
+    {
+      "epoch": 0.8535489667565139,
+      "grad_norm": 13.75,
+      "learning_rate": 1.4312668463611861e-05,
+      "loss": 0.3082421875,
+      "step": 1900
+    },
+    {
+      "epoch": 0.876010781671159,
+      "grad_norm": 8.375,
+      "learning_rate": 1.416292303084756e-05,
+      "loss": 0.3761328125,
+      "step": 1950
+    },
+    {
+      "epoch": 0.8984725965858041,
+      "grad_norm": 7.5,
+      "learning_rate": 1.401317759808326e-05,
+      "loss": 0.3083203125,
+      "step": 2000
+    },
+    {
+      "epoch": 0.9209344115004492,
+      "grad_norm": 13.0,
+      "learning_rate": 1.386343216531896e-05,
+      "loss": 0.3319140625,
+      "step": 2050
+    },
+    {
+      "epoch": 0.9433962264150944,
+      "grad_norm": 15.625,
+      "learning_rate": 1.3713686732554658e-05,
+      "loss": 0.374296875,
+      "step": 2100
+    },
+    {
+      "epoch": 0.9658580413297394,
+      "grad_norm": 14.0625,
+      "learning_rate": 1.3563941299790357e-05,
+      "loss": 0.4065625,
+      "step": 2150
+    },
+    {
+      "epoch": 0.9883198562443846,
+      "grad_norm": 11.625,
+      "learning_rate": 1.3414195867026056e-05,
+      "loss": 0.381357421875,
+      "step": 2200
+    },
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.4728180468082428,
+      "eval_runtime": 2.9031,
+      "eval_samples_per_second": 116.427,
+      "eval_steps_per_second": 3.789,
+      "step": 2226
+    },
+    {
+      "epoch": 1.0107816711590296,
+      "grad_norm": 13.625,
+      "learning_rate": 1.3264450434261755e-05,
+      "loss": 0.32091796875,
+      "step": 2250
+    },
+    {
+      "epoch": 1.0332434860736748,
+      "grad_norm": 10.375,
+      "learning_rate": 1.3114705001497456e-05,
+      "loss": 0.35169921875,
+      "step": 2300
+    },
+    {
+      "epoch": 1.05570530098832,
+      "grad_norm": 14.5625,
+      "learning_rate": 1.2964959568733156e-05,
+      "loss": 0.355419921875,
+      "step": 2350
+    },
+    {
+      "epoch": 1.0781671159029649,
+      "grad_norm": 11.875,
+      "learning_rate": 1.2815214135968855e-05,
+      "loss": 0.376484375,
+      "step": 2400
+    },
+    {
+      "epoch": 1.10062893081761,
+      "grad_norm": 14.75,
+      "learning_rate": 1.2665468703204552e-05,
+      "loss": 0.3365234375,
+      "step": 2450
+    },
+    {
+      "epoch": 1.1230907457322552,
+      "grad_norm": 184.0,
+      "learning_rate": 1.2515723270440252e-05,
+      "loss": 0.35140625,
+      "step": 2500
+    },
+    {
+      "epoch": 1.1455525606469004,
+      "grad_norm": 12.1875,
+      "learning_rate": 1.236597783767595e-05,
+      "loss": 0.368740234375,
+      "step": 2550
+    },
+    {
+      "epoch": 1.1680143755615453,
+      "grad_norm": 10.75,
+      "learning_rate": 1.2216232404911652e-05,
+      "loss": 0.34755859375,
+      "step": 2600
+    },
+    {
+      "epoch": 1.1904761904761905,
+      "grad_norm": 12.25,
+      "learning_rate": 1.2066486972147351e-05,
+      "loss": 0.358671875,
+      "step": 2650
+    },
+    {
+      "epoch": 1.2129380053908356,
+      "grad_norm": 12.6875,
+      "learning_rate": 1.191674153938305e-05,
+      "loss": 0.32927734375,
+      "step": 2700
+    },
+    {
+      "epoch": 1.2353998203054806,
+      "grad_norm": 11.1875,
+      "learning_rate": 1.176699610661875e-05,
+      "loss": 0.3071875,
+      "step": 2750
+    },
+    {
+      "epoch": 1.2578616352201257,
+      "grad_norm": 10.5,
+      "learning_rate": 1.1617250673854449e-05,
+      "loss": 0.315390625,
+      "step": 2800
+    },
+    {
+      "epoch": 1.280323450134771,
+      "grad_norm": 8.125,
+      "learning_rate": 1.1467505241090146e-05,
+      "loss": 0.33751953125,
+      "step": 2850
+    },
+    {
+      "epoch": 1.302785265049416,
+      "grad_norm": 10.25,
+      "learning_rate": 1.1317759808325847e-05,
+      "loss": 0.39599609375,
+      "step": 2900
+    },
+    {
+      "epoch": 1.3252470799640612,
+      "grad_norm": 10.3125,
+      "learning_rate": 1.1168014375561546e-05,
+      "loss": 0.3711328125,
+      "step": 2950
+    },
+    {
+      "epoch": 1.3477088948787062,
+      "grad_norm": 13.875,
+      "learning_rate": 1.1018268942797245e-05,
+      "loss": 0.370859375,
+      "step": 3000
+    },
+    {
+      "epoch": 1.3701707097933513,
+      "grad_norm": 8.875,
+      "learning_rate": 1.0868523510032945e-05,
+      "loss": 0.317587890625,
+      "step": 3050
+    },
+    {
+      "epoch": 1.3926325247079965,
+      "grad_norm": 8.3125,
+      "learning_rate": 1.0718778077268644e-05,
+      "loss": 0.365966796875,
+      "step": 3100
+    },
+    {
+      "epoch": 1.4150943396226414,
+      "grad_norm": 8.0,
+      "learning_rate": 1.0569032644504345e-05,
+      "loss": 0.3309375,
+      "step": 3150
+    },
+    {
+      "epoch": 1.4375561545372866,
+      "grad_norm": 11.5625,
+      "learning_rate": 1.0419287211740044e-05,
+      "loss": 0.3394921875,
+      "step": 3200
+    },
+    {
+      "epoch": 1.4600179694519317,
+      "grad_norm": 13.5625,
+      "learning_rate": 1.0269541778975742e-05,
+      "loss": 0.33572265625,
+      "step": 3250
+    },
+    {
+      "epoch": 1.482479784366577,
+      "grad_norm": 13.1875,
+      "learning_rate": 1.011979634621144e-05,
+      "loss": 0.296796875,
+      "step": 3300
+    },
+    {
+      "epoch": 1.504941599281222,
+      "grad_norm": 12.6875,
+      "learning_rate": 9.97005091344714e-06,
+      "loss": 0.370859375,
+      "step": 3350
+    },
+    {
+      "epoch": 1.527403414195867,
+      "grad_norm": 23.5,
+      "learning_rate": 9.820305480682841e-06,
+      "loss": 0.32857421875,
+      "step": 3400
+    },
+    {
+      "epoch": 1.5498652291105122,
+      "grad_norm": 17.5,
+      "learning_rate": 9.670560047918538e-06,
+      "loss": 0.31537109375,
+      "step": 3450
+    },
+    {
+      "epoch": 1.5723270440251573,
+      "grad_norm": 8.25,
+      "learning_rate": 9.52081461515424e-06,
+      "loss": 0.30197265625,
+      "step": 3500
+    },
+    {
+      "epoch": 1.5947888589398023,
+      "grad_norm": 6.4375,
+      "learning_rate": 9.371069182389939e-06,
+      "loss": 0.30685546875,
+      "step": 3550
+    },
+    {
+      "epoch": 1.6172506738544474,
+      "grad_norm": 14.0,
+      "learning_rate": 9.221323749625636e-06,
+      "loss": 0.333203125,
+      "step": 3600
+    },
+    {
+      "epoch": 1.6397124887690926,
+      "grad_norm": 11.5,
+      "learning_rate": 9.071578316861337e-06,
+      "loss": 0.34734375,
+      "step": 3650
+    },
+    {
+      "epoch": 1.6621743036837375,
+      "grad_norm": 10.625,
+      "learning_rate": 8.921832884097036e-06,
+      "loss": 0.344521484375,
+      "step": 3700
+    },
+    {
+      "epoch": 1.684636118598383,
+      "grad_norm": 11.875,
+      "learning_rate": 8.772087451332735e-06,
+      "loss": 0.36986328125,
+      "step": 3750
+    },
+    {
+      "epoch": 1.7070979335130279,
+      "grad_norm": 10.8125,
+      "learning_rate": 8.622342018568435e-06,
+      "loss": 0.33515625,
+      "step": 3800
+    },
+    {
+      "epoch": 1.7295597484276728,
+      "grad_norm": 6.5625,
+      "learning_rate": 8.472596585804134e-06,
+      "loss": 0.35638671875,
+      "step": 3850
+    },
+    {
+      "epoch": 1.7520215633423182,
+      "grad_norm": 8.1875,
+      "learning_rate": 8.322851153039833e-06,
+      "loss": 0.3527734375,
+      "step": 3900
+    },
+    {
+      "epoch": 1.7744833782569631,
+      "grad_norm": 11.3125,
+      "learning_rate": 8.173105720275532e-06,
+      "loss": 0.36638671875,
+      "step": 3950
+    },
+    {
+      "epoch": 1.7969451931716083,
+      "grad_norm": 12.75,
+      "learning_rate": 8.023360287511232e-06,
+      "loss": 0.3379296875,
+      "step": 4000
+    },
+    {
+      "epoch": 1.8194070080862534,
+      "grad_norm": 7.96875,
+      "learning_rate": 7.87361485474693e-06,
+      "loss": 0.3413671875,
+      "step": 4050
+    },
+    {
+      "epoch": 1.8418688230008984,
+      "grad_norm": 10.875,
+      "learning_rate": 7.72386942198263e-06,
+      "loss": 0.342890625,
+      "step": 4100
+    },
+    {
+      "epoch": 1.8643306379155435,
+      "grad_norm": 12.75,
+      "learning_rate": 7.574123989218329e-06,
+      "loss": 0.3216015625,
+      "step": 4150
+    },
+    {
+      "epoch": 1.8867924528301887,
+      "grad_norm": 90.0,
+      "learning_rate": 7.4243785564540284e-06,
+      "loss": 0.341484375,
+      "step": 4200
+    },
+    {
+      "epoch": 1.9092542677448336,
+      "grad_norm": 6.28125,
+      "learning_rate": 7.2746331236897285e-06,
+      "loss": 0.33556640625,
+      "step": 4250
+    },
+    {
+      "epoch": 1.931716082659479,
+      "grad_norm": 12.9375,
+      "learning_rate": 7.124887690925427e-06,
+      "loss": 0.36302734375,
+      "step": 4300
+    },
+    {
+      "epoch": 1.954177897574124,
+      "grad_norm": 14.4375,
+      "learning_rate": 6.975142258161126e-06,
+      "loss": 0.3556640625,
+      "step": 4350
+    },
+    {
+      "epoch": 1.9766397124887691,
+      "grad_norm": 10.75,
+      "learning_rate": 6.825396825396826e-06,
+      "loss": 0.271953125,
+      "step": 4400
+    },
+    {
+      "epoch": 1.9991015274034143,
+      "grad_norm": 12.375,
+      "learning_rate": 6.675651392632525e-06,
+      "loss": 0.338671875,
+      "step": 4450
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.4453125,
+      "eval_runtime": 2.9981,
+      "eval_samples_per_second": 112.736,
+      "eval_steps_per_second": 3.669,
+      "step": 4452
+    },
+    {
+      "epoch": 2.0215633423180592,
+      "grad_norm": 12.4375,
+      "learning_rate": 6.5259059598682255e-06,
+      "loss": 0.36619140625,
+      "step": 4500
+    },
+    {
+      "epoch": 2.0440251572327046,
+      "grad_norm": 17.25,
+      "learning_rate": 6.376160527103924e-06,
+      "loss": 0.38908203125,
+      "step": 4550
+    },
+    {
+      "epoch": 2.0664869721473496,
+      "grad_norm": 8.625,
+      "learning_rate": 6.226415094339623e-06,
+      "loss": 0.35123046875,
+      "step": 4600
+    },
+    {
+      "epoch": 2.0889487870619945,
+      "grad_norm": 13.0625,
+      "learning_rate": 6.076669661575323e-06,
+      "loss": 0.36943359375,
+      "step": 4650
+    },
+    {
+      "epoch": 2.11141060197664,
+      "grad_norm": 12.8125,
+      "learning_rate": 5.9269242288110215e-06,
+      "loss": 0.35146484375,
+      "step": 4700
+    },
+    {
+      "epoch": 2.133872416891285,
+      "grad_norm": 10.9375,
+      "learning_rate": 5.777178796046721e-06,
+      "loss": 0.3725390625,
+      "step": 4750
+    },
+    {
+      "epoch": 2.1563342318059298,
+      "grad_norm": 13.75,
+      "learning_rate": 5.627433363282421e-06,
+      "loss": 0.36087890625,
+      "step": 4800
+    },
+    {
+      "epoch": 2.178796046720575,
+      "grad_norm": 14.0625,
+      "learning_rate": 5.47768793051812e-06,
+      "loss": 0.36873046875,
+      "step": 4850
+    },
+    {
+      "epoch": 2.20125786163522,
+      "grad_norm": 15.1875,
+      "learning_rate": 5.327942497753818e-06,
+      "loss": 0.35255859375,
+      "step": 4900
+    },
+    {
+      "epoch": 2.223719676549865,
+      "grad_norm": 11.5625,
+      "learning_rate": 5.1781970649895185e-06,
+      "loss": 0.3751953125,
+      "step": 4950
+    },
+    {
+      "epoch": 2.2461814914645104,
+      "grad_norm": 14.0,
+      "learning_rate": 5.028451632225218e-06,
+      "loss": 0.384609375,
+      "step": 5000
+    },
+    {
+      "epoch": 2.2686433063791553,
+      "grad_norm": 18.5,
+      "learning_rate": 4.878706199460917e-06,
+      "loss": 0.37974609375,
+      "step": 5050
+    },
+    {
+      "epoch": 2.2911051212938007,
+      "grad_norm": 13.875,
+      "learning_rate": 4.728960766696616e-06,
+      "loss": 0.35677734375,
+      "step": 5100
+    },
+    {
+      "epoch": 2.3135669362084457,
+      "grad_norm": 14.9375,
+      "learning_rate": 4.579215333932315e-06,
+      "loss": 0.33296875,
+      "step": 5150
+    },
+    {
+      "epoch": 2.3360287511230906,
+      "grad_norm": 16.25,
+      "learning_rate": 4.429469901168015e-06,
+      "loss": 0.3341796875,
+      "step": 5200
+    },
+    {
+      "epoch": 2.358490566037736,
+      "grad_norm": 12.4375,
+      "learning_rate": 4.279724468403715e-06,
+      "loss": 0.3711328125,
+      "step": 5250
+    },
+    {
+      "epoch": 2.380952380952381,
+      "grad_norm": 13.5625,
+      "learning_rate": 4.129979035639413e-06,
+      "loss": 0.3920703125,
+      "step": 5300
+    },
+    {
+      "epoch": 2.403414195867026,
+      "grad_norm": 10.25,
+      "learning_rate": 3.980233602875112e-06,
+      "loss": 0.32033203125,
+      "step": 5350
+    },
+    {
+      "epoch": 2.4258760107816713,
+      "grad_norm": 10.0625,
+      "learning_rate": 3.830488170110812e-06,
+      "loss": 0.345234375,
+      "step": 5400
+    },
+    {
+      "epoch": 2.448337825696316,
+      "grad_norm": 15.8125,
+      "learning_rate": 3.680742737346511e-06,
+      "loss": 0.40408203125,
+      "step": 5450
+    },
+    {
+      "epoch": 2.470799640610961,
+      "grad_norm": 12.8125,
+      "learning_rate": 3.5309973045822103e-06,
+      "loss": 0.345,
+      "step": 5500
+    },
+    {
+      "epoch": 2.4932614555256065,
+      "grad_norm": 15.0,
+      "learning_rate": 3.38125187181791e-06,
+      "loss": 0.3587109375,
+      "step": 5550
+    },
+    {
+      "epoch": 2.5157232704402515,
+      "grad_norm": 13.75,
+      "learning_rate": 3.231506439053609e-06,
+      "loss": 0.37056640625,
+      "step": 5600
+    },
+    {
+      "epoch": 2.538185085354897,
+      "grad_norm": 9.0,
+      "learning_rate": 3.0817610062893084e-06,
+      "loss": 0.352724609375,
+      "step": 5650
+    },
+    {
+      "epoch": 2.560646900269542,
+      "grad_norm": 12.5625,
+      "learning_rate": 2.9320155735250076e-06,
+      "loss": 0.3362890625,
+      "step": 5700
+    },
+    {
+      "epoch": 2.5831087151841867,
+      "grad_norm": 13.4375,
+      "learning_rate": 2.7822701407607073e-06,
+      "loss": 0.3505078125,
+      "step": 5750
+    },
+    {
+      "epoch": 2.605570530098832,
+      "grad_norm": 14.0,
+      "learning_rate": 2.632524707996406e-06,
+      "loss": 0.401796875,
+      "step": 5800
+    },
+    {
+      "epoch": 2.628032345013477,
+      "grad_norm": 12.9375,
+      "learning_rate": 2.4827792752321057e-06,
+      "loss": 0.3735546875,
+      "step": 5850
+    },
+    {
+      "epoch": 2.6504941599281224,
+      "grad_norm": 11.375,
+      "learning_rate": 2.333033842467805e-06,
+      "loss": 0.3434375,
+      "step": 5900
+    },
+    {
+      "epoch": 2.6729559748427674,
+      "grad_norm": 10.9375,
+      "learning_rate": 2.183288409703504e-06,
+      "loss": 0.34935546875,
+      "step": 5950
+    },
+    {
+      "epoch": 2.6954177897574123,
+      "grad_norm": 11.0,
+      "learning_rate": 2.0335429769392034e-06,
+      "loss": 0.3768359375,
+      "step": 6000
+    },
+    {
+      "epoch": 2.7178796046720572,
+      "grad_norm": 18.0,
+      "learning_rate": 1.8837975441749028e-06,
+      "loss": 0.35380859375,
+      "step": 6050
+    },
+    {
+      "epoch": 2.7403414195867026,
+      "grad_norm": 12.125,
+      "learning_rate": 1.734052111410602e-06,
+      "loss": 0.3534765625,
+      "step": 6100
+    },
+    {
+      "epoch": 2.7628032345013476,
+      "grad_norm": 10.375,
+      "learning_rate": 1.5843066786463015e-06,
+      "loss": 0.382421875,
+      "step": 6150
+    },
+    {
+      "epoch": 2.785265049415993,
+      "grad_norm": 10.75,
+      "learning_rate": 1.4345612458820007e-06,
+      "loss": 0.31984375,
+      "step": 6200
+    },
+    {
+      "epoch": 2.807726864330638,
+      "grad_norm": 13.3125,
+      "learning_rate": 1.2848158131177e-06,
+      "loss": 0.37189453125,
+      "step": 6250
+    },
+    {
+      "epoch": 2.830188679245283,
+      "grad_norm": 13.875,
+      "learning_rate": 1.1350703803533992e-06,
+      "loss": 0.37873046875,
+      "step": 6300
+    },
+    {
+      "epoch": 2.852650494159928,
+      "grad_norm": 8.8125,
+      "learning_rate": 9.853249475890986e-07,
+      "loss": 0.344921875,
+      "step": 6350
+    },
+    {
+      "epoch": 2.875112309074573,
+      "grad_norm": 11.625,
+      "learning_rate": 8.355795148247979e-07,
+      "loss": 0.3953515625,
+      "step": 6400
+    },
+    {
+      "epoch": 2.8975741239892185,
+      "grad_norm": 15.6875,
+      "learning_rate": 6.858340820604972e-07,
+      "loss": 0.34615234375,
+      "step": 6450
+    },
+    {
+      "epoch": 2.9200359389038635,
+      "grad_norm": 11.8125,
+      "learning_rate": 5.360886492961965e-07,
+      "loss": 0.3701953125,
+      "step": 6500
+    },
+    {
+      "epoch": 2.9424977538185084,
+      "grad_norm": 184.0,
+      "learning_rate": 3.863432165318958e-07,
+      "loss": 0.3726953125,
+      "step": 6550
+    },
+    {
+      "epoch": 2.964959568733154,
+      "grad_norm": 6.84375,
+      "learning_rate": 2.365977837675951e-07,
+      "loss": 0.327265625,
+      "step": 6600
+    },
+    {
+      "epoch": 2.9874213836477987,
+      "grad_norm": 15.625,
+      "learning_rate": 8.685235100329441e-08,
+      "loss": 0.36375,
+      "step": 6650
+    },
+    {
+      "epoch": 3.0,
+      "eval_loss": 0.443925678730011,
+      "eval_runtime": 2.6416,
+      "eval_samples_per_second": 127.953,
+      "eval_steps_per_second": 4.164,
+      "step": 6678
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 6678,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 0.0,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": null
+}

ncbi-MedCPT-Cross-Encoder-E3-S1-Mpairwise-FullDataTrue/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:90ac4a19c60de832e6829401bdbbab0d9ec1dc5d343ad75f0ee82a619c288532
+size 5329