Add a cross-encoder checkpoint based on google-bert/bert-base-cased

#1
crossencoder-checkpoints/checkpoint-googlebert-10000/README.md ADDED
@@ -0,0 +1,523 @@
---
language:
- en
license: apache-2.0
tags:
- sentence-transformers
- cross-encoder
- generated_from_trainer
- dataset_size:330152
- loss:CachedMultipleNegativesRankingLoss
base_model: google-bert/bert-base-cased
pipeline_tag: text-ranking
library_name: sentence-transformers
metrics:
- map
- mrr@10
- ndcg@10
model-index:
- name: CrossEncoder based on google-bert/bert-base-cased
  results:
  - task:
      type: cross-encoder-reranking
      name: Cross Encoder Reranking
    dataset:
      name: dev
      type: dev
    metrics:
    - type: map
      value: 0.9441464035183368
      name: Map
    - type: mrr@10
      value: 0.9441464035183368
      name: Mrr@10
    - type: ndcg@10
      value: 0.9703932950632154
      name: Ndcg@10
  - task:
      type: cross-encoder-reranking
      name: Cross Encoder Reranking
    dataset:
      name: NanoNQ R100
      type: NanoNQ_R100
    metrics:
    - type: map
      value: 0.2676
      name: Map
    - type: mrr@10
      value: 0.304
      name: Mrr@10
    - type: ndcg@10
      value: 0.3307
      name: Ndcg@10
  - task:
      type: cross-encoder-reranking
      name: Cross Encoder Reranking
    dataset:
      name: NanoSCIDOCS R100
      type: NanoSCIDOCS_R100
    metrics:
    - type: map
      value: 0.2425
      name: Map
    - type: mrr@10
      value: 0.5271
      name: Mrr@10
    - type: ndcg@10
      value: 0.2968
      name: Ndcg@10
  - task:
      type: cross-encoder-reranking
      name: Cross Encoder Reranking
    dataset:
      name: NanoSciFact R100
      type: NanoSciFact_R100
    metrics:
    - type: map
      value: 0.6758
      name: Map
    - type: mrr@10
      value: 0.6809
      name: Mrr@10
    - type: ndcg@10
      value: 0.7085
      name: Ndcg@10
  - task:
      type: cross-encoder-nano-beir
      name: Cross Encoder Nano BEIR
    dataset:
      name: NanoBEIR R100 mean
      type: NanoBEIR_R100_mean
    metrics:
    - type: map
      value: 0.3953
      name: Map
    - type: mrr@10
      value: 0.504
      name: Mrr@10
    - type: ndcg@10
      value: 0.4453
      name: Ndcg@10
---

# CrossEncoder based on google-bert/bert-base-cased

This is a [Cross Encoder](https://www.sbert.net/docs/cross_encoder/usage/usage.html) model finetuned from [google-bert/bert-base-cased](https://huggingface.co/google-bert/bert-base-cased) using the [sentence-transformers](https://www.SBERT.net) library. It computes scores for pairs of texts, which can be used for text reranking and semantic search.

## Model Details

### Model Description
- **Model Type:** Cross Encoder
- **Base model:** [google-bert/bert-base-cased](https://huggingface.co/google-bert/bert-base-cased) <!-- at revision cd5ef92a9fb2f889e972770a36d4ed042daf221e -->
- **Maximum Sequence Length:** 512 tokens
- **Number of Output Labels:** 1 label
<!-- - **Training Dataset:** Unknown -->
- **Language:** en
- **License:** apache-2.0

### Model Sources

- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
- **Documentation:** [Cross Encoder Documentation](https://www.sbert.net/docs/cross_encoder/usage/usage.html)
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
- **Hugging Face:** [Cross Encoders on Hugging Face](https://huggingface.co/models?library=sentence-transformers&other=cross-encoder)

## Usage

### Direct Usage (Sentence Transformers)

First install the Sentence Transformers library:

```bash
pip install -U sentence-transformers
```

Then you can load this model and run inference.
```python
from sentence_transformers import CrossEncoder

# Download from the 🤗 Hub
model = CrossEncoder("cross_encoder_model_id")
# Get scores for pairs of texts
pairs = [
    ['neuropeptides in small intestine enteric system', 'the enteric system in the gut wall (figure 6–2) is the most extensively studied system containing nanc neurons in addition to cholinergic and adrenergic fibers. in the small intestine, for example, these neurons contain one or more of the following: nitric oxide synthase (which produces nitric oxide, no), calcitonin gene-related peptide, cholecystokinin, dynorphin, enkephalins, gastrin-releasing peptide, 5-hydroxytryptamine (5-ht, serotonin), neuropeptide y, somatostatin, substance p, and vasoactive intestinal peptide (vip). some neurons contain as many as five different transmitters.'],
    ['how does the timing of rubella virus infection during pregnancy affect the outcome for the fetus?', 'congenital rubella syndrome the most serious consequence of rubella virus infection can develop when a woman becomes infected during pregnancy, particularly during the first trimester. the resulting complications may include miscarriage, fetal death, premature delivery, or live birth with congenital defects. infants infected with rubella virus in utero may have myriad physical defects (table 230e-1), which most commonly relate to the eyes, ears, and heart. this constellation of severe birth defects is known as congenital rubella syndrome. in addition to permanent manifestations, there are a host of transient physical manifestations, including thrombocytopenia with purpura/petechiae (e.g., dermal erythropoiesis, “blueberry muffin syndrome”). some infants may be born with congenital rubella virus infection but have no apparent signs or symptoms of crs and are referred to as “infants with congenital rubella infection only.”'],
    ['structure and function of β barrels in membrane proteins', 'most multipass membrane proteins in eukaryotic cells and in the bacterial plasma membrane are constructed from transmembrane α helices. the helices figure 10–22 steps in the folding of a multipass transmembrane protein. when a newly synthesized transmembrane α helix is released into the lipid bilayer, it is initially surrounded by lipid molecules. as the protein folds, contacts between the helices displace some of the lipid molecules surrounding the helices. figure 10–23 β barrels formed from different numbers of β strands.'],
    ['c1 complex activation in classical complement pathway', 'c1 complex, c1 protein complex activated as the first step in the classical pathway of complement activation, composed of c1q bound to two molecules each of the proteases c1r and c1s. binding of a pathogen or antibody to c1q activates c1r, which cleaves and activates c1s, which cleaves c4 and c2. c1 inhibitor (c1inh) an inhibitor protein for c1 that binds and inactivates c1r:c1s enzymatic activity. deficiency in c1inh causes hereditary angioedema through production of vasoactive peptides that cause subcutaneous and laryngeal swelling. c2 complement protein of the classical and lectin pathways that is cleaved by the c1 complex to yield c2b and c2a. c2a is an active protease that forms part of the classical c3 convertase c4bc2a. c3 complement protein on which all complement activation pathways converge. c3 cleavage forms c3b, which can bind covalently to microbial surfaces, where it promotes destruction by phagocytes.'],
    ['eortc trial neoadjuvant chemotherapy advanced ovarian cancer', 'the eortc completed a large randomized trial in 718 patients with advanced ovarian cancer comparing initial surgery followed by six cycles of carboplatin and paclitaxel with three cycles of neoadjuvant chemotherapy followed by surgical debulking and another three cycles of chemotherapy. the study found that the progression-free survival was identical in both arms (12 months) and similarly the overall survival (30 months) was the same in both arms (221). the morbidity of surgery was significantly less in patients receiving neoadjuvant chemotherapy, suggesting that in selected patients with very advanced (stages iiic and iv) ovarian cancer two to three cycles of neoadjuvant chemotherapy prior to surgical debulking is a reasonable option.'],
]
scores = model.predict(pairs)
print(scores.shape)
# (5,)

# Or rank different texts based on similarity to a single text
ranks = model.rank(
    'neuropeptides in small intestine enteric system',
    [
        'the enteric system in the gut wall (figure 6–2) is the most extensively studied system containing nanc neurons in addition to cholinergic and adrenergic fibers. in the small intestine, for example, these neurons contain one or more of the following: nitric oxide synthase (which produces nitric oxide, no), calcitonin gene-related peptide, cholecystokinin, dynorphin, enkephalins, gastrin-releasing peptide, 5-hydroxytryptamine (5-ht, serotonin), neuropeptide y, somatostatin, substance p, and vasoactive intestinal peptide (vip). some neurons contain as many as five different transmitters.',
        'congenital rubella syndrome the most serious consequence of rubella virus infection can develop when a woman becomes infected during pregnancy, particularly during the first trimester. the resulting complications may include miscarriage, fetal death, premature delivery, or live birth with congenital defects. infants infected with rubella virus in utero may have myriad physical defects (table 230e-1), which most commonly relate to the eyes, ears, and heart. this constellation of severe birth defects is known as congenital rubella syndrome. in addition to permanent manifestations, there are a host of transient physical manifestations, including thrombocytopenia with purpura/petechiae (e.g., dermal erythropoiesis, “blueberry muffin syndrome”). some infants may be born with congenital rubella virus infection but have no apparent signs or symptoms of crs and are referred to as “infants with congenital rubella infection only.”',
        'most multipass membrane proteins in eukaryotic cells and in the bacterial plasma membrane are constructed from transmembrane α helices. the helices figure 10–22 steps in the folding of a multipass transmembrane protein. when a newly synthesized transmembrane α helix is released into the lipid bilayer, it is initially surrounded by lipid molecules. as the protein folds, contacts between the helices displace some of the lipid molecules surrounding the helices. figure 10–23 β barrels formed from different numbers of β strands.',
        'c1 complex, c1 protein complex activated as the first step in the classical pathway of complement activation, composed of c1q bound to two molecules each of the proteases c1r and c1s. binding of a pathogen or antibody to c1q activates c1r, which cleaves and activates c1s, which cleaves c4 and c2. c1 inhibitor (c1inh) an inhibitor protein for c1 that binds and inactivates c1r:c1s enzymatic activity. deficiency in c1inh causes hereditary angioedema through production of vasoactive peptides that cause subcutaneous and laryngeal swelling. c2 complement protein of the classical and lectin pathways that is cleaved by the c1 complex to yield c2b and c2a. c2a is an active protease that forms part of the classical c3 convertase c4bc2a. c3 complement protein on which all complement activation pathways converge. c3 cleavage forms c3b, which can bind covalently to microbial surfaces, where it promotes destruction by phagocytes.',
        'the eortc completed a large randomized trial in 718 patients with advanced ovarian cancer comparing initial surgery followed by six cycles of carboplatin and paclitaxel with three cycles of neoadjuvant chemotherapy followed by surgical debulking and another three cycles of chemotherapy. the study found that the progression-free survival was identical in both arms (12 months) and similarly the overall survival (30 months) was the same in both arms (221). the morbidity of surgery was significantly less in patients receiving neoadjuvant chemotherapy, suggesting that in selected patients with very advanced (stages iiic and iv) ovarian cancer two to three cycles of neoadjuvant chemotherapy prior to surgical debulking is a reasonable option.',
    ]
)
# [{'corpus_id': ..., 'score': ...}, {'corpus_id': ..., 'score': ...}, ...]
```
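
The list returned by `rank` is just the pairwise scores sorted in descending order. As a quick illustration of that output shape, the dictionaries can be reproduced from plain Python (the scores below are made up for the example, not predictions from this checkpoint):

```python
# Illustration only: reproduce the shape of CrossEncoder.rank output from
# hypothetical relevance scores (made-up numbers, not real model output).
scores = [9.1, -2.3, 0.4, -1.7, 1.2]  # one score per candidate document

# Sort candidate indices by descending score, mirroring rank()'s output format
ranks = [
    {"corpus_id": i, "score": s}
    for i, s in sorted(enumerate(scores), key=lambda x: x[1], reverse=True)
]
print(ranks[0])  # the best-scoring candidate comes first
# {'corpus_id': 0, 'score': 9.1}
```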

<!--
### Direct Usage (Transformers)

<details><summary>Click to see the direct usage in Transformers</summary>

</details>
-->

<!--
### Downstream Usage (Sentence Transformers)

You can finetune this model on your own dataset.

<details><summary>Click to expand</summary>

</details>
-->

<!--
### Out-of-Scope Use

*List how the model may foreseeably be misused and address what users ought not to do with the model.*
-->

## Evaluation

### Metrics

#### Cross Encoder Reranking

* Dataset: `dev`
* Evaluated with [<code>CrossEncoderRerankingEvaluator</code>](https://sbert.net/docs/package_reference/cross_encoder/evaluation.html#sentence_transformers.cross_encoder.evaluation.CrossEncoderRerankingEvaluator) with these parameters:
  ```json
  {
      "at_k": 10
  }
  ```

| Metric      | Value      |
|:------------|:-----------|
| map         | 0.9441     |
| mrr@10      | 0.9441     |
| **ndcg@10** | **0.9704** |

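The `map`, `mrr@10`, and `ndcg@10` figures reported here follow the standard ranking-metric definitions. As a reference point, here is a minimal, self-contained sketch of `mrr@10` and `ndcg@10` on a hypothetical binary-relevance ranking (not output from this model):

```python
import math

def mrr_at_k(ranked_relevance, k=10):
    """Reciprocal rank of the first relevant document within the top k."""
    for i, rel in enumerate(ranked_relevance[:k], start=1):
        if rel:
            return 1.0 / i
    return 0.0

def ndcg_at_k(ranked_relevance, k=10):
    """DCG of the given ranking divided by DCG of the ideal ranking."""
    dcg = sum(rel / math.log2(i + 1)
              for i, rel in enumerate(ranked_relevance[:k], start=1))
    ideal = sorted(ranked_relevance, reverse=True)
    idcg = sum(rel / math.log2(i + 1)
               for i, rel in enumerate(ideal[:k], start=1))
    return dcg / idcg if idcg > 0 else 0.0

# Hypothetical ranking of ten candidates: 1 = relevant, 0 = not relevant
ranking = [0, 1, 0, 1, 0, 0, 0, 0, 0, 0]
print(mrr_at_k(ranking))   # first relevant hit at rank 2 -> 0.5
print(ndcg_at_k(ranking))  # < 1.0, since the ideal ranking puts both hits first
```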
#### Cross Encoder Reranking

* Datasets: `NanoNQ_R100`, `NanoSCIDOCS_R100` and `NanoSciFact_R100`
* Evaluated with [<code>CrossEncoderRerankingEvaluator</code>](https://sbert.net/docs/package_reference/cross_encoder/evaluation.html#sentence_transformers.cross_encoder.evaluation.CrossEncoderRerankingEvaluator) with these parameters:
  ```json
  {
      "at_k": 10,
      "always_rerank_positives": true
  }
  ```

| Metric      | NanoNQ_R100          | NanoSCIDOCS_R100     | NanoSciFact_R100     |
|:------------|:---------------------|:---------------------|:---------------------|
| map         | 0.2676 (-0.1520)     | 0.2425 (-0.0318)     | 0.6758 (+0.0060)     |
| mrr@10      | 0.3040 (-0.1227)     | 0.5271 (-0.0324)     | 0.6809 (+0.0028)     |
| **ndcg@10** | **0.3307 (-0.1699)** | **0.2968 (-0.0384)** | **0.7085 (-0.0014)** |

#### Cross Encoder Nano BEIR

* Dataset: `NanoBEIR_R100_mean`
* Evaluated with [<code>CrossEncoderNanoBEIREvaluator</code>](https://sbert.net/docs/package_reference/cross_encoder/evaluation.html#sentence_transformers.cross_encoder.evaluation.CrossEncoderNanoBEIREvaluator) with these parameters:
  ```json
  {
      "dataset_names": [
          "nq",
          "scidocs",
          "scifact"
      ],
      "rerank_k": 100,
      "at_k": 10,
      "always_rerank_positives": true
  }
  ```

| Metric      | Value                |
|:------------|:---------------------|
| map         | 0.3953 (-0.0593)     |
| mrr@10      | 0.5040 (-0.0508)     |
| **ndcg@10** | **0.4453 (-0.0699)** |

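As the dataset name suggests, `NanoBEIR_R100_mean` is the unweighted average of the three per-dataset results, which the arithmetic confirms:

```python
# Check that the NanoBEIR_R100_mean figures equal the unweighted means of
# the per-dataset results reported above (NanoNQ, NanoSCIDOCS, NanoSciFact).
per_dataset = {
    "map":     [0.2676, 0.2425, 0.6758],
    "mrr@10":  [0.3040, 0.5271, 0.6809],
    "ndcg@10": [0.3307, 0.2968, 0.7085],
}
reported_mean = {"map": 0.3953, "mrr@10": 0.5040, "ndcg@10": 0.4453}

for metric, values in per_dataset.items():
    mean = sum(values) / len(values)
    # Means match the reported values up to 4-decimal rounding
    assert abs(mean - reported_mean[metric]) < 5e-5, metric
    print(f"{metric}: {mean:.4f}")
```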
<!--
## Bias, Risks and Limitations

*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
-->

<!--
### Recommendations

*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
-->

## Training Details

### Training Dataset

#### Unnamed Dataset

* Size: 330,152 training samples
* Columns: <code>query</code>, <code>document</code>, <code>negative_1</code>, <code>negative_2</code>, <code>negative_3</code>, and <code>negative_4</code>
* Approximate statistics based on the first 1000 samples:

| | query | document | negative_1 | negative_2 | negative_3 | negative_4 |
|:--------|:------|:------|:------|:------|:------|:------|
| type | string | string | string | string | string | string |
| details | <ul><li>min: 29 characters</li><li>mean: 62.28 characters</li><li>max: 149 characters</li></ul> | <ul><li>min: 4 characters</li><li>mean: 774.71 characters</li><li>max: 999 characters</li></ul> | <ul><li>min: 49 characters</li><li>mean: 786.29 characters</li><li>max: 999 characters</li></ul> | <ul><li>min: 22 characters</li><li>mean: 797.75 characters</li><li>max: 999 characters</li></ul> | <ul><li>min: 39 characters</li><li>mean: 805.68 characters</li><li>max: 999 characters</li></ul> | <ul><li>min: 27 characters</li><li>mean: 787.87 characters</li><li>max: 999 characters</li></ul> |

* Samples:

| query | document | negative_1 | negative_2 | negative_3 | negative_4 |
|:------|:------|:------|:------|:------|:------|
| <code>neuropeptides in small intestine enteric system</code> | <code>the enteric system in the gut wall (figure 6–2) is the most extensively studied system containing nanc neurons in addition to cholinergic and adrenergic fibers. in the small intestine, for example, these neurons contain one or more of the following: nitric oxide synthase (which produces nitric oxide, no), calcitonin gene-related peptide, cholecystokinin, dynorphin, enkephalins, gastrin-releasing peptide, 5-hydroxytryptamine (5-ht, serotonin), neuropeptide y, somatostatin, substance p, and vasoactive intestinal peptide (vip). some neurons contain as many as five different transmitters.</code> | <code>28-4). small-intestinal absorption and secretion are tightly regulated; derangements in water and electrolyte homeostasis characteristic of many of the disorders discussed in this chapter play an important role in contributing to their associated clinical features.gut epithelia have two pathways for water transport: (a) the paracellular route, which involves transport through the spaces between cells, (b) the transcellular route, through apical and the basolateral cell membranes, with most occurring through brunicardi_ch28_p1219-p1258.indd 122223/02/19 2:24 pm 1223small intestinechapter 28the transcellular pathway.4 the specific transport mechanisms mediating this transcellular transport are not completely char-acterized, and they may involve passive diffusion through the phospholipid bilayer, cotransport with other ions and nutrients, or diffusion through water channels called aquaporins. many different types of aquaporins have been identified; however, their contribution to</code> | <code>like extensions of the apical surface of each intesti-nal epithelial cell (enterocyte), further increase the surface for absorption of metabolites. mucosal glands extend into the lamina propria. they contain the stem cells and developing cells that will ultimately migrate to the surface of the villi. in the duodenum, submucosal glands (brunner’s glands) secrete an alkaline mucus that helps to neutralize the acidic chyme. enterocytes not only absorb metabolites digested in the intestinal lumen but also synthesize enzymes inserted into the membrane of the mi-crovilli for terminal digestion of disaccharides and dipeptides.</code> | <code>a well-known substance that appears to act as a paracrine hormone within the gastrointestinal tract and pancreas is somatostatin, which inhibits other gas-trointestinal and pancreatic islet endocrine cells. in addition to the established gastrointestinal hormones, several gastrointestinal peptides have not been definitely classified as hormones or paracrine hormones. these pep-tides are designated candidate or putative hormones. other locally active agents isolated from the gastroin-testinal mucosa are neurotransmitters. these agents are released from nerve endings close to the target cell, usu-ally the smooth muscle of the muscularis mucosae, the muscularis externa, or the tunica media of a blood vessel. enteroendocrine cells can also secrete neurotransmitters that activate afferent neurons, sending signals to the cns and enteric division of autonomic nervous system. in addi-tion to acetylcholine (not a peptide), peptides found in nerve fibers of the gastrointestinal tract are</code> | <code>activity in the enteric nervous system is modulated by the sympathetic nervous system. sympathetic post-ganglionic neurons that contain norepinephrine inhibit intestinal motility, those that contain norepinephrine and neuropeptide y regulate blood flow, and those that contain norepinephrine and somatostatin control intestinal secretion. feedback is provided by intestinofugal neurons that project back from the myenteric plexus to the sympathetic ganglia. the submucosal plexus regulates ion and water transport across the intestinal epithelium and glandular secretion. it also communicates with the myenteric plexus to ensure coordination of the functions of the two components of the enteric nervous system. the neurons and neural circuits of the submucosal plexus are not as well understood as those of the myenteric plexus, but many of the neurons contain neuropeptides, and the neural networks are well organized.</code> |
| <code>how does the timing of rubella virus infection during pregnancy affect the outcome for the fetus?</code> | <code>congenital rubella syndrome the most serious consequence of rubella virus infection can develop when a woman becomes infected during pregnancy, particularly during the first trimester. the resulting complications may include miscarriage, fetal death, premature delivery, or live birth with congenital defects. infants infected with rubella virus in utero may have myriad physical defects (table 230e-1), which most commonly relate to the eyes, ears, and heart. this constellation of severe birth defects is known as congenital rubella syndrome. in addition to permanent manifestations, there are a host of transient physical manifestations, including thrombocytopenia with purpura/petechiae (e.g., dermal erythropoiesis, “blueberry muffin syndrome”). some infants may be born with congenital rubella virus infection but have no apparent signs or symptoms of crs and are referred to as “infants with congenital rubella infection only.”</code> | <code>figure 230e-2 countries using rubella vaccine in their national immunization schedule, 2012. (from the world health organization.) is probably lifelong. the most commonly used vaccine globally is the ra27/3 virus strain. the current recommendation for routine rubella vaccination in the united states is a first dose of mmr vaccine at 12–15 months of age and a second dose at 4–6 years. target groups for rubella vaccine include children ≥1 year of age, adolescents and adults without documented evidence of immunity, individuals in congregate settings (e.g., college students, military personnel, child care and health care workers), and susceptible women before and after pregnancy.</code> | <code>the neuropathology is of considerable interest. in the nervous system of fetuses exposed to maternal rubella in the first trimester, r.d. adams found no visible lesions by light microscopy, even though the virus had been isolated from the brain by enders (personal communications). at this period of development there is no inflammatory reaction because of the absence of polymorphonuclear leukocytes, lymphocytes, and other mononuclear cells in the fetus. at birth the brain is usually of normal size, and there may be no discernible lesions. there may be a mild meningeal infiltration of lymphocytes, and a few zones of necrosis and vasculitis with later calcification of vessels are seen, as are small hemorrhages, presumably related to the thrombocytopenia. smallness of the brain and delay in myelination have been observed in children who died at 1 to 2 years of age. none of the brains in adams’ series was malformed. rubella virus continues to be recoverable from the csf for at least 18</code> | <code>rubella (german measles) also spreads from the hairline downward; unlike that of measles, however, the rash of rubella tends to clear from originally affected areas as it migrates, and it may be pruritic (chap. 230e). forchheimer spots (palatal petechiae) may develop but are nonspecific because they also develop in infectious mononucleosis (chap. 218) and scarlet fever (chap. 173). postauricular and suboccipital adenopathy and arthritis are common among adults with rubella. exposure of pregnant women to ill individuals should be avoided, as rubella causes severe congenital abnormalities. numerous strains of enteroviruses (chap. 228), primarily echoviruses and coxsackieviruses, cause nonspecific syndromes of fever and eruptions that may mimic rubella or measles. patients with infectious mononucleosis caused by epstein-barr virus (chap. 218) or with primary hiv infection (chap. 226) may exhibit pharyngitis, lymphadenopathy, and a nonspecific maculopapular exanthem.</code> | <code>after the isolation of rubella virus in the early 1960s and the occurrence of a devastating pandemic, a vaccine for rubella was developed and licensed in 1969. currently, the majority of rubella-containing vaccines (rcvs) used worldwide are combined measles and rubella (mr) or measles, mumps, and rubella (mmr) formulations. a tetravalent measles, mumps, rubella, and varicella (mmrv) vaccine is available but is not widely used. the public health burden of rubella infection is measured primarily through the resulting crs cases. the 1964–1965 rubella epidemic in the united states encompassed >30,000 infections during pregnancy. crs occurred in ~20,000 infants born alive, including >11,000 infants who were deaf, >3500 infants who were blind, and almost 2000 infants who were mentally retarded. the cost of this epidemic exceeded $1.5 billion. in 1983, the cost per child with crs was estimated at $200,000.</code> |
| <code>structure and function of β barrels in membrane proteins</code> | <code>most multipass membrane proteins in eukaryotic cells and in the bacterial plasma membrane are constructed from transmembrane α helices. the helices figure 10–22 steps in the folding of a multipass transmembrane protein. when a newly synthesized transmembrane α helix is released into the lipid bilayer, it is initially surrounded by lipid molecules. as the protein folds, contacts between the helices displace some of the lipid molecules surrounding the helices. figure 10–23 β barrels formed from different numbers of β strands.</code> | <code>β-barrel proteins are abundant in the outer membranes of bacteria, mitochondria, and chloroplasts. some are pore-forming proteins, which create water-filled channels that allow selected small hydrophilic molecules to cross the membrane. the porins are well-studied examples (example 3 in figure 10–23c). many porin barrels are formed from a 16-strand, antiparallel β sheet rolled up into a cylindrical structure. polar amino acid side chains line the aqueous channel on the inside, while nonpolar side chains project from the outside of the barrel to interact with the hydrophobic core of the lipid bilayer. loops of the polypeptide chain often protrude into the lumen of the channel, narrowing it so that only certain solutes can pass. some porins are therefore highly selective: maltoporin, for example, preferentially allows maltose and maltose oligomers to cross the outer membrane of e. coli.</code> | <code>figure 3–8 two types of β sheet structures. (a) an antiparallel β sheet (see figure 3–7c). (b) a parallel β sheet. both of these structures are common in proteins.</code> | <code>the cores of many proteins contain extensive regions of β sheet. as shown in figure 3–8, these β sheets can form either from neighboring segments of the polypeptide backbone that run in the same orientation (parallel chains) or from a polypeptide backbone that folds back and forth upon itself, with each section of the chain running in the direction opposite to that of its immediate neighbors (antiparallel chains). both types of β sheet produce a very rigid structure, held together by hydrogen bonds that connect the peptide bonds in neighboring chains (see figure 3–7c).</code> | <code>one of the central subunits of the sam complex is homologous to a bacterial outer membrane protein that helps insert β-barrel proteins into the bacterial outer figure 12–24 integration of porins into the outer mitochondrial and bacterial membranes. (a) after translocation through the tom complex in the outer mitochondrial membrane, β-barrel proteins bind to chaperones in the intermembrane space. the sam complex then inserts the unfolded polypeptide chain into the outer membrane and helps the chain fold. (b) a structurally related bam complex in the outer membrane of gram-negative bacteria catalyzes β-barrel protein insertion and folding (see figure 11–17). membrane from the periplasmic space (the equivalent of the intermembrane space in mitochondria) (figure 12–24b). this conserved pathway for inserting β-barrel proteins further underscores the endosymbiotic origin of mitochondria. transport into the inner mitochondrial membrane and intermembrane space occurs via several routes</code> |
282
+ * Loss: [<code>CachedMultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/cross_encoder/losses.html#cachedmultiplenegativesrankingloss) with these parameters:
283
+ ```json
284
+ {
285
+ "scale": 10.0,
286
+ "num_negatives": 4,
287
+ "activation_fn": "torch.nn.modules.activation.Sigmoid",
288
+ "mini_batch_size": 32
289
+ }
290
+ ```
291
+
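The parameters above control how the ranking loss treats each training example: every query is paired with one positive and `num_negatives` sampled negatives, each logit is passed through the configured `Sigmoid` activation and multiplied by `scale`, and the result is scored with softmax cross-entropy against the positive. As a rough, dependency-free sketch of that scoring step (illustrative only; the actual, cached implementation lives in `sentence_transformers.cross_encoder.losses`):

```python
import math

def mnrl_loss(logits, scale=10.0):
    """Simplified multiple-negatives ranking loss for ONE query.

    logits[0] is the cross-encoder output for the positive passage;
    logits[1:] are the outputs for the sampled negatives. Each logit is
    squashed with a sigmoid (the configured activation_fn), scaled, and
    the loss is cross-entropy with the positive at index 0.
    """
    scores = [scale * (1.0 / (1.0 + math.exp(-z))) for z in logits]
    log_denom = math.log(sum(math.exp(s) for s in scores))
    # Negative log of the softmax probability assigned to the positive.
    return log_denom - scores[0]

# One positive scored well above four negatives -> loss near zero.
low = mnrl_loss([4.0, -2.0, -1.5, -3.0, -2.5])
# Positive indistinguishable from the negatives -> loss of ln(5).
high = mnrl_loss([0.0, 0.0, 0.0, 0.0, 0.0])
```

Because the sigmoid saturates, `scale` sets the effective temperature of the softmax: with `scale=10.0`, scores live in (0, 10), so a confidently separated positive still yields a sharply peaked distribution.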
292
+ ### Training Hyperparameters
293
+ #### Non-Default Hyperparameters
294
+
295
+ - `eval_strategy`: steps
296
+ - `per_device_train_batch_size`: 4
297
+ - `per_device_eval_batch_size`: 4
298
+ - `learning_rate`: 2e-05
299
+ - `num_train_epochs`: 1
300
+ - `warmup_ratio`: 0.1
301
+ - `bf16`: True
302
+ - `dataloader_num_workers`: 4
303
+ - `load_best_model_at_end`: True
304
+
305
+ #### All Hyperparameters
306
+ <details><summary>Click to expand</summary>
307
+
308
+ - `overwrite_output_dir`: False
309
+ - `do_predict`: False
310
+ - `eval_strategy`: steps
311
+ - `prediction_loss_only`: True
312
+ - `per_device_train_batch_size`: 4
313
+ - `per_device_eval_batch_size`: 4
314
+ - `per_gpu_train_batch_size`: None
315
+ - `per_gpu_eval_batch_size`: None
316
+ - `gradient_accumulation_steps`: 1
317
+ - `eval_accumulation_steps`: None
318
+ - `torch_empty_cache_steps`: None
319
+ - `learning_rate`: 2e-05
320
+ - `weight_decay`: 0.0
321
+ - `adam_beta1`: 0.9
322
+ - `adam_beta2`: 0.999
323
+ - `adam_epsilon`: 1e-08
324
+ - `max_grad_norm`: 1.0
325
+ - `num_train_epochs`: 1
326
+ - `max_steps`: -1
327
+ - `lr_scheduler_type`: linear
328
+ - `lr_scheduler_kwargs`: {}
329
+ - `warmup_ratio`: 0.1
330
+ - `warmup_steps`: 0
331
+ - `log_level`: passive
332
+ - `log_level_replica`: warning
333
+ - `log_on_each_node`: True
334
+ - `logging_nan_inf_filter`: True
335
+ - `save_safetensors`: True
336
+ - `save_on_each_node`: False
337
+ - `save_only_model`: False
338
+ - `restore_callback_states_from_checkpoint`: False
339
+ - `no_cuda`: False
340
+ - `use_cpu`: False
341
+ - `use_mps_device`: False
342
+ - `seed`: 42
343
+ - `data_seed`: None
344
+ - `jit_mode_eval`: False
345
+ - `use_ipex`: False
346
+ - `bf16`: True
347
+ - `fp16`: False
348
+ - `fp16_opt_level`: O1
349
+ - `half_precision_backend`: auto
350
+ - `bf16_full_eval`: False
351
+ - `fp16_full_eval`: False
352
+ - `tf32`: None
353
+ - `local_rank`: 0
354
+ - `ddp_backend`: None
355
+ - `tpu_num_cores`: None
356
+ - `tpu_metrics_debug`: False
357
+ - `debug`: []
358
+ - `dataloader_drop_last`: False
359
+ - `dataloader_num_workers`: 4
360
+ - `dataloader_prefetch_factor`: None
361
+ - `past_index`: -1
362
+ - `disable_tqdm`: False
363
+ - `remove_unused_columns`: True
364
+ - `label_names`: None
365
+ - `load_best_model_at_end`: True
366
+ - `ignore_data_skip`: False
367
+ - `fsdp`: []
368
+ - `fsdp_min_num_params`: 0
369
+ - `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
370
+ - `tp_size`: 0
371
+ - `fsdp_transformer_layer_cls_to_wrap`: None
372
+ - `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
373
+ - `deepspeed`: None
374
+ - `label_smoothing_factor`: 0.0
375
+ - `optim`: adamw_torch
376
+ - `optim_args`: None
377
+ - `adafactor`: False
378
+ - `group_by_length`: False
379
+ - `length_column_name`: length
380
+ - `ddp_find_unused_parameters`: None
381
+ - `ddp_bucket_cap_mb`: None
382
+ - `ddp_broadcast_buffers`: False
383
+ - `dataloader_pin_memory`: True
384
+ - `dataloader_persistent_workers`: False
385
+ - `skip_memory_metrics`: True
386
+ - `use_legacy_prediction_loop`: False
387
+ - `push_to_hub`: False
388
+ - `resume_from_checkpoint`: None
389
+ - `hub_model_id`: None
390
+ - `hub_strategy`: every_save
391
+ - `hub_private_repo`: None
392
+ - `hub_always_push`: False
393
+ - `gradient_checkpointing`: False
394
+ - `gradient_checkpointing_kwargs`: None
395
+ - `include_inputs_for_metrics`: False
396
+ - `include_for_metrics`: []
397
+ - `eval_do_concat_batches`: True
398
+ - `fp16_backend`: auto
399
+ - `push_to_hub_model_id`: None
400
+ - `push_to_hub_organization`: None
401
+ - `mp_parameters`:
402
+ - `auto_find_batch_size`: False
403
+ - `full_determinism`: False
404
+ - `torchdynamo`: None
405
+ - `ray_scope`: last
406
+ - `ddp_timeout`: 1800
407
+ - `torch_compile`: False
408
+ - `torch_compile_backend`: None
409
+ - `torch_compile_mode`: None
410
+ - `include_tokens_per_second`: False
411
+ - `include_num_input_tokens_seen`: False
412
+ - `neftune_noise_alpha`: None
413
+ - `optim_target_modules`: None
414
+ - `batch_eval_metrics`: False
415
+ - `eval_on_start`: False
416
+ - `use_liger_kernel`: False
417
+ - `eval_use_gather_object`: False
418
+ - `average_tokens_across_devices`: False
419
+ - `prompts`: None
420
+ - `batch_sampler`: batch_sampler
421
+ - `multi_dataset_batch_sampler`: proportional
422
+
423
+ </details>
424
+
425
+ ### Training Logs
426
+ | Epoch | Step | Training Loss | dev_ndcg@10 | NanoNQ_R100_ndcg@10 | NanoSCIDOCS_R100_ndcg@10 | NanoSciFact_R100_ndcg@10 | NanoBEIR_R100_mean_ndcg@10 |
427
+ |:------:|:-----:|:-------------:|:-----------:|:-------------------:|:------------------------:|:------------------------:|:--------------------------:|
428
+ | 0.0000 | 1 | 4.5922 | - | - | - | - | - |
429
+ | 0.0024 | 200 | 3.3404 | - | - | - | - | - |
430
+ | 0.0048 | 400 | 2.1271 | - | - | - | - | - |
431
+ | 0.0073 | 600 | 1.4865 | - | - | - | - | - |
432
+ | 0.0097 | 800 | 0.9195 | - | - | - | - | - |
433
+ | 0.0121 | 1000 | 0.5765 | - | - | - | - | - |
434
+ | 0.0145 | 1200 | 0.4458 | - | - | - | - | - |
435
+ | 0.0170 | 1400 | 0.3502 | - | - | - | - | - |
436
+ | 0.0194 | 1600 | 0.3753 | - | - | - | - | - |
437
+ | 0.0218 | 1800 | 0.3748 | - | - | - | - | - |
438
+ | 0.0242 | 2000 | 0.3334 | - | - | - | - | - |
439
+ | 0.0267 | 2200 | 0.3678 | - | - | - | - | - |
440
+ | 0.0291 | 2400 | 0.3326 | - | - | - | - | - |
441
+ | 0.0315 | 2600 | 0.2861 | - | - | - | - | - |
442
+ | 0.0339 | 2800 | 0.3241 | - | - | - | - | - |
443
+ | 0.0363 | 3000 | 0.2778 | - | - | - | - | - |
444
+ | 0.0388 | 3200 | 0.2823 | - | - | - | - | - |
445
+ | 0.0412 | 3400 | 0.292 | - | - | - | - | - |
446
+ | 0.0436 | 3600 | 0.2853 | - | - | - | - | - |
447
+ | 0.0460 | 3800 | 0.2239 | - | - | - | - | - |
448
+ | 0.0485 | 4000 | 0.242 | - | - | - | - | - |
449
+ | 0.0509 | 4200 | 0.2607 | - | - | - | - | - |
450
+ | 0.0533 | 4400 | 0.2567 | - | - | - | - | - |
451
+ | 0.0557 | 4600 | 0.2382 | - | - | - | - | - |
452
+ | 0.0582 | 4800 | 0.1988 | - | - | - | - | - |
453
+ | 0.0606 | 5000 | 0.2184 | - | - | - | - | - |
454
+ | 0.0630 | 5200 | 0.1865 | - | - | - | - | - |
455
+ | 0.0654 | 5400 | 0.2099 | - | - | - | - | - |
456
+ | 0.0678 | 5600 | 0.2375 | - | - | - | - | - |
457
+ | 0.0703 | 5800 | 0.2399 | - | - | - | - | - |
458
+ | 0.0727 | 6000 | 0.2486 | - | - | - | - | - |
459
+ | 0.0751 | 6200 | 0.2419 | - | - | - | - | - |
460
+ | 0.0775 | 6400 | 0.1771 | - | - | - | - | - |
461
+ | 0.0800 | 6600 | 0.2185 | - | - | - | - | - |
462
+ | 0.0824 | 6800 | 0.2261 | - | - | - | - | - |
463
+ | 0.0848 | 7000 | 0.2615 | - | - | - | - | - |
464
+ | 0.0872 | 7200 | 0.2662 | - | - | - | - | - |
465
+ | 0.0897 | 7400 | 0.2042 | - | - | - | - | - |
466
+ | 0.0921 | 7600 | 0.2712 | - | - | - | - | - |
467
+ | 0.0945 | 7800 | 0.3638 | - | - | - | - | - |
468
+ | 0.0969 | 8000 | 0.2343 | - | - | - | - | - |
469
+ | 0.0993 | 8200 | 0.3492 | - | - | - | - | - |
470
+ | 0.1018 | 8400 | 0.319 | - | - | - | - | - |
471
+ | 0.1042 | 8600 | 0.3326 | - | - | - | - | - |
472
+ | 0.1066 | 8800 | 0.3436 | - | - | - | - | - |
473
+ | 0.1090 | 9000 | 0.3442 | - | - | - | - | - |
474
+ | 0.1115 | 9200 | 0.2505 | - | - | - | - | - |
475
+ | 0.1139 | 9400 | 0.3844 | - | - | - | - | - |
476
+ | 0.1163 | 9600 | 0.4207 | - | - | - | - | - |
477
+ | 0.1187 | 9800 | 0.3018 | - | - | - | - | - |
478
+ | 0.1212 | 10000 | 0.3979 | 0.9704 | 0.3307 (-0.1699) | 0.2968 (-0.0384) | 0.7085 (-0.0014) | 0.4453 (-0.0699) |
479
+
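The `ndcg@10` columns report normalized discounted cumulative gain over the top 10 reranked passages, which is how the headline `dev_ndcg@10` of 0.9704 is measured. A small self-contained sketch of the standard NDCG@10 definition (not the exact evaluator code used above):

```python
import math

def ndcg_at_10(ranked_relevances):
    """NDCG@10 for one query.

    ranked_relevances: relevance labels (e.g. 1 = relevant, 0 = not)
    in the order the reranker returned the passages.
    """
    def dcg(rels):
        # Gains are discounted logarithmically by rank position.
        return sum(r / math.log2(i + 2) for i, r in enumerate(rels[:10]))
    ideal = dcg(sorted(ranked_relevances, reverse=True))
    return dcg(ranked_relevances) / ideal if ideal > 0 else 0.0

# Relevant passage ranked first -> perfect score of 1.0.
perfect = ndcg_at_10([1, 0, 0, 0, 0])
# Relevant passage ranked third -> discounted to 1/log2(5) of... rank 3
# contributes 1/log2(3 + 1) = 0.5 against an ideal DCG of 1.0.
discounted = ndcg_at_10([0, 0, 1, 0, 0])
```

The values in parentheses in the NanoBEIR columns are deltas against the base (pre-finetuning) reranking scores logged in `trainer_state.json`.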
480
+
481
+ ### Framework Versions
482
+ - Python: 3.10.12
483
+ - Sentence Transformers: 4.1.0
484
+ - Transformers: 4.51.3
485
+ - PyTorch: 2.7.0+cu126
486
+ - Accelerate: 1.6.0
487
+ - Datasets: 3.5.1
488
+ - Tokenizers: 0.21.1
489
+
490
+ ## Citation
491
+
492
+ ### BibTeX
493
+
494
+ #### Sentence Transformers
495
+ ```bibtex
496
+ @inproceedings{reimers-2019-sentence-bert,
497
+ title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
498
+ author = "Reimers, Nils and Gurevych, Iryna",
499
+ booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
500
+ month = "11",
501
+ year = "2019",
502
+ publisher = "Association for Computational Linguistics",
503
+ url = "https://arxiv.org/abs/1908.10084",
504
+ }
505
+ ```
506
+
507
+ <!--
508
+ ## Glossary
509
+
510
+ *Clearly define terms in order to be accessible across audiences.*
511
+ -->
512
+
513
+ <!--
514
+ ## Model Card Authors
515
+
516
+ *Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
517
+ -->
518
+
519
+ <!--
520
+ ## Model Card Contact
521
+
522
+ *Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
523
+ -->
crossencoder-checkpoints/checkpoint-googlebert-10000/config.json ADDED
@@ -0,0 +1,35 @@
1
+ {
2
+ "architectures": [
3
+ "BertForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "gradient_checkpointing": false,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 768,
11
+ "id2label": {
12
+ "0": "LABEL_0"
13
+ },
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 3072,
16
+ "label2id": {
17
+ "LABEL_0": 0
18
+ },
19
+ "layer_norm_eps": 1e-12,
20
+ "max_position_embeddings": 512,
21
+ "model_type": "bert",
22
+ "num_attention_heads": 12,
23
+ "num_hidden_layers": 12,
24
+ "pad_token_id": 0,
25
+ "position_embedding_type": "absolute",
26
+ "sentence_transformers": {
27
+ "activation_fn": "torch.nn.modules.activation.Sigmoid",
28
+ "version": "4.1.0"
29
+ },
30
+ "torch_dtype": "float32",
31
+ "transformers_version": "4.51.3",
32
+ "type_vocab_size": 2,
33
+ "use_cache": true,
34
+ "vocab_size": 28996
35
+ }
crossencoder-checkpoints/checkpoint-googlebert-10000/model.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e36dfe2d84bbe93c64fb4878c518e63a45145d922196f39763b5d0ffa429e2ed
3
+ size 433267692
crossencoder-checkpoints/checkpoint-googlebert-10000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5b1aabb1a01516a6049168abb5af7255e52a32b8d35e55b49e7164429ea4295
3
+ size 14645
crossencoder-checkpoints/checkpoint-googlebert-10000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4cfb34c5e798451a2249344555252a99e1e9f9e355f0d6a6f67cfa88d46b8e1
3
+ size 1465
crossencoder-checkpoints/checkpoint-googlebert-10000/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
crossencoder-checkpoints/checkpoint-googlebert-10000/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
crossencoder-checkpoints/checkpoint-googlebert-10000/tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": false,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": false,
47
+ "extra_special_tokens": {},
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 512,
50
+ "pad_token": "[PAD]",
51
+ "padding": true,
52
+ "sep_token": "[SEP]",
53
+ "strip_accents": null,
54
+ "tokenize_chinese_chars": true,
55
+ "tokenizer_class": "BertTokenizer",
56
+ "truncation": true,
57
+ "unk_token": "[UNK]"
58
+ }
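Per this tokenizer configuration, each query–passage pair is encoded as a single cased-BERT sequence using the special tokens declared above, truncated to `model_max_length` of 512. A hedged sketch of that layout (the real work, including WordPiece splitting and pairwise truncation, is done by `BertTokenizer`):

```python
def pair_layout(query_tokens, passage_tokens, max_len=512):
    """Illustrative [CLS] query [SEP] passage [SEP] layout of a BERT
    cross-encoder input; WordPiece tokenization is omitted, and the
    truncation here is a simplification of the tokenizer's strategy."""
    seq = ["[CLS]"] + query_tokens + ["[SEP]"] + passage_tokens + ["[SEP]"]
    return seq[:max_len]  # truncation=True caps the pair at max_len

layout = pair_layout(["rubella", "vaccine"], ["mmr", "formulations"])
```

Because `do_lower_case` is false, casing is preserved, matching the `google-bert/bert-base-cased` vocabulary of 28,996 WordPiece entries.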
crossencoder-checkpoints/checkpoint-googlebert-10000/trainer_state.json ADDED
@@ -0,0 +1,426 @@
1
+ {
2
+ "best_global_step": 10000,
3
+ "best_metric": 0.9703932950632154,
4
+ "best_model_checkpoint": "models/google-bert/bert-base-cased-cross_encoder_dataset_finetuned_bert_base-run-20250503-141028/checkpoint-10000",
5
+ "epoch": 0.12115631587874676,
6
+ "eval_steps": 10000,
7
+ "global_step": 10000,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 1.2115631587874677e-05,
14
+ "grad_norm": 69.96532440185547,
15
+ "learning_rate": 0.0,
16
+ "loss": 4.5922,
17
+ "step": 1
18
+ },
19
+ {
20
+ "epoch": 0.0024231263175749354,
21
+ "grad_norm": 19.02867889404297,
22
+ "learning_rate": 4.821904531136419e-07,
23
+ "loss": 3.3404,
24
+ "step": 200
25
+ },
26
+ {
27
+ "epoch": 0.004846252635149871,
28
+ "grad_norm": 11.468595504760742,
29
+ "learning_rate": 9.6680397383087e-07,
30
+ "loss": 2.1271,
31
+ "step": 400
32
+ },
33
+ {
34
+ "epoch": 0.007269378952724805,
35
+ "grad_norm": 23.1657772064209,
36
+ "learning_rate": 1.451417494548098e-06,
37
+ "loss": 1.4865,
38
+ "step": 600
39
+ },
40
+ {
41
+ "epoch": 0.009692505270299741,
42
+ "grad_norm": 38.3974494934082,
43
+ "learning_rate": 1.936031015265326e-06,
44
+ "loss": 0.9195,
45
+ "step": 800
46
+ },
47
+ {
48
+ "epoch": 0.012115631587874676,
49
+ "grad_norm": 33.8629264831543,
50
+ "learning_rate": 2.420644535982554e-06,
51
+ "loss": 0.5765,
52
+ "step": 1000
53
+ },
54
+ {
55
+ "epoch": 0.01453875790544961,
56
+ "grad_norm": 77.52400207519531,
57
+ "learning_rate": 2.9052580566997825e-06,
58
+ "loss": 0.4458,
59
+ "step": 1200
60
+ },
61
+ {
62
+ "epoch": 0.016961884223024547,
63
+ "grad_norm": 44.24454116821289,
64
+ "learning_rate": 3.3898715774170105e-06,
65
+ "loss": 0.3502,
66
+ "step": 1400
67
+ },
68
+ {
69
+ "epoch": 0.019385010540599483,
70
+ "grad_norm": 0.05398047715425491,
71
+ "learning_rate": 3.874485098134238e-06,
72
+ "loss": 0.3753,
73
+ "step": 1600
74
+ },
75
+ {
76
+ "epoch": 0.021808136858174416,
77
+ "grad_norm": 60.81565856933594,
78
+ "learning_rate": 4.3590986188514665e-06,
79
+ "loss": 0.3748,
80
+ "step": 1800
81
+ },
82
+ {
83
+ "epoch": 0.024231263175749352,
84
+ "grad_norm": 58.663352966308594,
85
+ "learning_rate": 4.8437121395686945e-06,
86
+ "loss": 0.3334,
87
+ "step": 2000
88
+ },
89
+ {
90
+ "epoch": 0.026654389493324288,
91
+ "grad_norm": 252.8543243408203,
92
+ "learning_rate": 5.3283256602859225e-06,
93
+ "loss": 0.3678,
94
+ "step": 2200
95
+ },
96
+ {
97
+ "epoch": 0.02907751581089922,
98
+ "grad_norm": 75.89717102050781,
99
+ "learning_rate": 5.81293918100315e-06,
100
+ "loss": 0.3326,
101
+ "step": 2400
102
+ },
103
+ {
104
+ "epoch": 0.03150064212847416,
105
+ "grad_norm": 38.77901840209961,
106
+ "learning_rate": 6.2975527017203786e-06,
107
+ "loss": 0.2861,
108
+ "step": 2600
109
+ },
110
+ {
111
+ "epoch": 0.033923768446049093,
112
+ "grad_norm": 43.75902557373047,
113
+ "learning_rate": 6.782166222437606e-06,
114
+ "loss": 0.3241,
115
+ "step": 2800
116
+ },
117
+ {
118
+ "epoch": 0.03634689476362403,
119
+ "grad_norm": 9.858089447021484,
120
+ "learning_rate": 7.266779743154835e-06,
121
+ "loss": 0.2778,
122
+ "step": 3000
123
+ },
124
+ {
125
+ "epoch": 0.038770021081198966,
126
+ "grad_norm": 1.7009317874908447,
127
+ "learning_rate": 7.751393263872062e-06,
128
+ "loss": 0.2823,
129
+ "step": 3200
130
+ },
131
+ {
132
+ "epoch": 0.041193147398773895,
133
+ "grad_norm": 1.1402368545532227,
134
+ "learning_rate": 8.23600678458929e-06,
135
+ "loss": 0.292,
136
+ "step": 3400
137
+ },
138
+ {
139
+ "epoch": 0.04361627371634883,
140
+ "grad_norm": 35.95049285888672,
141
+ "learning_rate": 8.720620305306518e-06,
142
+ "loss": 0.2853,
143
+ "step": 3600
144
+ },
145
+ {
146
+ "epoch": 0.04603940003392377,
147
+ "grad_norm": 7.911437034606934,
148
+ "learning_rate": 9.205233826023747e-06,
149
+ "loss": 0.2239,
150
+ "step": 3800
151
+ },
152
+ {
153
+ "epoch": 0.048462526351498704,
154
+ "grad_norm": 20.524240493774414,
155
+ "learning_rate": 9.689847346740975e-06,
156
+ "loss": 0.242,
157
+ "step": 4000
158
+ },
159
+ {
160
+ "epoch": 0.05088565266907364,
161
+ "grad_norm": 29.201196670532227,
162
+ "learning_rate": 1.0174460867458203e-05,
163
+ "loss": 0.2607,
164
+ "step": 4200
165
+ },
166
+ {
167
+ "epoch": 0.053308778986648576,
168
+ "grad_norm": 8.403849601745605,
169
+ "learning_rate": 1.065907438817543e-05,
170
+ "loss": 0.2567,
171
+ "step": 4400
172
+ },
173
+ {
174
+ "epoch": 0.055731905304223506,
175
+ "grad_norm": 0.07258583605289459,
176
+ "learning_rate": 1.114368790889266e-05,
177
+ "loss": 0.2382,
178
+ "step": 4600
179
+ },
180
+ {
181
+ "epoch": 0.05815503162179844,
182
+ "grad_norm": 8.787927627563477,
183
+ "learning_rate": 1.1628301429609888e-05,
184
+ "loss": 0.1988,
185
+ "step": 4800
186
+ },
187
+ {
188
+ "epoch": 0.06057815793937338,
189
+ "grad_norm": 38.405887603759766,
190
+ "learning_rate": 1.2112914950327115e-05,
191
+ "loss": 0.2184,
192
+ "step": 5000
193
+ },
194
+ {
195
+ "epoch": 0.06300128425694831,
196
+ "grad_norm": 12.370210647583008,
197
+ "learning_rate": 1.2597528471044342e-05,
198
+ "loss": 0.1865,
199
+ "step": 5200
200
+ },
201
+ {
202
+ "epoch": 0.06542441057452325,
203
+ "grad_norm": 0.00251359143294394,
204
+ "learning_rate": 1.3082141991761572e-05,
205
+ "loss": 0.2099,
206
+ "step": 5400
207
+ },
208
+ {
209
+ "epoch": 0.06784753689209819,
210
+ "grad_norm": 215.8644256591797,
211
+ "learning_rate": 1.35667555124788e-05,
212
+ "loss": 0.2375,
213
+ "step": 5600
214
+ },
215
+ {
216
+ "epoch": 0.07027066320967312,
217
+ "grad_norm": 7.582530975341797,
218
+ "learning_rate": 1.4051369033196027e-05,
219
+ "loss": 0.2399,
220
+ "step": 5800
221
+ },
222
+ {
223
+ "epoch": 0.07269378952724806,
224
+ "grad_norm": 110.00708770751953,
225
+ "learning_rate": 1.4535982553913256e-05,
226
+ "loss": 0.2486,
227
+ "step": 6000
228
+ },
229
+ {
230
+ "epoch": 0.075116915844823,
231
+ "grad_norm": 9.572257041931152,
232
+ "learning_rate": 1.5020596074630483e-05,
233
+ "loss": 0.2419,
234
+ "step": 6200
235
+ },
236
+ {
237
+ "epoch": 0.07754004216239793,
238
+ "grad_norm": 42.000762939453125,
239
+ "learning_rate": 1.550520959534771e-05,
240
+ "loss": 0.1771,
241
+ "step": 6400
242
+ },
243
+ {
244
+ "epoch": 0.07996316847997285,
245
+ "grad_norm": 55.88412094116211,
246
+ "learning_rate": 1.598982311606494e-05,
247
+ "loss": 0.2185,
248
+ "step": 6600
249
+ },
250
+ {
251
+ "epoch": 0.08238629479754779,
252
+ "grad_norm": 0.12212779372930527,
253
+ "learning_rate": 1.6474436636782166e-05,
254
+ "loss": 0.2261,
255
+ "step": 6800
256
+ },
257
+ {
258
+ "epoch": 0.08480942111512273,
259
+ "grad_norm": 0.22978061437606812,
260
+ "learning_rate": 1.6959050157499395e-05,
261
+ "loss": 0.2615,
262
+ "step": 7000
263
+ },
264
+ {
265
+ "epoch": 0.08723254743269766,
266
+ "grad_norm": 13.786860466003418,
267
+ "learning_rate": 1.7443663678216624e-05,
268
+ "loss": 0.2662,
269
+ "step": 7200
270
+ },
271
+ {
272
+ "epoch": 0.0896556737502726,
273
+ "grad_norm": 1.807544231414795,
274
+ "learning_rate": 1.7928277198933853e-05,
275
+ "loss": 0.2042,
276
+ "step": 7400
277
+ },
278
+ {
279
+ "epoch": 0.09207880006784754,
280
+ "grad_norm": 200.43603515625,
281
+ "learning_rate": 1.841289071965108e-05,
282
+ "loss": 0.2712,
283
+ "step": 7600
284
+ },
285
+ {
286
+ "epoch": 0.09450192638542247,
287
+ "grad_norm": 6.414185047149658,
288
+ "learning_rate": 1.8897504240368307e-05,
289
+ "loss": 0.3638,
290
+ "step": 7800
291
+ },
292
+ {
293
+ "epoch": 0.09692505270299741,
294
+ "grad_norm": 19.78321075439453,
295
+ "learning_rate": 1.9382117761085536e-05,
296
+ "loss": 0.2343,
297
+ "step": 8000
298
+ },
299
+ {
300
+ "epoch": 0.09934817902057234,
301
+ "grad_norm": 0.0026795840822160244,
302
+ "learning_rate": 1.9866731281802765e-05,
303
+ "loss": 0.3492,
304
+ "step": 8200
305
+ },
306
+ {
307
+ "epoch": 0.10177130533814728,
308
+ "grad_norm": 0.12078650295734406,
309
+ "learning_rate": 1.9960960637553176e-05,
310
+ "loss": 0.319,
311
+ "step": 8400
312
+ },
313
+ {
314
+ "epoch": 0.10419443165572222,
315
+ "grad_norm": 0.06971794366836548,
316
+ "learning_rate": 1.9907113241074796e-05,
317
+ "loss": 0.3326,
318
+ "step": 8600
319
+ },
320
+ {
321
+ "epoch": 0.10661755797329715,
322
+ "grad_norm": 4.194886207580566,
323
+ "learning_rate": 1.9853265844596416e-05,
324
+ "loss": 0.3436,
325
+ "step": 8800
326
+ },
327
+ {
328
+ "epoch": 0.10904068429087209,
329
+ "grad_norm": 0.14339782297611237,
330
+ "learning_rate": 1.9799418448118035e-05,
331
+ "loss": 0.3442,
332
+ "step": 9000
333
+ },
334
+ {
335
+ "epoch": 0.11146381060844701,
336
+ "grad_norm": 0.16107772290706635,
337
+ "learning_rate": 1.9745571051639655e-05,
338
+ "loss": 0.2505,
339
+ "step": 9200
340
+ },
341
+ {
342
+ "epoch": 0.11388693692602195,
343
+ "grad_norm": 0.0043859235011041164,
344
+ "learning_rate": 1.9691723655161275e-05,
345
+ "loss": 0.3844,
346
+ "step": 9400
347
+ },
348
+ {
349
+ "epoch": 0.11631006324359688,
350
+ "grad_norm": 17.954904556274414,
351
+ "learning_rate": 1.9637876258682895e-05,
352
+ "loss": 0.4207,
353
+ "step": 9600
354
+ },
355
+ {
356
+ "epoch": 0.11873318956117182,
357
+ "grad_norm": 0.1238822266459465,
358
+ "learning_rate": 1.9584028862204514e-05,
359
+ "loss": 0.3018,
360
+ "step": 9800
361
+ },
362
+ {
363
+ "epoch": 0.12115631587874676,
364
+ "grad_norm": 0.001291484571993351,
365
+ "learning_rate": 1.9530181465726134e-05,
366
+ "loss": 0.3979,
367
+ "step": 10000
368
+ },
369
+ {
370
+ "epoch": 0.12115631587874676,
371
+ "eval_NanoBEIR_R100_mean_base_map": 0.45455600104354227,
372
+ "eval_NanoBEIR_R100_mean_base_mrr@10": 0.5547539682539683,
373
+ "eval_NanoBEIR_R100_mean_base_ndcg@10": 0.5152256348613615,
374
+ "eval_NanoBEIR_R100_mean_map": 0.3952857369390113,
375
+ "eval_NanoBEIR_R100_mean_mrr@10": 0.503989417989418,
376
+ "eval_NanoBEIR_R100_mean_ndcg@10": 0.4453369270017804,
377
+ "eval_NanoNQ_R100_base_map": 0.4196061957396544,
378
+ "eval_NanoNQ_R100_base_mrr@10": 0.4266904761904762,
379
+ "eval_NanoNQ_R100_base_ndcg@10": 0.5006467934630127,
380
+ "eval_NanoNQ_R100_map": 0.26760554447403784,
381
+ "eval_NanoNQ_R100_mrr@10": 0.30400000000000005,
382
+ "eval_NanoNQ_R100_ndcg@10": 0.330741865534241,
383
+ "eval_NanoSCIDOCS_R100_base_map": 0.27430707601124094,
384
+ "eval_NanoSCIDOCS_R100_base_mrr@10": 0.5595238095238095,
385
+ "eval_NanoSCIDOCS_R100_base_ndcg@10": 0.33512313493909396,
386
+ "eval_NanoSCIDOCS_R100_map": 0.24249363273783214,
387
+ "eval_NanoSCIDOCS_R100_mrr@10": 0.527079365079365,
388
+ "eval_NanoSCIDOCS_R100_ndcg@10": 0.2967566724802968,
389
+ "eval_NanoSciFact_R100_base_map": 0.6697547313797314,
390
+ "eval_NanoSciFact_R100_base_mrr@10": 0.678047619047619,
391
+ "eval_NanoSciFact_R100_base_ndcg@10": 0.709906976181978,
392
+ "eval_NanoSciFact_R100_map": 0.6757580336051641,
393
+ "eval_NanoSciFact_R100_mrr@10": 0.6808888888888889,
394
+ "eval_NanoSciFact_R100_ndcg@10": 0.7085122429908034,
395
+ "eval_dev_map": 0.9441464035183368,
396
+ "eval_dev_mrr@10": 0.9441464035183368,
397
+ "eval_dev_ndcg@10": 0.9703932950632154,
398
+ "eval_runtime": 2239.1601,
399
+ "eval_samples_per_second": 0.0,
400
+ "eval_sequential_score": 0.4453369270017804,
401
+ "eval_steps_per_second": 0.0,
402
+ "step": 10000
403
+ }
404
+ ],
405
+ "logging_steps": 200,
406
+ "max_steps": 82538,
407
+ "num_input_tokens_seen": 0,
408
+ "num_train_epochs": 1,
409
+ "save_steps": 10000,
410
+ "stateful_callbacks": {
411
+ "TrainerControl": {
412
+ "args": {
413
+ "should_epoch_stop": false,
414
+ "should_evaluate": false,
415
+ "should_log": false,
416
+ "should_save": true,
417
+ "should_training_stop": false
418
+ },
419
+ "attributes": {}
420
+ }
421
+ },
422
+ "total_flos": 0.0,
423
+ "train_batch_size": 4,
424
+ "trial_name": null,
425
+ "trial_params": null
426
+ }
crossencoder-checkpoints/checkpoint-googlebert-10000/training_args.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40289707f5598971ba9c6d98fe4f8c454049550e6eafc2a0a7fa3a69af3ae017
3
+ size 6225
crossencoder-checkpoints/checkpoint-googlebert-10000/vocab.txt ADDED
The diff for this file is too large to render. See raw diff