Shuu12121 committed
Commit a08936c · verified · 1 parent: 794fa0a

Upload ModernBERT model

1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
+ {
+   "word_embedding_dimension": 512,
+   "pooling_mode_cls_token": true,
+   "pooling_mode_mean_tokens": false,
+   "pooling_mode_max_tokens": false,
+   "pooling_mode_mean_sqrt_len_tokens": false,
+   "pooling_mode_weightedmean_tokens": false,
+   "pooling_mode_lasttoken": false,
+   "include_prompt": true
+ }
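
Of the pooling modes above, only `pooling_mode_cls_token` is enabled, so the sentence embedding is simply the final hidden state of the first (`<s>`/CLS) token. A minimal sketch of the equivalent operation with plain `transformers` — the repo id is a placeholder, and the normalization is added only so dot products match the card's cosine similarity:

```python
import torch
from transformers import AutoModel, AutoTokenizer

repo = "path-or-id-of-this-model"  # placeholder, not the actual repo id
tokenizer = AutoTokenizer.from_pretrained(repo)
encoder = AutoModel.from_pretrained(repo)

batch = tokenizer(["// adds two numbers"], return_tensors="pt")
with torch.no_grad():
    hidden = encoder(**batch).last_hidden_state  # (batch, seq_len, 512)

cls_embedding = hidden[:, 0]  # CLS pooling: keep only the first token's state
cls_embedding = torch.nn.functional.normalize(cls_embedding, dim=-1)
```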
README.md ADDED
@@ -0,0 +1,500 @@
+ ---
+ tags:
+ - sentence-transformers
+ - sentence-similarity
+ - feature-extraction
+ - generated_from_trainer
+ - dataset_size:1761750
+ - loss:MultipleNegativesRankingLoss
+ base_model: Shuu12121/CodeModernBERT-Snake
+ widget:
+ - source_sentence: // MiddlewareHeaders adds headers to a handler
+   sentences:
+   - "function create(options) {\n console.log('Creating app: ', options.name);\n\n\
+     \ if (!options.status)\n options.status = (code, msg) => {\n console.log(msg);\n\
+     \ };\n if (!options.type) options.type = 'rekit-react';\n\n const prjDir\
+     \ = path.join(options.location || process.cwd(), options.name);\n return new\
+     \ Promise(async (resolve, reject) => {\n try {\n if (fs.existsSync(prjDir))\
+     \ {\n reject('FOLDER_EXISTS');\n return;\n }\n fs.mkdirSync(prjDir);\n\
+     \ let gitRepo;\n if (options.source) {\n if (/^https?:/.test(options.source))\
+     \ {\n // It's a git repo\n gitRepo = options.source;\n \
+     \ } else {\n // It's a local folder\n const srcDir = path.isAbsolute(options.source)\n\
+     \ ? options.source\n : path.join(process.cwd(), options.source);\n\
+     \ options.status('CREATE_APP_COPY_FILES', `Copy files from ${srcDir}...`);\n\
+     \ await fs.copy(srcDir, prjDir, {\n filter: src => !/\\/(\\\
+     .git|node_modules\\/|node_modules$)/.test(src) || path.basename(src) === '.gitignore',\n\
+     \ });\n }\n } else if (options.type) {\n // Get gitRepo\n\
+     \ options.status(\n 'QUERY_APP_TYPES_GIT_REPO',\n `Looking\
+     \ for the git repo for app type ${options.type}...`,\n );\n const\
+     \ appTypes = await getAppTypes();\n const appType = _.find(appTypes, {\
+     \ id: options.type });\n if (!appType) reject('APP_TYPE_NOT_SUPPORTED');\n\
+     \ gitRepo = appType.repo;\n } else {\n await fs.remove(prjDir);\n\
+     \ reject('NO_SOURCE_OR_APP_TYPE');\n }\n\n if (gitRepo) {\n \
+     \ options.status('CLONE_PROJECT', `Downloading project from ${gitRepo}...`);\n\
+     \ await cloneRepo(gitRepo, prjDir);\n }\n\n postCreate(prjDir,\
+     \ options);\n options.status('CREATION_SUCCESS', '\U0001F603App creation\
+     \ success.');\n resolve();\n } catch (err) {\n console.log('Failed\
+     \ to create project.');\n fs.removeSync(prjDir);\n reject(err);\n \
+     \ }\n });\n}"
+   - "@Override\n\tpublic void setFrameworkID(Option<Protos.FrameworkID> frameworkID)\
+     \ throws Exception {\n\t\tsynchronized (startStopLock) {\n\t\t\tverifyIsRunning();\n\
+     \n\t\t\tbyte[] value = frameworkID.isDefined() ? frameworkID.get().getValue().getBytes(ConfigConstants.DEFAULT_CHARSET)\
+     \ :\n\t\t\t\tnew byte[0];\n\t\t\tframeworkIdInZooKeeper.setValue(value);\n\t\t\
+     }\n\t}"
+   - "func MiddlewareHeaders(vs map[string]string) Middleware {\n\treturn func(h http.Handler)\
+     \ http.Handler {\n\t\treturn http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request)\
+     \ {\n\t\t\t// Add headers\n\t\t\thandleHeaders(vs, rw)\n\n\t\t\t// Next handler\n\
+     \t\t\th.ServeHTTP(rw, r)\n\t\t})\n\t}\n}"
+ - source_sentence: 'Parses a Plist XML string. Returns an Object.
+
+
+     @param {String} xml - the XML String to decode
+
+     @param {Function} callback - callback function
+
+     @returns {Mixed} the decoded value from the Plist XML
+
+     @api public
+
+     @deprecated use parse() instead'
+   sentences:
+   - "function parseStringSync (xml) {\n var doc = new DOMParser().parseFromString(xml);\n\
+     \ var plist;\n if (doc.documentElement.nodeName !== 'plist') {\n throw new\
+     \ Error('malformed document. First element should be <plist>');\n }\n plist\
+     \ = parsePlistXML(doc.documentElement);\n\n // if the plist is an array with\
+     \ 1 element, pull it out of the array\n if (plist.length == 1) {\n plist =\
+     \ plist[0];\n }\n return plist;\n}"
+   - "func GetCallStringArgsValues(n ast.Node, ctx *Context) []string {\n\tvalues :=\
+     \ []string{}\n\tswitch node := n.(type) {\n\tcase *ast.CallExpr:\n\t\tfor _, arg\
+     \ := range node.Args {\n\t\t\tswitch param := arg.(type) {\n\t\t\tcase *ast.BasicLit:\n\
+     \t\t\t\tvalue, err := GetString(param)\n\t\t\t\tif err == nil {\n\t\t\t\t\tvalues\
+     \ = append(values, value)\n\t\t\t\t}\n\t\t\tcase *ast.Ident:\n\t\t\t\tvalues =\
+     \ append(values, GetIdentStringValues(param)...)\n\t\t\t}\n\t\t}\n\t}\n\treturn\
+     \ values\n}"
+   - "public static Date beginOfYear(@NotNull final Date date) {\n\t\treturn DateUtils.truncate(date,\
+     \ Calendar.YEAR);\n\t}"
+ - source_sentence: '// forbiddenImportsFor determines all of the forbidden
+
+     // imports for a package given the import restrictions
+
+     // and returns a deduplicated list of them'
+   sentences:
+   - "func (i *ImportRestriction) forbiddenImportsFor(pkg Package) []string {\n\tforbiddenImportSet\
+     \ := map[string]struct{}{}\n\timports := pkg.Imports\n\tif !i.ExcludeTests {\n\
+     \t\timports = append(imports, append(pkg.TestImports, pkg.XTestImports...)...)\n\
+     \t}\n\tfor _, imp := range imports {\n\t\tpath := extractVendorPath(imp)\n\t\t\
+     if i.isForbidden(path) {\n\t\t\tforbiddenImportSet[path] = struct{}{}\n\t\t}\n\
+     \t}\n\n\tvar forbiddenImports []string\n\tfor imp := range forbiddenImportSet\
+     \ {\n\t\tforbiddenImports = append(forbiddenImports, imp)\n\t}\n\treturn forbiddenImports\n\
+     }"
+   - "function pick(o, props = []) {\n return props.reduce((acc, k) => {\n \
+     \ if (o.hasOwnProperty(k)) {\n acc[k] = o[k];\n }\n\n \
+     \ return acc;\n }, {});\n}"
+   - "func (s *PutTraceSegmentsOutput) SetUnprocessedTraceSegments(v []*UnprocessedTraceSegment)\
+     \ *PutTraceSegmentsOutput {\n\ts.UnprocessedTraceSegments = v\n\treturn s\n}"
+ - source_sentence: 'Validates whether the specified template is syntactically correct
+     and will be accepted by Azure Resource Manager..
+
+
+     @param resourceGroupName The name of the resource group the template will be deployed
+     to. The name is case insensitive.
+
+     @param deploymentName The name of the deployment.
+
+     @param properties The deployment properties.
+
+     @param serviceCallback the async ServiceCallback to handle successful and failed
+     responses.
+
+     @throws IllegalArgumentException thrown if parameters fail the validation
+
+     @return the {@link ServiceFuture} object'
+   sentences:
+   - "func Execute(v string) {\n\tversion = v\n\tif err := rootCmd.Execute(); err !=\
+     \ nil {\n\t\tlog.Fatal(err)\n\t}\n}"
+   - "function( otherPath )\r\n\t{\r\n\t\tvar thisElements = this.elements;\r\n\t\t\
+     var otherElements = otherPath && otherPath.elements;\r\n\r\n\t\tif ( !otherElements\
+     \ || thisElements.length != otherElements.length )\r\n\t\t\treturn false;\r\n\r\
+     \n\t\tfor ( var i = 0 ; i < thisElements.length ; i++ )\r\n\t\t{\r\n\t\t\tif (\
+     \ !thisElements[ i ].equals( otherElements[ i ] ) )\r\n\t\t\t\treturn false;\r\
+     \n\t\t}\r\n\r\n\t\treturn true;\r\n\t}"
+   - "public ServiceFuture<DeploymentValidateResultInner> validateAsync(String resourceGroupName,\
+     \ String deploymentName, DeploymentProperties properties, final ServiceCallback<DeploymentValidateResultInner>\
+     \ serviceCallback) {\n return ServiceFuture.fromResponse(validateWithServiceResponseAsync(resourceGroupName,\
+     \ deploymentName, properties), serviceCallback);\n }"
+ - source_sentence: This method calculates the turn weight separately.
+   sentences:
+   - "private SingleType parseSingleType() throws TTXPathException {\n\n final\
+     \ String atomicType = parseAtomicType();\n final boolean intero = is(TokenType.INTERROGATION,\
+     \ true);\n return new SingleType(atomicType, intero);\n }"
+   - "public void putAllWriteable(BeanMap<T> map) {\n map.types.keySet().stream().filter(key\
+     \ -> getWriteInvoker(key) != null).forEach(key -> this.put(key, map.get(key)));\n\
+     \ }"
+   - "public double calcTurnWeight(int edgeFrom, int nodeVia, int edgeTo) {\n \
+     \ long turnFlags = turnCostExt.getTurnCostFlags(edgeFrom, nodeVia, edgeTo);\n\
+     \ if (turnCostEncoder.isTurnRestricted(turnFlags))\n return\
+     \ Double.POSITIVE_INFINITY;\n\n return turnCostEncoder.getTurnCost(turnFlags);\n\
+     \ }"
+ pipeline_tag: sentence-similarity
+ library_name: sentence-transformers
+ ---
+
+ # SentenceTransformer based on Shuu12121/CodeModernBERT-Snake
+
+ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [Shuu12121/CodeModernBERT-Snake](https://huggingface.co/Shuu12121/CodeModernBERT-Snake). It maps sentences & paragraphs to a 512-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
+
+ ## Model Details
+
+ ### Model Description
+ - **Model Type:** Sentence Transformer
+ - **Base model:** [Shuu12121/CodeModernBERT-Snake](https://huggingface.co/Shuu12121/CodeModernBERT-Snake) <!-- at revision 73927b7c029b82e13135c02a1d4c5b50564d1e0d -->
+ - **Maximum Sequence Length:** 1024 tokens
+ - **Output Dimensionality:** 512 dimensions
+ - **Similarity Function:** Cosine Similarity
+ <!-- - **Training Dataset:** Unknown -->
+ <!-- - **Language:** Unknown -->
+ <!-- - **License:** Unknown -->
+
+ ### Model Sources
+
+ - **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
+ - **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
+ - **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
+
+ ### Full Model Architecture
+
+ ```
+ SentenceTransformer(
+   (0): Transformer({'max_seq_length': 1024, 'do_lower_case': False}) with Transformer model: ModernBertModel
+   (1): Pooling({'word_embedding_dimension': 512, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
+ )
+ ```
+
+ ## Usage
+
+ ### Direct Usage (Sentence Transformers)
+
+ First install the Sentence Transformers library:
+
+ ```bash
+ pip install -U sentence-transformers
+ ```
+
+ Then you can load this model and run inference.
+ ```python
+ from sentence_transformers import SentenceTransformer
+
+ # Download from the 🤗 Hub
+ model = SentenceTransformer("sentence_transformers_model_id")
+ # Run inference
+ sentences = [
+     'This method calculates the turn weight separately.',
+     'public double calcTurnWeight(int edgeFrom, int nodeVia, int edgeTo) {\n long turnFlags = turnCostExt.getTurnCostFlags(edgeFrom, nodeVia, edgeTo);\n if (turnCostEncoder.isTurnRestricted(turnFlags))\n return Double.POSITIVE_INFINITY;\n\n return turnCostEncoder.getTurnCost(turnFlags);\n }',
+     'public void putAllWriteable(BeanMap<T> map) {\n map.types.keySet().stream().filter(key -> getWriteInvoker(key) != null).forEach(key -> this.put(key, map.get(key)));\n }',
+ ]
+ embeddings = model.encode(sentences)
+ print(embeddings.shape)
+ # [3, 512]
+
+ # Get the similarity scores for the embeddings
+ similarities = model.similarity(embeddings, embeddings)
+ print(similarities.shape)
+ # [3, 3]
+ ```
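
Since the training pairs couple a docstring or comment with its implementation, the most natural downstream use is natural-language code search. A small illustrative sketch along those lines, reusing the placeholder model id from the snippet above (the query and candidate snippets are invented for the example):

```python
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("sentence_transformers_model_id")  # placeholder id

query = "// MiddlewareHeaders adds headers to a handler"
candidates = [
    "func MiddlewareHeaders(vs map[string]string) Middleware { /* ... */ }",
    "function pick(o, props = []) { /* ... */ }",
]

# Embed the query and the code candidates, then rank by cosine similarity.
query_emb = model.encode(query)
cand_embs = model.encode(candidates)
scores = model.similarity(query_emb, cand_embs)  # shape [1, len(candidates)]
print(candidates[scores.argmax().item()])
```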
+
+ <!--
+ ### Direct Usage (Transformers)
+
+ <details><summary>Click to see the direct usage in Transformers</summary>
+
+ </details>
+ -->
+
+ <!--
+ ### Downstream Usage (Sentence Transformers)
+
+ You can finetune this model on your own dataset.
+
+ <details><summary>Click to expand</summary>
+
+ </details>
+ -->
+
+ <!--
+ ### Out-of-Scope Use
+
+ *List how the model may foreseeably be misused and address what users ought not to do with the model.*
+ -->
+
+ <!--
+ ## Bias, Risks and Limitations
+
+ *What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
+ -->
+
+ <!--
+ ### Recommendations
+
+ *What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
+ -->
+
+ ## Training Details
+
+ ### Training Dataset
+
+ #### Unnamed Dataset
+
+ * Size: 1,761,750 training samples
+ * Columns: <code>sentence_0</code>, <code>sentence_1</code>, and <code>label</code>
+ * Approximate statistics based on the first 1000 samples:
+   | | sentence_0 | sentence_1 | label |
+   |:--------|:-----------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------|:--------------------------------------------------------------|
+   | type | string | string | float |
+   | details | <ul><li>min: 3 tokens</li><li>mean: 47.87 tokens</li><li>max: 633 tokens</li></ul> | <ul><li>min: 28 tokens</li><li>mean: 164.44 tokens</li><li>max: 1024 tokens</li></ul> | <ul><li>min: 1.0</li><li>mean: 1.0</li><li>max: 1.0</li></ul> |
+ * Samples:
+   | sentence_0 | sentence_1 | label |
+   |:-----------------------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------|:-----------------|
+   | <code>// Read reads from serial port.<br>// It is blocked until data received or timeout after p.timeout.</code> | <code>func (p *port) Read(b []byte) (n int, err error) {<br> var done uint32<br> if err = syscall.ReadFile(p.handle, b, &done, nil); err != nil {<br> return<br> }<br> if done == 0 {<br> err = ErrTimeout<br> return<br> }<br> n = int(done)<br> return<br>}</code> | <code>1.0</code> |
+   | <code>// _NET_WM_STRUT_PARTIAL set</code> | <code>func WmStrutPartialSet(xu *xgbutil.XUtil, win xproto.Window,<br> struts *WmStrutPartial) error {<br><br> rawStruts := make([]uint, 12)<br> rawStruts[0] = struts.Left<br> rawStruts[1] = struts.Right<br> rawStruts[2] = struts.Top<br> rawStruts[3] = struts.Bottom<br> rawStruts[4] = struts.LeftStartY<br> rawStruts[5] = struts.LeftEndY<br> rawStruts[6] = struts.RightStartY<br> rawStruts[7] = struts.RightEndY<br> rawStruts[8] = struts.TopStartX<br> rawStruts[9] = struts.TopEndX<br> rawStruts[10] = struts.BottomStartX<br> rawStruts[11] = struts.BottomEndX<br><br> return xprop.ChangeProp32(xu, win, "_NET_WM_STRUT_PARTIAL", "CARDINAL",<br> rawStruts...)<br>}</code> | <code>1.0</code> |
+   | <code>// Union returns a new geometry representing all points in this geometry and the<br>// other.</code> | <code>func (g *Geometry) Union(other *Geometry) (*Geometry, error) {<br> return g.binaryTopo("Union", cGEOSUnion, other)<br>}</code> | <code>1.0</code> |
+ * Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
+   ```json
+   {
+       "scale": 20.0,
+       "similarity_fct": "cos_sim"
+   }
+   ```
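
For intuition: `MultipleNegativesRankingLoss` with these parameters scores every query in a batch against every positive in the same batch with cosine similarity, multiplies by `scale` (20.0), and applies cross-entropy, so each query is pulled toward its own positive and pushed away from the other in-batch pairs. A minimal PyTorch sketch of that computation (the underlying math, not the library's implementation):

```python
import torch
import torch.nn.functional as F

def mnr_loss(query_emb: torch.Tensor, pos_emb: torch.Tensor, scale: float = 20.0) -> torch.Tensor:
    # sim[i][j] = cosine similarity between query i and positive j;
    # the diagonal holds the true (anchor, positive) pairs.
    q = F.normalize(query_emb, dim=-1)
    p = F.normalize(pos_emb, dim=-1)
    sim = scale * (q @ p.T)                      # (batch, batch) scaled similarities
    labels = torch.arange(sim.size(0), device=sim.device)  # row i's positive is column i
    return F.cross_entropy(sim, labels)
```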
+
+ ### Training Hyperparameters
+ #### Non-Default Hyperparameters
+
+ - `per_device_train_batch_size`: 400
+ - `per_device_eval_batch_size`: 400
+ - `num_train_epochs`: 5
+ - `fp16`: True
+ - `multi_dataset_batch_sampler`: round_robin
+
+ #### All Hyperparameters
+ <details><summary>Click to expand</summary>
+
+ - `overwrite_output_dir`: False
+ - `do_predict`: False
+ - `eval_strategy`: no
+ - `prediction_loss_only`: True
+ - `per_device_train_batch_size`: 400
+ - `per_device_eval_batch_size`: 400
+ - `per_gpu_train_batch_size`: None
+ - `per_gpu_eval_batch_size`: None
+ - `gradient_accumulation_steps`: 1
+ - `eval_accumulation_steps`: None
+ - `torch_empty_cache_steps`: None
+ - `learning_rate`: 5e-05
+ - `weight_decay`: 0.0
+ - `adam_beta1`: 0.9
+ - `adam_beta2`: 0.999
+ - `adam_epsilon`: 1e-08
+ - `max_grad_norm`: 1
+ - `num_train_epochs`: 5
+ - `max_steps`: -1
+ - `lr_scheduler_type`: linear
+ - `lr_scheduler_kwargs`: {}
+ - `warmup_ratio`: 0.0
+ - `warmup_steps`: 0
+ - `log_level`: passive
+ - `log_level_replica`: warning
+ - `log_on_each_node`: True
+ - `logging_nan_inf_filter`: True
+ - `save_safetensors`: True
+ - `save_on_each_node`: False
+ - `save_only_model`: False
+ - `restore_callback_states_from_checkpoint`: False
+ - `no_cuda`: False
+ - `use_cpu`: False
+ - `use_mps_device`: False
+ - `seed`: 42
+ - `data_seed`: None
+ - `jit_mode_eval`: False
+ - `use_ipex`: False
+ - `bf16`: False
+ - `fp16`: True
+ - `fp16_opt_level`: O1
+ - `half_precision_backend`: auto
+ - `bf16_full_eval`: False
+ - `fp16_full_eval`: False
+ - `tf32`: None
+ - `local_rank`: 0
+ - `ddp_backend`: None
+ - `tpu_num_cores`: None
+ - `tpu_metrics_debug`: False
+ - `debug`: []
+ - `dataloader_drop_last`: False
+ - `dataloader_num_workers`: 0
+ - `dataloader_prefetch_factor`: None
+ - `past_index`: -1
+ - `disable_tqdm`: False
+ - `remove_unused_columns`: True
+ - `label_names`: None
+ - `load_best_model_at_end`: False
+ - `ignore_data_skip`: False
+ - `fsdp`: []
+ - `fsdp_min_num_params`: 0
+ - `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
+ - `tp_size`: 0
+ - `fsdp_transformer_layer_cls_to_wrap`: None
+ - `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
+ - `deepspeed`: None
+ - `label_smoothing_factor`: 0.0
+ - `optim`: adamw_torch
+ - `optim_args`: None
+ - `adafactor`: False
+ - `group_by_length`: False
+ - `length_column_name`: length
+ - `ddp_find_unused_parameters`: None
+ - `ddp_bucket_cap_mb`: None
+ - `ddp_broadcast_buffers`: False
+ - `dataloader_pin_memory`: True
+ - `dataloader_persistent_workers`: False
+ - `skip_memory_metrics`: True
+ - `use_legacy_prediction_loop`: False
+ - `push_to_hub`: False
+ - `resume_from_checkpoint`: None
+ - `hub_model_id`: None
+ - `hub_strategy`: every_save
+ - `hub_private_repo`: None
+ - `hub_always_push`: False
+ - `gradient_checkpointing`: False
+ - `gradient_checkpointing_kwargs`: None
+ - `include_inputs_for_metrics`: False
+ - `include_for_metrics`: []
+ - `eval_do_concat_batches`: True
+ - `fp16_backend`: auto
+ - `push_to_hub_model_id`: None
+ - `push_to_hub_organization`: None
+ - `mp_parameters`: 
+ - `auto_find_batch_size`: False
+ - `full_determinism`: False
+ - `torchdynamo`: None
+ - `ray_scope`: last
+ - `ddp_timeout`: 1800
+ - `torch_compile`: False
+ - `torch_compile_backend`: None
+ - `torch_compile_mode`: None
+ - `include_tokens_per_second`: False
+ - `include_num_input_tokens_seen`: False
+ - `neftune_noise_alpha`: None
+ - `optim_target_modules`: None
+ - `batch_eval_metrics`: False
+ - `eval_on_start`: False
+ - `use_liger_kernel`: False
+ - `eval_use_gather_object`: False
+ - `average_tokens_across_devices`: False
+ - `prompts`: None
+ - `batch_sampler`: batch_sampler
+ - `multi_dataset_batch_sampler`: round_robin
+
+ </details>
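
A hedged reconstruction of a comparable training setup. Only the base model, CLS pooling, max sequence length, loss, batch size, epoch count, and fp16 come from this card; the one-row dataset below is a stand-in for the real 1.76M docstring–code pairs:

```python
from datasets import Dataset
from sentence_transformers import SentenceTransformer, SentenceTransformerTrainer, models
from sentence_transformers.losses import MultipleNegativesRankingLoss
from sentence_transformers.training_args import SentenceTransformerTrainingArguments

# Rebuild the architecture shown above: ModernBERT encoder + CLS pooling.
word = models.Transformer("Shuu12121/CodeModernBERT-Snake", max_seq_length=1024)
pool = models.Pooling(word.get_word_embedding_dimension(), pooling_mode="cls")
model = SentenceTransformer(modules=[word, pool])

# Stand-in for the real (comment, code) pair dataset.
train_dataset = Dataset.from_dict({
    "sentence_0": ["// adds two numbers"],
    "sentence_1": ["func Add(a, b int) int { return a + b }"],
})

args = SentenceTransformerTrainingArguments(
    output_dir="output",
    per_device_train_batch_size=400,
    num_train_epochs=5,
    fp16=True,
)

trainer = SentenceTransformerTrainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    loss=MultipleNegativesRankingLoss(model),  # scale=20.0, cos_sim by default
)
trainer.train()
```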
+
+ ### Training Logs
+ | Epoch | Step | Training Loss |
+ |:------:|:-----:|:-------------:|
+ | 0.1135 | 500 | 1.0064 |
+ | 0.2270 | 1000 | 0.1985 |
+ | 0.3405 | 1500 | 0.1802 |
+ | 0.4540 | 2000 | 0.1659 |
+ | 0.5675 | 2500 | 0.1583 |
+ | 0.6810 | 3000 | 0.153 |
+ | 0.7946 | 3500 | 0.1478 |
+ | 0.9081 | 4000 | 0.1425 |
+ | 1.0216 | 4500 | 0.132 |
+ | 1.1351 | 5000 | 0.097 |
+ | 1.2486 | 5500 | 0.1 |
+ | 1.3621 | 6000 | 0.0972 |
+ | 1.4756 | 6500 | 0.0958 |
+ | 1.5891 | 7000 | 0.0968 |
+ | 1.7026 | 7500 | 0.0945 |
+ | 1.8161 | 8000 | 0.0943 |
+ | 1.9296 | 8500 | 0.0938 |
+ | 2.0431 | 9000 | 0.0831 |
+ | 2.1566 | 9500 | 0.0634 |
+ | 2.2701 | 10000 | 0.0642 |
+ | 2.3837 | 10500 | 0.0639 |
+ | 2.4972 | 11000 | 0.0646 |
+ | 2.6107 | 11500 | 0.065 |
+ | 2.7242 | 12000 | 0.0637 |
+ | 2.8377 | 12500 | 0.062 |
+ | 2.9512 | 13000 | 0.0626 |
+ | 3.0647 | 13500 | 0.0522 |
+ | 3.1782 | 14000 | 0.0443 |
+ | 3.2917 | 14500 | 0.0435 |
+ | 3.4052 | 15000 | 0.0447 |
+ | 3.5187 | 15500 | 0.0441 |
+ | 3.6322 | 16000 | 0.045 |
+ | 3.7457 | 16500 | 0.0443 |
+ | 3.8593 | 17000 | 0.0441 |
+ | 3.9728 | 17500 | 0.0433 |
+ | 4.0863 | 18000 | 0.0368 |
+ | 4.1998 | 18500 | 0.0333 |
+ | 4.3133 | 19000 | 0.0332 |
+ | 4.4268 | 19500 | 0.0335 |
+ | 4.5403 | 20000 | 0.033 |
+ | 4.6538 | 20500 | 0.0334 |
+ | 4.7673 | 21000 | 0.0325 |
+ | 4.8808 | 21500 | 0.0342 |
+ | 4.9943 | 22000 | 0.0341 |
+
+
+ ### Framework Versions
+ - Python: 3.11.12
+ - Sentence Transformers: 3.4.1
+ - Transformers: 4.51.3
+ - PyTorch: 2.6.0+cu124
+ - Accelerate: 1.5.2
+ - Datasets: 3.5.0
+ - Tokenizers: 0.21.1
+
+ ## Citation
+
+ ### BibTeX
+
+ #### Sentence Transformers
+ ```bibtex
+ @inproceedings{reimers-2019-sentence-bert,
+     title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
+     author = "Reimers, Nils and Gurevych, Iryna",
+     booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
+     month = "11",
+     year = "2019",
+     publisher = "Association for Computational Linguistics",
+     url = "https://arxiv.org/abs/1908.10084",
+ }
+ ```
+
+ #### MultipleNegativesRankingLoss
+ ```bibtex
+ @misc{henderson2017efficient,
+     title={Efficient Natural Language Response Suggestion for Smart Reply},
+     author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
+     year={2017},
+     eprint={1705.00652},
+     archivePrefix={arXiv},
+     primaryClass={cs.CL}
+ }
+ ```
+
+ <!--
+ ## Glossary
+
+ *Clearly define terms in order to be accessible across audiences.*
+ -->
+
+ <!--
+ ## Model Card Authors
+
+ *Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
+ -->
+
+ <!--
+ ## Model Card Contact
+
+ *Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
+ -->
added_tokens.json ADDED
@@ -0,0 +1,6 @@
+ {
+   "</s>": 50001,
+   "<mask>": 50003,
+   "<s>": 50000,
+   "<unk>": 50002
+ }
config.json ADDED
@@ -0,0 +1,48 @@
+ {
+   "architectures": [
+     "ModernBertModel"
+   ],
+   "attention_bias": false,
+   "attention_dropout": 0.0,
+   "attention_probs_dropout_prob": 0.1,
+   "bos_token_id": 50000,
+   "classifier_activation": "gelu",
+   "classifier_bias": false,
+   "classifier_dropout": 0.0,
+   "classifier_pooling": "cls",
+   "cls_token_id": 50281,
+   "decoder_bias": true,
+   "deterministic_flash_attn": false,
+   "embedding_dropout": 0.0,
+   "eos_token_id": 50001,
+   "global_attn_every_n_layers": 3,
+   "global_rope_theta": 160000.0,
+   "hidden_activation": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 512,
+   "initializer_cutoff_factor": 2.0,
+   "initializer_range": 0.02,
+   "intermediate_size": 2048,
+   "local_attention": 128,
+   "local_attention_rope_theta": 10000,
+   "local_attention_window": 128,
+   "local_rope_theta": 10000.0,
+   "max_position_embeddings": 8192,
+   "mlp_bias": false,
+   "mlp_dropout": 0.0,
+   "model_type": "modernbert",
+   "norm_bias": false,
+   "norm_eps": 1e-05,
+   "num_attention_heads": 8,
+   "num_hidden_layers": 12,
+   "pad_token_id": 0,
+   "repad_logits_with_grad": false,
+   "rope_theta": 160000,
+   "sep_token_id": 50282,
+   "sparse_pred_ignore_index": -100,
+   "sparse_prediction": false,
+   "torch_dtype": "float32",
+   "transformers_version": "4.51.3",
+   "type_vocab_size": 2,
+   "vocab_size": 50004
+ }
config_sentence_transformers.json ADDED
@@ -0,0 +1,10 @@
+ {
+   "__version__": {
+     "sentence_transformers": "3.4.1",
+     "transformers": "4.51.3",
+     "pytorch": "2.6.0+cu124"
+   },
+   "prompts": {},
+   "default_prompt_name": null,
+   "similarity_fn_name": "cosine"
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c842f6cfef5c72cfe06132e4b74d012ab7573b6be2d78a0709c7aaf5f30998c9
+ size 303793176
modules.json ADDED
@@ -0,0 +1,14 @@
+ [
+   {
+     "idx": 0,
+     "name": "0",
+     "path": "",
+     "type": "sentence_transformers.models.Transformer"
+   },
+   {
+     "idx": 1,
+     "name": "1",
+     "path": "1_Pooling",
+     "type": "sentence_transformers.models.Pooling"
+   }
+ ]
sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
+ {
+   "max_seq_length": 1024,
+   "do_lower_case": false
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
+ {
+   "bos_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "cls_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "mask_token": {
+     "content": "<mask>",
+     "lstrip": true,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "[PAD]",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "sep_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,97 @@
+ {
+   "add_prefix_space": false,
+   "added_tokens_decoder": {
+     "0": {
+       "content": "[PAD]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "[UNK]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "[CLS]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "3": {
+       "content": "[SEP]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "4": {
+       "content": "[MASK]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "50000": {
+       "content": "<s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "50001": {
+       "content": "</s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "50002": {
+       "content": "<unk>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "50003": {
+       "content": "<mask>",
+       "lstrip": true,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "bos_token": "<s>",
+   "clean_up_tokenization_spaces": false,
+   "cls_token": "<s>",
+   "eos_token": "</s>",
+   "errors": "replace",
+   "extra_special_tokens": {},
+   "mask_token": "<mask>",
+   "max_length": null,
+   "model_max_length": 1000000000000000019884624838656,
+   "pad_to_multiple_of": null,
+   "pad_token": "[PAD]",
+   "pad_token_type_id": 0,
+   "padding_side": "right",
+   "sep_token": "</s>",
+   "stride": 0,
+   "tokenizer_class": "RobertaTokenizer",
+   "trim_offsets": true,
+   "truncation_side": "right",
+   "truncation_strategy": "longest_first",
+   "unk_token": "<unk>"
+ }
vocab.json ADDED
The diff for this file is too large to render. See raw diff