Update README.md

Browse files

Files changed (1) hide show

README.md +74 -513

README.md CHANGED Viewed

@@ -2,551 +2,112 @@
 tags:
 - sentence-transformers
 - sentence-similarity
-- feature-extraction
-- generated_from_trainer
-- dataset_size:1736166
-- loss:MultipleNegativesRankingLoss
 base_model: Shuu12121/CodeModernBERT-Owl-1.0
-widget:
-- source_sentence: 'Releases any resources allocated by `mountComponent`.
-    @final
-    @internal'
-  sentences:
-  - "private Range<RowKeyWrapper> rowRangeToRange(RowRange btRange) {\n    final BoundType\
-    \ startBound;\n    final ByteString startKey;\n\n    switch (btRange.getStartKeyCase())\
-    \ {\n      case START_KEY_OPEN:\n        startBound = BoundType.OPEN;\n      \
-    \  startKey = btRange.getStartKeyOpen();\n        break;\n      case START_KEY_CLOSED:\n\
-    \        startBound = BoundType.CLOSED;\n        startKey = btRange.getStartKeyClosed();\n\
-    \        break;\n      case STARTKEY_NOT_SET:\n        startBound = BoundType.CLOSED;\n\
-    \        startKey = ByteString.EMPTY;\n        break;\n      default:\n      \
-    \  throw new IllegalArgumentException(\"Unexpected start key case: \" +\n    \
-    \        btRange.getStartKeyCase());\n    }\n\n    final BoundType endBound;\n\
-    \    final ByteString endKey;\n    switch (btRange.getEndKeyCase()) {\n      case\
-    \ END_KEY_OPEN:\n        endBound = BoundType.OPEN;\n        endKey = btRange.getEndKeyOpen();\n\
-    \        break;\n      case END_KEY_CLOSED:\n        endBound = BoundType.CLOSED;\n\
-    \        endKey = btRange.getEndKeyClosed();\n        break;\n      case ENDKEY_NOT_SET:\n\
-    \        endBound = BoundType.OPEN;\n        endKey = ByteString.EMPTY;\n    \
-    \    break;\n      default:\n        throw new IllegalArgumentException(\"Unexpected\
-    \ end key case: \" + btRange.getEndKeyCase());\n    }\n\n    return boundedRange(startBound,\
-    \ startKey, endBound, endKey);\n  }"
-  - "public static FacesMessage getMessage(FacesMessage.Severity severity,\n     \
-    \                                     String messageId,\n                    \
-    \                      Object arg)\n    {\n        return getMessage(severity,\n\
-    \                          messageId,\n                          new Object[]{arg},\n\
-    \                          FacesContext.getCurrentInstance());\n    }"
-  - "function() {\n    var inst = this._instance;\n\n    if (inst.componentWillUnmount)\
-    \ {\n      var previouslyUnmounting = ReactLifeCycle.currentlyUnmountingInstance;\n\
-    \      ReactLifeCycle.currentlyUnmountingInstance = this;\n      try {\n     \
-    \   inst.componentWillUnmount();\n      } finally {\n        ReactLifeCycle.currentlyUnmountingInstance\
-    \ = previouslyUnmounting;\n      }\n    }\n\n    ReactReconciler.unmountComponent(this._renderedComponent);\n\
-    \    this._renderedComponent = null;\n\n    // Reset pending fields\n    this._pendingStateQueue\
-    \ = null;\n    this._pendingReplaceState = false;\n    this._pendingForceUpdate\
-    \ = false;\n    this._pendingCallbacks = null;\n    this._pendingElement = null;\n\
-    \n    // These fields do not really need to be reset since this object is no\n\
-    \    // longer accessible.\n    this._context = null;\n    this._rootNodeID =\
-    \ null;\n\n    // Delete the reference from the instance to this internal representation\n\
-    \    // which allow the internals to be properly cleaned up even if the user\n\
-    \    // leaks a reference to the public instance.\n    ReactInstanceMap.remove(inst);\n\
-    \n    // Some existing components rely on inst.props even after they've been\n\
-    \    // destroyed (in event handlers).\n    // TODO: inst.props = null;\n    //\
-    \ TODO: inst.state = null;\n    // TODO: inst.context = null;\n  }"
-- source_sentence: '// NewRandomForest generates and return a new random forests
-    // forestSize controls the number of trees that get built
-    // features controls the number of features used to build each tree.'
-  sentences:
-  - "function initShaders() {\n  var fragmentShader = getShader(gl, fragmentShaderSource,\
-    \ false);\n  var vertexShader = getShader(gl, vertexShaderSource, true);\n\n \
-    \ // Create the shader program\n\n  shaderProgram = gl.createProgram();\n  gl.attachShader(shaderProgram,\
-    \ vertexShader);\n  gl.attachShader(shaderProgram, fragmentShader);\n  gl.linkProgram(shaderProgram);\n\
-    \n  // If creating the shader program failed, alert\n\n  if (!gl.getProgramParameter(shaderProgram,\
-    \ gl.LINK_STATUS)) {\n    alert(\"Unable to initialize the shader program: \"\
-    \ + gl.getProgramInfoLog(shader));\n  }\n\n  gl.useProgram(shaderProgram);\n\n\
-    \  vertexPositionAttribute = gl.getAttribLocation(shaderProgram, \"aVertexPosition\"\
-    );\n  gl.enableVertexAttribArray(vertexPositionAttribute);\n}"
-  - "func (s *UpdateStackInput) SetConfigurationManager(v *StackConfigurationManager)\
-    \ *UpdateStackInput {\n\ts.ConfigurationManager = v\n\treturn s\n}"
-  - "func NewRandomForest(forestSize int, features int) *RandomForest {\n\tret :=\
-    \ &RandomForest{\n\t\tbase.BaseClassifier{},\n\t\tforestSize,\n\t\tfeatures,\n\
-    \t\tnil,\n\t}\n\treturn ret\n}"
-- source_sentence: // defaultQueueURL is a method returns the default queue url
-  sentences:
-  - "public static int parseUnsignedInt(String s, int radix)\n                throws\
-    \ NumberFormatException {\n        if (s == null)  {\n            throw new NumberFormatException(\"\
-    null\");\n        }\n\n        int len = s.length();\n        if (len > 0) {\n\
-    \            char firstChar = s.charAt(0);\n            if (firstChar == '-')\
-    \ {\n                throw new\n                    NumberFormatException(String.format(\"\
-    Illegal leading minus sign \" +\n                                            \
-    \           \"on unsigned string %s.\", s));\n            } else {\n         \
-    \       if (len <= 5 || // Integer.MAX_VALUE in Character.MAX_RADIX is 6 digits\n\
-    \                    (radix == 10 && len <= 9) ) { // Integer.MAX_VALUE in base\
-    \ 10 is 10 digits\n                    return parseInt(s, radix);\n          \
-    \      } else {\n                    long ell = Long.parseLong(s, radix);\n  \
-    \                  if ((ell & 0xffff_ffff_0000_0000L) == 0) {\n              \
-    \          return (int) ell;\n                    } else {\n                 \
-    \       throw new\n                            NumberFormatException(String.format(\"\
-    String value %s exceeds \" +\n                                               \
-    \                 \"range of unsigned int.\", s));\n                    }\n  \
-    \              }\n            }\n        } else {\n            throw NumberFormatException.forInputString(s);\n\
-    \        }\n    }"
-  - "func (f *Filter) shouldNamePass(key string) bool {\n\tpass := func(f *Filter)\
-    \ bool {\n\t\tif f.namePass.Match(key) {\n\t\t\treturn true\n\t\t}\n\t\treturn\
-    \ false\n\t}\n\n\tdrop := func(f *Filter) bool {\n\t\tif f.nameDrop.Match(key)\
-    \ {\n\t\t\treturn false\n\t\t}\n\t\treturn true\n\t}\n\n\tif f.namePass != nil\
-    \ && f.nameDrop != nil {\n\t\treturn pass(f) && drop(f)\n\t} else if f.namePass\
-    \ != nil {\n\t\treturn pass(f)\n\t} else if f.nameDrop != nil {\n\t\treturn drop(f)\n\
-    \t}\n\n\treturn true\n}"
-  - "func (b *Broker) defaultQueueURL() *string {\n\tif b.queueUrl != nil {\n\t\t\
-    return b.queueUrl\n\t} else {\n\t\treturn aws.String(b.GetConfig().Broker + \"\
-    /\" + b.GetConfig().DefaultQueue)\n\t}\n\n}"
-- source_sentence: Check if the input `node` is a binding identifier.
-  sentences:
-  - "function isBinding(node, parent) {\n  var keys = _retrievers.getBindingIdentifiers.keys[parent.type];\n\
-    \  if (keys) {\n    for (var i = 0; i < keys.length; i++) {\n      var key = keys[i];\n\
-    \      var val = parent[key];\n      if (Array.isArray(val)) {\n        if (val.indexOf(node)\
-    \ >= 0) return true;\n      } else {\n        if (val === node) return true;\n\
-    \      }\n    }\n  }\n\n  return false;\n}"
-  - "public Type build(Type givenType){\n        final ClassType javersType = mapper.getJaversClassType(givenType);\n\
-    \n        //for Generics, we have list of type arguments to dehydrate\n      \
-    \  if (javersType.isGenericType()) {\n            List<Type> actualDehydratedTypeArguments\
-    \ = extractAndDehydrateTypeArguments(javersType);\n            return new ParametrizedDehydratedType(javersType.getBaseJavaClass(),\
-    \ actualDehydratedTypeArguments);\n        }\n\n        if (javersType instanceof\
-    \ ArrayType){\n            Type dehydratedItemType = build( javersType.getConcreteClassTypeArguments().get(0)\
-    \ );\n            if (dehydratedItemType == GlobalId.class){\n               \
-    \ return GLOBAL_ID_ARRAY_TYPE;\n            }\n            return givenType;\n\
-    \        }\n\n        return javersType.getRawDehydratedType();\n    }"
-  - "public function getMemoryUsage()\n    {\n        $size = memory_get_usage(true);\n\
-    \        $unit = array('b','kb','mb','gb','tb','pb');\n                \n    \
-    \    return @round($size/pow(1024,($i=floor(log($size,1024)))),2).' '.$unit[$i];\n\
-    \    }"
-- source_sentence: // SetGameSessionQueueArns sets the GameSessionQueueArns field's
-    value.
-  sentences:
-  - "func (s *MatchmakingConfiguration) SetGameSessionQueueArns(v []*string) *MatchmakingConfiguration\
-    \ {\n\ts.GameSessionQueueArns = v\n\treturn s\n}"
-  - "protected function getRelationXmlHashFromDB(array $destinationContentIds)\n \
-    \   {\n        if (empty($destinationContentIds)) {\n            return array();\n\
-    \        }\n\n        $q = $this->db->createSelectQuery();\n        $q\n     \
-    \       ->select(\n                $this->db->aliasedColumn($q, 'id', 'ezcontentobject'),\n\
-    \                $this->db->aliasedColumn($q, 'remote_id', 'ezcontentobject'),\n\
-    \                $this->db->aliasedColumn($q, 'current_version', 'ezcontentobject'),\n\
-    \                $this->db->aliasedColumn($q, 'contentclass_id', 'ezcontentobject'),\n\
-    \                $this->db->aliasedColumn($q, 'node_id', 'ezcontentobject_tree'),\n\
-    \                $this->db->aliasedColumn($q, 'parent_node_id', 'ezcontentobject_tree'),\n\
-    \                $this->db->aliasedColumn($q, 'identifier', 'ezcontentclass')\n\
-    \            )\n            ->from($this->db->quoteTable('ezcontentobject'))\n\
-    \            ->leftJoin(\n                $this->db->quoteTable('ezcontentobject_tree'),\n\
-    \                $q->expr->lAnd(\n                    $q->expr->eq(\n        \
-    \                $this->db->quoteColumn('contentobject_id', 'ezcontentobject_tree'),\n\
-    \                        $this->db->quoteColumn('id', 'ezcontentobject')\n   \
-    \                 ),\n                    $q->expr->eq(\n                    \
-    \    $this->db->quoteColumn('node_id', 'ezcontentobject_tree'),\n            \
-    \            $this->db->quoteColumn('main_node_id', 'ezcontentobject_tree')\n\
-    \                    )\n                )\n            )\n            ->leftJoin(\n\
-    \                $this->db->quoteTable('ezcontentclass'),\n                $q->expr->lAnd(\n\
-    \                    $q->expr->eq(\n                        $this->db->quoteColumn('id',\
-    \ 'ezcontentclass'),\n                        $this->db->quoteColumn('contentclass_id',\
-    \ 'ezcontentobject')\n                    ),\n                    $q->expr->eq(\n\
-    \                        $this->db->quoteColumn('version', 'ezcontentclass'),\n\
-    \                        $q->bindValue(ContentType::STATUS_DEFINED, null, PDO::PARAM_INT)\n\
-    \                    )\n                )\n            )\n            ->where(\n\
-    \                $q->expr->in(\n                    $this->db->quoteColumn('id',\
-    \ 'ezcontentobject'),\n                    $destinationContentIds\n          \
-    \      )\n            );\n        $stmt = $q->prepare();\n        $stmt->execute();\n\
-    \n        return $stmt->fetchAll(PDO::FETCH_ASSOC | PDO::FETCH_GROUP);\n    }"
-  - "protected function _beforeDispatch(DispatcherContext $context)\n    {\n     \
-    \   // Check if the user has been explicitly authenticated for this request\n\
-    \        if (!$this->getUser()->isAuthentic(true))\n        {\n            foreach($this->__authenticator_queue\
-    \ as $authenticator)\n            {\n                if($authenticator->authenticateRequest($context)\
-    \ === true) {\n                    break;\n                }\n            }\n\
-    \        }\n\n    }"
 pipeline_tag: sentence-similarity
 library_name: sentence-transformers
 ---
-# SentenceTransformer based on Shuu12121/CodeModernBERT-Owl-1.0
-This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [Shuu12121/CodeModernBERT-Owl-1.0](https://huggingface.co/Shuu12121/CodeModernBERT-Owl-1.0). It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
-## Model Details
-### Model Description
-- **Model Type:** Sentence Transformer
-- **Base model:** [Shuu12121/CodeModernBERT-Owl-1.0](https://huggingface.co/Shuu12121/CodeModernBERT-Owl-1.0) <!-- at revision e76a6056791a4f60962d3f2ce4cc7d94bd719485 -->
-- **Maximum Sequence Length:** 1024 tokens
-- **Output Dimensionality:** 768 dimensions
-- **Similarity Function:** Cosine Similarity
-<!-- - **Training Dataset:** Unknown -->
-<!-- - **Language:** Unknown -->
-<!-- - **License:** Unknown -->
-### Model Sources
-- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
-- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
-- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
-### Full Model Architecture
-```
-SentenceTransformer(
-  (0): Transformer({'max_seq_length': 1024, 'do_lower_case': False}) with Transformer model: ModernBertModel
-  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
-)
-```
-## Usage
-### Direct Usage (Sentence Transformers)
-First install the Sentence Transformers library:
-```bash
-pip install -U sentence-transformers
-```
-Then you can load this model and run inference.
-```python
-from sentence_transformers import SentenceTransformer
-# Download from the 🤗 Hub
-model = SentenceTransformer("sentence_transformers_model_id")
-# Run inference
-sentences = [
-    "// SetGameSessionQueueArns sets the GameSessionQueueArns field's value.",
-    'func (s *MatchmakingConfiguration) SetGameSessionQueueArns(v []*string) *MatchmakingConfiguration {\n\ts.GameSessionQueueArns = v\n\treturn s\n}',
-    'protected function _beforeDispatch(DispatcherContext $context)\n    {\n        // Check if the user has been explicitly authenticated for this request\n        if (!$this->getUser()->isAuthentic(true))\n        {\n            foreach($this->__authenticator_queue as $authenticator)\n            {\n                if($authenticator->authenticateRequest($context) === true) {\n                    break;\n                }\n            }\n        }\n\n    }',
-]
-embeddings = model.encode(sentences)
-print(embeddings.shape)
-# [3, 768]
-# Get the similarity scores for the embeddings
-similarities = model.similarity(embeddings, embeddings)
-print(similarities.shape)
-# [3, 3]
-```
-<!--
-### Direct Usage (Transformers)
-<details><summary>Click to see the direct usage in Transformers</summary>
-</details>
--->
-<!--
-### Downstream Usage (Sentence Transformers)
-You can finetune this model on your own dataset.
-<details><summary>Click to expand</summary>
-</details>
--->
-<!--
-### Out-of-Scope Use
-*List how the model may foreseeably be misused and address what users ought not to do with the model.*
--->
-<!--
-## Bias, Risks and Limitations
-*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
--->
-<!--
-### Recommendations
-*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
--->
-## Training Details
-### Training Dataset
-#### Unnamed Dataset
-* Size: 1,736,166 training samples
-* Columns: <code>sentence_0</code>, <code>sentence_1</code>, and <code>label</code>
-* Approximate statistics based on the first 1000 samples:
-  |         | sentence_0                                                                          | sentence_1                                                                           | label                                                         |
-  |:--------|:------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|:--------------------------------------------------------------|
-  | type    | string                                                                              | string                                                                               | float                                                         |
-  | details | <ul><li>min: 3 tokens</li><li>mean: 48.85 tokens</li><li>max: 1024 tokens</li></ul> | <ul><li>min: 30 tokens</li><li>mean: 171.5 tokens</li><li>max: 1024 tokens</li></ul> | <ul><li>min: 1.0</li><li>mean: 1.0</li><li>max: 1.0</li></ul> |
-* Samples:
-  | sentence_0                                                                                                                      | sentence_1                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            | label            |
-  |:--------------------------------------------------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:-----------------|
-  | <code>// convDataValidationOperatior get excel data validation operator.</code>                                                 | <code>func convDataValidationOperatior(o DataValidationOperator) string {<br>	typeMap := map[DataValidationOperator]string{<br>		DataValidationOperatorBetween:            "between",<br>		DataValidationOperatorEqual:              "equal",<br>		DataValidationOperatorGreaterThan:        "greaterThan",<br>		DataValidationOperatorGreaterThanOrEqual: "greaterThanOrEqual",<br>		DataValidationOperatorLessThan:           "lessThan",<br>		DataValidationOperatorLessThanOrEqual:    "lessThanOrEqual",<br>		DataValidationOperatorNotBetween:         "notBetween",<br>		DataValidationOperatorNotEqual:           "notEqual",<br>	}<br><br>	return typeMap[o]<br><br>}</code> | <code>1.0</code> |
-  | <code>// Convert_v1_PodSecurityPolicyReview_To_security_PodSecurityPolicyReview is an autogenerated conversion function.</code> | <code>func Convert_v1_PodSecurityPolicyReview_To_security_PodSecurityPolicyReview(in *v1.PodSecurityPolicyReview, out *security.PodSecurityPolicyReview, s conversion.Scope) error {<br>	return autoConvert_v1_PodSecurityPolicyReview_To_security_PodSecurityPolicyReview(in, out, s)<br>}</code>                                                                                                                                                                                                                                                                                                                                                                                    | <code>1.0</code> |
-  | <code>// Of note, removeSegments() keeps the ordering of the results stable.</code>                                             | <code>func removeSegments(segments []Segment, toRemove []Segment) []Segment {<br>	rv := make([]Segment, 0, len(segments)-len(toRemove))<br>OUTER:<br>	for _, segment := range segments {<br>		for _, r := range toRemove {<br>			if segment == r {<br>				continue OUTER<br>			}<br>		}<br>		rv = append(rv, segment)<br>	}<br>	return rv<br>}</code>                                                                                                                                                                                                                                                                                                                                 | <code>1.0</code> |
-* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
-  ```json
-  {
-      "scale": 20.0,
-      "similarity_fct": "cos_sim"
-  }
-  ```
-### Training Hyperparameters
-#### Non-Default Hyperparameters
-- `per_device_train_batch_size`: 256
-- `per_device_eval_batch_size`: 256
-- `num_train_epochs`: 5
-- `fp16`: True
-- `multi_dataset_batch_sampler`: round_robin
-#### All Hyperparameters
-<details><summary>Click to expand</summary>
-- `overwrite_output_dir`: False
-- `do_predict`: False
-- `eval_strategy`: no
-- `prediction_loss_only`: True
-- `per_device_train_batch_size`: 256
-- `per_device_eval_batch_size`: 256
-- `per_gpu_train_batch_size`: None
-- `per_gpu_eval_batch_size`: None
-- `gradient_accumulation_steps`: 1
-- `eval_accumulation_steps`: None
-- `torch_empty_cache_steps`: None
-- `learning_rate`: 5e-05
-- `weight_decay`: 0.0
-- `adam_beta1`: 0.9
-- `adam_beta2`: 0.999
-- `adam_epsilon`: 1e-08
-- `max_grad_norm`: 1
-- `num_train_epochs`: 5
-- `max_steps`: -1
-- `lr_scheduler_type`: linear
-- `lr_scheduler_kwargs`: {}
-- `warmup_ratio`: 0.0
-- `warmup_steps`: 0
-- `log_level`: passive
-- `log_level_replica`: warning
-- `log_on_each_node`: True
-- `logging_nan_inf_filter`: True
-- `save_safetensors`: True
-- `save_on_each_node`: False
-- `save_only_model`: False
-- `restore_callback_states_from_checkpoint`: False
-- `no_cuda`: False
-- `use_cpu`: False
-- `use_mps_device`: False
-- `seed`: 42
-- `data_seed`: None
-- `jit_mode_eval`: False
-- `use_ipex`: False
-- `bf16`: False
-- `fp16`: True
-- `fp16_opt_level`: O1
-- `half_precision_backend`: auto
-- `bf16_full_eval`: False
-- `fp16_full_eval`: False
-- `tf32`: None
-- `local_rank`: 0
-- `ddp_backend`: None
-- `tpu_num_cores`: None
-- `tpu_metrics_debug`: False
-- `debug`: []
-- `dataloader_drop_last`: False
-- `dataloader_num_workers`: 0
-- `dataloader_prefetch_factor`: None
-- `past_index`: -1
-- `disable_tqdm`: False
-- `remove_unused_columns`: True
-- `label_names`: None
-- `load_best_model_at_end`: False
-- `ignore_data_skip`: False
-- `fsdp`: []
-- `fsdp_min_num_params`: 0
-- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
-- `tp_size`: 0
-- `fsdp_transformer_layer_cls_to_wrap`: None
-- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
-- `deepspeed`: None
-- `label_smoothing_factor`: 0.0
-- `optim`: adamw_torch
-- `optim_args`: None
-- `adafactor`: False
-- `group_by_length`: False
-- `length_column_name`: length
-- `ddp_find_unused_parameters`: None
-- `ddp_bucket_cap_mb`: None
-- `ddp_broadcast_buffers`: False
-- `dataloader_pin_memory`: True
-- `dataloader_persistent_workers`: False
-- `skip_memory_metrics`: True
-- `use_legacy_prediction_loop`: False
-- `push_to_hub`: False
-- `resume_from_checkpoint`: None
-- `hub_model_id`: None
-- `hub_strategy`: every_save
-- `hub_private_repo`: None
-- `hub_always_push`: False
-- `gradient_checkpointing`: False
-- `gradient_checkpointing_kwargs`: None
-- `include_inputs_for_metrics`: False
-- `include_for_metrics`: []
-- `eval_do_concat_batches`: True
-- `fp16_backend`: auto
-- `push_to_hub_model_id`: None
-- `push_to_hub_organization`: None
-- `mp_parameters`:
-- `auto_find_batch_size`: False
-- `full_determinism`: False
-- `torchdynamo`: None
-- `ray_scope`: last
-- `ddp_timeout`: 1800
-- `torch_compile`: False
-- `torch_compile_backend`: None
-- `torch_compile_mode`: None
-- `include_tokens_per_second`: False
-- `include_num_input_tokens_seen`: False
-- `neftune_noise_alpha`: None
-- `optim_target_modules`: None
-- `batch_eval_metrics`: False
-- `eval_on_start`: False
-- `use_liger_kernel`: False
-- `eval_use_gather_object`: False
-- `average_tokens_across_devices`: False
-- `prompts`: None
-- `batch_sampler`: batch_sampler
-- `multi_dataset_batch_sampler`: round_robin
-</details>
-### Training Logs
-| Epoch  | Step  | Training Loss |
-|:------:|:-----:|:-------------:|
-| 0.0737 | 500   | 0.665         |
-| 0.1474 | 1000  | 0.1173        |
-| 0.2212 | 1500  | 0.1074        |
-| 0.2949 | 2000  | 0.1012        |
-| 0.3686 | 2500  | 0.0968        |
-| 0.4423 | 3000  | 0.0954        |
-| 0.5161 | 3500  | 0.0894        |
-| 0.5898 | 4000  | 0.0913        |
-| 0.6635 | 4500  | 0.0894        |
-| 0.7372 | 5000  | 0.0858        |
-| 0.8110 | 5500  | 0.0878        |
-| 0.8847 | 6000  | 0.0839        |
-| 0.9584 | 6500  | 0.0813        |
-| 1.0321 | 7000  | 0.0607        |
-| 1.1059 | 7500  | 0.0293        |
-| 1.1796 | 8000  | 0.0301        |
-| 1.2533 | 8500  | 0.03          |
-| 1.3270 | 9000  | 0.0332        |
-| 1.4008 | 9500  | 0.032         |
-| 1.4745 | 10000 | 0.0339        |
-| 1.5482 | 10500 | 0.0317        |
-| 1.6219 | 11000 | 0.0336        |
-| 1.6957 | 11500 | 0.0334        |
-| 1.7694 | 12000 | 0.0341        |
-| 1.8431 | 12500 | 0.0335        |
-| 1.9168 | 13000 | 0.0341        |
-| 1.9906 | 13500 | 0.0335        |
-| 2.0643 | 14000 | 0.0137        |
-| 2.1380 | 14500 | 0.0107        |
-| 2.2117 | 15000 | 0.0106        |
-| 2.2855 | 15500 | 0.0108        |
-| 2.3592 | 16000 | 0.0111        |
-| 2.4329 | 16500 | 0.0113        |
-| 2.5066 | 17000 | 0.011         |
-| 2.5804 | 17500 | 0.0114        |
-| 2.6541 | 18000 | 0.0115        |
-| 2.7278 | 18500 | 0.0117        |
-| 2.8015 | 19000 | 0.0115        |
-| 2.8753 | 19500 | 0.0116        |
-| 2.9490 | 20000 | 0.0116        |
-| 3.0227 | 20500 | 0.0102        |
-| 3.0964 | 21000 | 0.0062        |
-| 3.1702 | 21500 | 0.0064        |
-| 3.2439 | 22000 | 0.0065        |
-| 3.3176 | 22500 | 0.0067        |
-| 3.3913 | 23000 | 0.0064        |
-| 3.4651 | 23500 | 0.0062        |
-| 3.5388 | 24000 | 0.0063        |
-| 3.6125 | 24500 | 0.0062        |
-| 3.6862 | 25000 | 0.0063        |
-| 3.7600 | 25500 | 0.0063        |
-| 3.8337 | 26000 | 0.0063        |
-| 3.9074 | 26500 | 0.006         |
-| 3.9811 | 27000 | 0.0061        |
-| 4.0549 | 27500 | 0.0048        |
-| 4.1286 | 28000 | 0.0043        |
-| 4.2023 | 28500 | 0.0044        |
-| 4.2760 | 29000 | 0.0043        |
-| 4.3497 | 29500 | 0.0043        |
-| 4.4235 | 30000 | 0.0044        |
-| 4.4972 | 30500 | 0.0043        |
-| 4.5709 | 31000 | 0.004         |
-| 4.6446 | 31500 | 0.0043        |
-| 4.7184 | 32000 | 0.0042        |
-| 4.7921 | 32500 | 0.0043        |
-| 4.8658 | 33000 | 0.0043        |
-| 4.9395 | 33500 | 0.004         |
-### Framework Versions
-- Python: 3.11.12
-- Sentence Transformers: 3.4.1
-- Transformers: 4.51.3
-- PyTorch: 2.6.0+cu124
-- Accelerate: 1.6.0
-- Datasets: 3.5.1
-- Tokenizers: 0.21.1
-## Citation
-### BibTeX
-#### Sentence Transformers
-```bibtex
-@inproceedings{reimers-2019-sentence-bert,
-    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
-    author = "Reimers, Nils and Gurevych, Iryna",
-    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
-    month = "11",
-    year = "2019",
-    publisher = "Association for Computational Linguistics",
-    url = "https://arxiv.org/abs/1908.10084",
-}
 ```
-#### MultipleNegativesRankingLoss
-```bibtex
-@misc{henderson2017efficient,
-    title={Efficient Natural Language Response Suggestion for Smart Reply},
-    author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
-    year={2017},
-    eprint={1705.00652},
-    archivePrefix={arXiv},
-    primaryClass={cs.CL}
-}
-```
-<!--
-## Glossary
-*Clearly define terms in order to be accessible across audiences.*
--->
-<!--
-## Model Card Authors
-*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
--->
-<!--
-## Model Card Contact
-*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
--->

 tags:
 - sentence-transformers
 - sentence-similarity
+- code
+- python
+- php
+- javascript
+- ruby
+- rust
+- go
+- java
 base_model: Shuu12121/CodeModernBERT-Owl-1.0
 pipeline_tag: sentence-similarity
 library_name: sentence-transformers
+license: apache-2.0
+language:
+- en
+datasets:
+- Shuu12121/python-codesearch-filtered
+- Shuu12121/java-codesearch-filtered
+- Shuu12121/javascript-codesearch-filtered
+- Shuu12121/go-codesearch-filtered
+- Shuu12121/php-codesearch-filtered
+- Shuu12121/ruby-codesearch-filtered
+- Shuu12121/rust-codesearch-filtered
+- code-search-net/code_search_net
 ---
+# **🦉 CodeSearch-ModernBERT-Owl-Plus: High-Performance Sentence-BERT for Code Search**
+**CodeSearch-ModernBERT-Owl-Plus** is a high-performance code search model, fine-tuned as a Sentence-BERT (sentence-transformers) model from the pretrained **CodeModernBERT-Owl v1.0**.
+This model is optimized for function-level code search, matching natural language queries against functions in a codebase, and achieves state-of-the-art results on the MTEB code search tasks reported below.
+---
+# **🛠 Features**
+* ✅ Fine-tuned in Sentence-BERT format from CodeModernBERT-Owl
+* ✅ Supports multiple programming languages (Python, Java, JavaScript, PHP, Ruby, Go, and Rust)
+* ✅ Specialized encoder for high-accuracy code search
+* ✅ Ideal as the dual-encoder (bi-encoder) retriever in multi-stage retrieval setups
+* ✅ Generates rich semantic embeddings for code and queries
+---
+# **📊 Evaluation on MTEB Benchmark**
+## **🏆 Main Scores in MTEB**
+This model achieved the following **main scores** (for these tasks, `main_score` is NDCG\@10):
+* **CodeSearchNetRetrieval**: `main_score = 0.8918`
+* **COIR-CodeSearchNetRetrieval**: `main_score = 0.8013`
+---
+### 🧪 **CodeSearchNetRetrieval (MTEB)**
+| Metric        | Score      |
+| ------------- | ---------- |
+| **MRR\@10**   | **0.8704** |
+| **NDCG\@10**  | 0.8918     |
+| MAP\@10       | 0.8704     |
+| Recall\@10    | 0.9563     |
+| Precision\@10 | 0.0956     |
+This model achieves strong performance across all ranking metrics and demonstrates balanced retrieval capability.
+---
+### 🧪 **COIR-CodeSearchNetRetrieval (MTEB)**
+| Metric        | Score      |
+| ------------- | ---------- |
+| **MRR\@10**   | **0.7751** |
+| **NDCG\@10**  | 0.8013     |
+| MAP\@10       | 0.7751     |
+| Recall\@10    | 0.8826     |
+| Precision\@10 | 0.0883     |
+Robust and consistent performance is also maintained on the COIR dataset, demonstrating strong generalization.
+---
+# **📥 Usage Example**
+```python
+from sentence_transformers import SentenceTransformer
+model = SentenceTransformer("Shuu12121/CodeSearch-ModernBERT-Owl-Plus")
+embeddings = model.encode(["binary search function", "def binary_search(arr, target): ..."])
 ```
+---
+# **📝 Conclusion**
+* ✅ An optimized Sentence-BERT model based on CodeModernBERT-Owl
+* ✅ Achieves MRR\@10 > 0.87 on MTEB CodeSearchNetRetrieval
+* ✅ Ready for integration in production-level code search systems
+---
+# **📜 License**
+📄 Apache-2.0
+# **📧 Contact**
+For questions or inquiries, feel free to reach out:
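+📧 **Email address not recoverable here (obscured by the hosting page's email protection); contact the author via their [Hugging Face profile](https://huggingface.co/Shuu12121).**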