NghiBuine's picture
Add new SentenceTransformer model
272f4d3 verified
metadata
tags:
  - sentence-transformers
  - sentence-similarity
  - feature-extraction
  - generated_from_trainer
  - dataset_size:333
  - loss:MatryoshkaLoss
  - loss:MultipleNegativesRankingLoss
base_model: keepitreal/vietnamese-sbert
widget:
  - source_sentence: Tôi Thấy Hoa Vàng Trên Cỏ Xanh
    sentences:
      - mềm mại, thoáng khí và bền đẹp
      - Nike Air Force 1 phong cách không lỗi mốt
      - >-
        Tôi Thấy Hoa Vàng Trên Cỏ Xanh thông điệp trân trọng tuổi thơ và cuộc
        sống bình dị
  - source_sentence: iPhone 16
    sentences:
      - Cà Phê Cùng Tony kết hợp giải trí và giáo dục
      - iPhone 16 Pro RAM 12GB đa nhiệm mạnh mẽ
      - Loafer Gucci size từ 38 đến 45
  - source_sentence: Áo Thun
    sentences:
      - phù hợp trong thời tiết nóng bức
      - thấm hút mồ hôi, nhẹ và thoáng khí
      - Giày chạy đường dài bền nhẹ
  - source_sentence: Son Môi MAC Matte Lipstick - Ruby Woo
    sentences:
      - >-
        bảo quản dễ dàng bằng cách lộn trái khi giặt, tránh chất tẩy mạnh và
        phơi nơi thoáng mát
      - chất son lì mịn, bám màu 6-8 giờ
      - tác phẩm kinh điển về tâm linh và triết học
  - source_sentence: LEGO City Police Station
    sentences:
      - mô hình đẹp mắt để trưng bày
      - dễ dàng phối đồ từ áo thun, sơ mi đến blazer
      - chỉ số SPF 50+ PA+++ bảo vệ tối ưu khỏi tia UV
pipeline_tag: sentence-similarity
library_name: sentence-transformers
metrics:
  - cosine_accuracy@1
  - cosine_accuracy@3
  - cosine_accuracy@5
  - cosine_accuracy@10
  - cosine_precision@1
  - cosine_precision@3
  - cosine_precision@5
  - cosine_precision@10
  - cosine_recall@1
  - cosine_recall@3
  - cosine_recall@5
  - cosine_recall@10
  - cosine_ndcg@10
  - cosine_mrr@10
  - cosine_map@100
model-index:
  - name: SentenceTransformer based on keepitreal/vietnamese-sbert
    results:
      - task:
          type: information-retrieval
          name: Information Retrieval
        dataset:
          name: dim 768
          type: dim_768
        metrics:
          - type: cosine_accuracy@1
            value: 0
            name: Cosine Accuracy@1
          - type: cosine_accuracy@3
            value: 0
            name: Cosine Accuracy@3
          - type: cosine_accuracy@5
            value: 0.02702702702702703
            name: Cosine Accuracy@5
          - type: cosine_accuracy@10
            value: 0.5675675675675675
            name: Cosine Accuracy@10
          - type: cosine_precision@1
            value: 0
            name: Cosine Precision@1
          - type: cosine_precision@3
            value: 0
            name: Cosine Precision@3
          - type: cosine_precision@5
            value: 0.005405405405405406
            name: Cosine Precision@5
          - type: cosine_precision@10
            value: 0.056756756756756774
            name: Cosine Precision@10
          - type: cosine_recall@1
            value: 0
            name: Cosine Recall@1
          - type: cosine_recall@3
            value: 0
            name: Cosine Recall@3
          - type: cosine_recall@5
            value: 0.02702702702702703
            name: Cosine Recall@5
          - type: cosine_recall@10
            value: 0.5675675675675675
            name: Cosine Recall@10
          - type: cosine_ndcg@10
            value: 0.1783581729179075
            name: Cosine Ndcg@10
          - type: cosine_mrr@10
            value: 0.07062419562419564
            name: Cosine Mrr@10
          - type: cosine_map@100
            value: 0.07973358512714
            name: Cosine Map@100
      - task:
          type: information-retrieval
          name: Information Retrieval
        dataset:
          name: dim 512
          type: dim_512
        metrics:
          - type: cosine_accuracy@1
            value: 0
            name: Cosine Accuracy@1
          - type: cosine_accuracy@3
            value: 0
            name: Cosine Accuracy@3
          - type: cosine_accuracy@5
            value: 0
            name: Cosine Accuracy@5
          - type: cosine_accuracy@10
            value: 0.5405405405405406
            name: Cosine Accuracy@10
          - type: cosine_precision@1
            value: 0
            name: Cosine Precision@1
          - type: cosine_precision@3
            value: 0
            name: Cosine Precision@3
          - type: cosine_precision@5
            value: 0
            name: Cosine Precision@5
          - type: cosine_precision@10
            value: 0.054054054054054064
            name: Cosine Precision@10
          - type: cosine_recall@1
            value: 0
            name: Cosine Recall@1
          - type: cosine_recall@3
            value: 0
            name: Cosine Recall@3
          - type: cosine_recall@5
            value: 0
            name: Cosine Recall@5
          - type: cosine_recall@10
            value: 0.5405405405405406
            name: Cosine Recall@10
          - type: cosine_ndcg@10
            value: 0.1701742309301506
            name: Cosine Ndcg@10
          - type: cosine_mrr@10
            value: 0.06747104247104248
            name: Cosine Mrr@10
          - type: cosine_map@100
            value: 0.0782135520060237
            name: Cosine Map@100
      - task:
          type: information-retrieval
          name: Information Retrieval
        dataset:
          name: dim 256
          type: dim_256
        metrics:
          - type: cosine_accuracy@1
            value: 0
            name: Cosine Accuracy@1
          - type: cosine_accuracy@3
            value: 0
            name: Cosine Accuracy@3
          - type: cosine_accuracy@5
            value: 0
            name: Cosine Accuracy@5
          - type: cosine_accuracy@10
            value: 0.5405405405405406
            name: Cosine Accuracy@10
          - type: cosine_precision@1
            value: 0
            name: Cosine Precision@1
          - type: cosine_precision@3
            value: 0
            name: Cosine Precision@3
          - type: cosine_precision@5
            value: 0
            name: Cosine Precision@5
          - type: cosine_precision@10
            value: 0.054054054054054064
            name: Cosine Precision@10
          - type: cosine_recall@1
            value: 0
            name: Cosine Recall@1
          - type: cosine_recall@3
            value: 0
            name: Cosine Recall@3
          - type: cosine_recall@5
            value: 0
            name: Cosine Recall@5
          - type: cosine_recall@10
            value: 0.5405405405405406
            name: Cosine Recall@10
          - type: cosine_ndcg@10
            value: 0.17224374024595593
            name: Cosine Ndcg@10
          - type: cosine_mrr@10
            value: 0.06948734448734449
            name: Cosine Mrr@10
          - type: cosine_map@100
            value: 0.07938312163919391
            name: Cosine Map@100
      - task:
          type: information-retrieval
          name: Information Retrieval
        dataset:
          name: dim 128
          type: dim_128
        metrics:
          - type: cosine_accuracy@1
            value: 0
            name: Cosine Accuracy@1
          - type: cosine_accuracy@3
            value: 0
            name: Cosine Accuracy@3
          - type: cosine_accuracy@5
            value: 0
            name: Cosine Accuracy@5
          - type: cosine_accuracy@10
            value: 0.5405405405405406
            name: Cosine Accuracy@10
          - type: cosine_precision@1
            value: 0
            name: Cosine Precision@1
          - type: cosine_precision@3
            value: 0
            name: Cosine Precision@3
          - type: cosine_precision@5
            value: 0
            name: Cosine Precision@5
          - type: cosine_precision@10
            value: 0.054054054054054064
            name: Cosine Precision@10
          - type: cosine_recall@1
            value: 0
            name: Cosine Recall@1
          - type: cosine_recall@3
            value: 0
            name: Cosine Recall@3
          - type: cosine_recall@5
            value: 0
            name: Cosine Recall@5
          - type: cosine_recall@10
            value: 0.5405405405405406
            name: Cosine Recall@10
          - type: cosine_ndcg@10
            value: 0.1706353981690823
            name: Cosine Ndcg@10
          - type: cosine_mrr@10
            value: 0.06785714285714285
            name: Cosine Mrr@10
          - type: cosine_map@100
            value: 0.07606072355570134
            name: Cosine Map@100
      - task:
          type: information-retrieval
          name: Information Retrieval
        dataset:
          name: dim 64
          type: dim_64
        metrics:
          - type: cosine_accuracy@1
            value: 0
            name: Cosine Accuracy@1
          - type: cosine_accuracy@3
            value: 0
            name: Cosine Accuracy@3
          - type: cosine_accuracy@5
            value: 0.02702702702702703
            name: Cosine Accuracy@5
          - type: cosine_accuracy@10
            value: 0.5135135135135135
            name: Cosine Accuracy@10
          - type: cosine_precision@1
            value: 0
            name: Cosine Precision@1
          - type: cosine_precision@3
            value: 0
            name: Cosine Precision@3
          - type: cosine_precision@5
            value: 0.005405405405405406
            name: Cosine Precision@5
          - type: cosine_precision@10
            value: 0.05135135135135136
            name: Cosine Precision@10
          - type: cosine_recall@1
            value: 0
            name: Cosine Recall@1
          - type: cosine_recall@3
            value: 0
            name: Cosine Recall@3
          - type: cosine_recall@5
            value: 0.02702702702702703
            name: Cosine Recall@5
          - type: cosine_recall@10
            value: 0.5135135135135135
            name: Cosine Recall@10
          - type: cosine_ndcg@10
            value: 0.16481648451068456
            name: Cosine Ndcg@10
          - type: cosine_mrr@10
            value: 0.06733161733161734
            name: Cosine Mrr@10
          - type: cosine_map@100
            value: 0.07793528025726168
            name: Cosine Map@100

SentenceTransformer based on keepitreal/vietnamese-sbert

This is a sentence-transformers model finetuned from keepitreal/vietnamese-sbert on the json dataset. It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.

Model Details

Model Description

  • Model Type: Sentence Transformer
  • Base model: keepitreal/vietnamese-sbert
  • Maximum Sequence Length: 256 tokens
  • Output Dimensionality: 768 dimensions
  • Similarity Function: Cosine Similarity
  • Training Dataset:
    • json

Model Sources

Full Model Architecture

SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: RobertaModel 
  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
)

Usage

Direct Usage (Sentence Transformers)

First install the Sentence Transformers library:

pip install -U sentence-transformers

Then you can load this model and run inference.

from sentence_transformers import SentenceTransformer

# Download from the 🤗 Hub
model = SentenceTransformer("NghiBuine/ecommerce-search-model")
# Run inference
sentences = [
    'LEGO City Police Station',
    'mô hình đẹp mắt để trưng bày',
    'dễ dàng phối đồ từ áo thun, sơ mi đến blazer',
]
embeddings = model.encode(sentences)
print(embeddings.shape)
# [3, 768]

# Get the similarity scores for the embeddings
similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
# [3, 3]

Evaluation

Metrics

Information Retrieval (dataset: dim_768)

Metric Value
cosine_accuracy@1 0.0
cosine_accuracy@3 0.0
cosine_accuracy@5 0.027
cosine_accuracy@10 0.5676
cosine_precision@1 0.0
cosine_precision@3 0.0
cosine_precision@5 0.0054
cosine_precision@10 0.0568
cosine_recall@1 0.0
cosine_recall@3 0.0
cosine_recall@5 0.027
cosine_recall@10 0.5676
cosine_ndcg@10 0.1784
cosine_mrr@10 0.0706
cosine_map@100 0.0797

Information Retrieval (dataset: dim_512)

Metric Value
cosine_accuracy@1 0.0
cosine_accuracy@3 0.0
cosine_accuracy@5 0.0
cosine_accuracy@10 0.5405
cosine_precision@1 0.0
cosine_precision@3 0.0
cosine_precision@5 0.0
cosine_precision@10 0.0541
cosine_recall@1 0.0
cosine_recall@3 0.0
cosine_recall@5 0.0
cosine_recall@10 0.5405
cosine_ndcg@10 0.1702
cosine_mrr@10 0.0675
cosine_map@100 0.0782

Information Retrieval (dataset: dim_256)

Metric Value
cosine_accuracy@1 0.0
cosine_accuracy@3 0.0
cosine_accuracy@5 0.0
cosine_accuracy@10 0.5405
cosine_precision@1 0.0
cosine_precision@3 0.0
cosine_precision@5 0.0
cosine_precision@10 0.0541
cosine_recall@1 0.0
cosine_recall@3 0.0
cosine_recall@5 0.0
cosine_recall@10 0.5405
cosine_ndcg@10 0.1722
cosine_mrr@10 0.0695
cosine_map@100 0.0794

Information Retrieval (dataset: dim_128)

Metric Value
cosine_accuracy@1 0.0
cosine_accuracy@3 0.0
cosine_accuracy@5 0.0
cosine_accuracy@10 0.5405
cosine_precision@1 0.0
cosine_precision@3 0.0
cosine_precision@5 0.0
cosine_precision@10 0.0541
cosine_recall@1 0.0
cosine_recall@3 0.0
cosine_recall@5 0.0
cosine_recall@10 0.5405
cosine_ndcg@10 0.1706
cosine_mrr@10 0.0679
cosine_map@100 0.0761

Information Retrieval (dataset: dim_64)

Metric Value
cosine_accuracy@1 0.0
cosine_accuracy@3 0.0
cosine_accuracy@5 0.027
cosine_accuracy@10 0.5135
cosine_precision@1 0.0
cosine_precision@3 0.0
cosine_precision@5 0.0054
cosine_precision@10 0.0514
cosine_recall@1 0.0
cosine_recall@3 0.0
cosine_recall@5 0.027
cosine_recall@10 0.5135
cosine_ndcg@10 0.1648
cosine_mrr@10 0.0673
cosine_map@100 0.0779

Training Details

Training Dataset

json

  • Dataset: json
  • Size: 333 training samples
  • Columns: positive and anchor
  • Approximate statistics based on the first 333 samples:
    |         | positive | anchor |
    |:--------|:---------|:-------|
    | type    | string   | string |
    | details | min: 4 tokens; mean: 9.73 tokens; max: 37 tokens | min: 6 tokens; mean: 13.71 tokens; max: 41 tokens |
  • Samples:
    | positive | anchor |
    |:---------|:-------|
    | Giày Chạy Bộ Adidas Ultraboost | Ultraboost đế continental chống trượt |
    | Cà Phê Cùng Tony | Cà Phê Cùng Tony chia sẻ bài học phát triển bản thân và sống tích cực |
    | Đắc Nhân Tâm | phát triển kỹ năng thuyết phục và giao tiếp tự nhiên |
  • Loss: MatryoshkaLoss with these parameters:
    {
        "loss": "MultipleNegativesRankingLoss",
        "matryoshka_dims": [
            768,
            512,
            256,
            128,
            64
        ],
        "matryoshka_weights": [
            1,
            1,
            1,
            1,
            1
        ],
        "n_dims_per_step": -1
    }
    

Training Hyperparameters

Non-Default Hyperparameters

  • eval_strategy: epoch
  • per_device_train_batch_size: 32
  • gradient_accumulation_steps: 16
  • learning_rate: 2e-05
  • num_train_epochs: 4
  • bf16: True
  • load_best_model_at_end: True

All Hyperparameters

Click to expand
  • overwrite_output_dir: False
  • do_predict: False
  • eval_strategy: epoch
  • prediction_loss_only: True
  • per_device_train_batch_size: 32
  • per_device_eval_batch_size: 8
  • per_gpu_train_batch_size: None
  • per_gpu_eval_batch_size: None
  • gradient_accumulation_steps: 16
  • eval_accumulation_steps: None
  • learning_rate: 2e-05
  • weight_decay: 0.0
  • adam_beta1: 0.9
  • adam_beta2: 0.999
  • adam_epsilon: 1e-08
  • max_grad_norm: 1.0
  • num_train_epochs: 4
  • max_steps: -1
  • lr_scheduler_type: linear
  • lr_scheduler_kwargs: {}
  • warmup_ratio: 0.0
  • warmup_steps: 0
  • log_level: passive
  • log_level_replica: warning
  • log_on_each_node: True
  • logging_nan_inf_filter: True
  • save_safetensors: True
  • save_on_each_node: False
  • save_only_model: False
  • restore_callback_states_from_checkpoint: False
  • no_cuda: False
  • use_cpu: False
  • use_mps_device: False
  • seed: 42
  • data_seed: None
  • jit_mode_eval: False
  • use_ipex: False
  • bf16: True
  • fp16: False
  • fp16_opt_level: O1
  • half_precision_backend: auto
  • bf16_full_eval: False
  • fp16_full_eval: False
  • tf32: None
  • local_rank: 0
  • ddp_backend: None
  • tpu_num_cores: None
  • tpu_metrics_debug: False
  • debug: []
  • dataloader_drop_last: False
  • dataloader_num_workers: 0
  • dataloader_prefetch_factor: None
  • past_index: -1
  • disable_tqdm: False
  • remove_unused_columns: True
  • label_names: None
  • load_best_model_at_end: True
  • ignore_data_skip: False
  • fsdp: []
  • fsdp_min_num_params: 0
  • fsdp_config: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
  • fsdp_transformer_layer_cls_to_wrap: None
  • accelerator_config: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
  • deepspeed: None
  • label_smoothing_factor: 0.0
  • optim: adamw_torch
  • optim_args: None
  • adafactor: False
  • group_by_length: False
  • length_column_name: length
  • ddp_find_unused_parameters: None
  • ddp_bucket_cap_mb: None
  • ddp_broadcast_buffers: False
  • dataloader_pin_memory: True
  • dataloader_persistent_workers: False
  • skip_memory_metrics: True
  • use_legacy_prediction_loop: False
  • push_to_hub: False
  • resume_from_checkpoint: None
  • hub_model_id: None
  • hub_strategy: every_save
  • hub_private_repo: False
  • hub_always_push: False
  • gradient_checkpointing: False
  • gradient_checkpointing_kwargs: None
  • include_inputs_for_metrics: False
  • eval_do_concat_batches: True
  • fp16_backend: auto
  • push_to_hub_model_id: None
  • push_to_hub_organization: None
  • mp_parameters:
  • auto_find_batch_size: False
  • full_determinism: False
  • torchdynamo: None
  • ray_scope: last
  • ddp_timeout: 1800
  • torch_compile: False
  • torch_compile_backend: None
  • torch_compile_mode: None
  • dispatch_batches: None
  • split_batches: None
  • include_tokens_per_second: False
  • include_num_input_tokens_seen: False
  • neftune_noise_alpha: None
  • optim_target_modules: None
  • batch_eval_metrics: False
  • prompts: None
  • batch_sampler: batch_sampler
  • multi_dataset_batch_sampler: proportional

Training Logs

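| Epoch | Step | dim_768_cosine_ndcg@10 | dim_512_cosine_ndcg@10 | dim_256_cosine_ndcg@10 | dim_128_cosine_ndcg@10 | dim_64_cosine_ndcg@10 |
|:------|:-----|:-----------------------|:-----------------------|:-----------------------|:-----------------------|:----------------------|
| 1.0 | 1 | 0.1716 | 0.1897 | 0.1450 | 0.1699 | 0.1542 |
| 2.0 | 3 | 0.179 | 0.171 | 0.1722 | 0.1719 | 0.1644 |
| **2.9091** | **4** | **0.1784** | **0.1702** | **0.1722** | **0.1706** | **0.1648** |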
  • The bold row denotes the saved checkpoint.

Framework Versions

  • Python: 3.11.9
  • Sentence Transformers: 4.1.0
  • Transformers: 4.41.2
  • PyTorch: 2.6.0+cu124
  • Accelerate: 1.7.0
  • Datasets: 2.19.1
  • Tokenizers: 0.19.1

Citation

BibTeX

Sentence Transformers

@inproceedings{reimers-2019-sentence-bert,
    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
    author = "Reimers, Nils and Gurevych, Iryna",
    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
    month = "11",
    year = "2019",
    publisher = "Association for Computational Linguistics",
    url = "https://arxiv.org/abs/1908.10084",
}

MatryoshkaLoss

@misc{kusupati2024matryoshka,
    title={Matryoshka Representation Learning},
    author={Aditya Kusupati and Gantavya Bhatt and Aniket Rege and Matthew Wallingford and Aditya Sinha and Vivek Ramanujan and William Howard-Snyder and Kaifeng Chen and Sham Kakade and Prateek Jain and Ali Farhadi},
    year={2024},
    eprint={2205.13147},
    archivePrefix={arXiv},
    primaryClass={cs.LG}
}

MultipleNegativesRankingLoss

@misc{henderson2017efficient,
    title={Efficient Natural Language Response Suggestion for Smart Reply},
    author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
    year={2017},
    eprint={1705.00652},
    archivePrefix={arXiv},
    primaryClass={cs.CL}
}