Model Card for Xlar/orpo-qlora-mtmed-llama3-8b
A Llama-3-8B model fine-tuned with QLoRA and ORPO for information retrieval from clinical notes.
Model Description
- Developed by: Xlar (CBT, IIT Delhi)
- Funded by: HPC facility, IIT Delhi
- Shared by: Xlar
- Model type: Causal language model (QLoRA fine-tune of Llama-3-8B, aligned with ORPO)
- Language(s) (NLP): English
- License: [More Information Needed]
- Finetuned from model: unsloth/llama-3-8b-bnb-4bit
Model Sources [optional]
- Repository: [More Information Needed]
- Paper [optional]: [More Information Needed]
- Demo [optional]: [More Information Needed]
Uses
This model is intended as a trial for clinicians and other medical professionals exploring the use of LLMs for information retrieval from clinical notes.
Bias, Risks, and Limitations
The model has not been tested in hospital settings; outputs should be reviewed by a qualified clinician before any clinical use.
[More Information Needed]
How to Get Started with the Model
Use the code below to get started with the model.
```python
from unsloth import FastLanguageModel
import torch

max_seq_length = 2048  # choose any; Unsloth supports RoPE scaling internally
dtype = None           # None for auto detection; float16 for Tesla T4/V100, bfloat16 for Ampere+
load_in_4bit = True    # use 4-bit quantization to reduce memory usage

inf_model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "Xlar/orpo-qlora-mtmed-llama3-8b",  # or a local path to your fine-tuned checkpoint
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)
FastLanguageModel.for_inference(inf_model)  # enable Unsloth's faster native inference
```
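For interactive use, the generated text can also be streamed token by token with transformers' `TextStreamer`. A minimal sketch, assuming the `inf_model` and `tokenizer` loaded above; the example prompt here is purely illustrative:

```python
from transformers import TextStreamer

# Print tokens to stdout as they are generated, instead of waiting for the full output
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

inputs = tokenizer("Summarise the patient's current medications.", return_tensors="pt").to("cuda")
_ = inf_model.generate(**inputs, streamer=text_streamer, max_new_tokens=150)
```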
Evaluation
Use the code below to report the model size and run generation on a single evaluation example.
```python
import csv

# Report the parameter count of the loaded model
model_size = sum(t.numel() for t in inf_model.parameters())
print(f"Model size: {model_size/1000**2:.1f}M parameters")

tokenizer.pad_token = tokenizer.eos_token
torch_device = "cuda" if torch.cuda.is_available() else "cpu"

# Alpaca-style prompt used at inference time
inf_alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context.
Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

# `example` is one record of the evaluation set with
# 'Task', 'clinical_note', 'question' and 'answer' fields
Instruction = "Kindly complete the following task :" + example['Task']
prompt = example['clinical_note'] + "\n" + 'question:' + example['question']
answer = example['answer']  # reference answer, used later for the metrics

text = inf_alpaca_prompt.format(Instruction, prompt, "")  # leave the response slot empty

model_inputs = tokenizer(
    text,
    max_length=2048,
    truncation=True,
    padding=False,
    return_tensors="pt",
)
model_inputs = model_inputs.to(torch_device)

outputs = inf_model.generate(
    **model_inputs,
    max_new_tokens=150,        # important: otherwise the model emits long extended text
    num_return_sequences=1,
    # Sampling is disabled here; options such as do_sample=True, top_k=40, temperature=0.7,
    # top_p=0.95, repetition_penalty=1.1 or num_beams=5 can be re-enabled if needed.
)
output = outputs[0]
prediction = tokenizer.decode(output, skip_special_tokens=True)
```
Testing Data, Factors & Metrics
Code for evaluating the generated text with the ROUGE and BLEU metrics:
```python
import nltk
from nltk.tokenize import sent_tokenize
import evaluate

# nltk.download('punkt')  # uncomment on first use to fetch the sentence tokenizer

rouge = evaluate.load("rouge")
bleu = evaluate.load("bleu")

def compute_metrics(decoded_preds, decoded_labels):
    """Compute ROUGE and BLEU for one predicted string against one reference string."""
    # ROUGE expects a newline after each sentence
    decoded_preds = ["\n".join(sent_tokenize(decoded_preds.strip()))]
    decoded_labels = ["\n".join(sent_tokenize(decoded_labels.strip()))]

    result_rouge = rouge.compute(
        predictions=decoded_preds, references=decoded_labels, use_aggregator=True
    )
    result_bleu = None
    try:
        result_bleu = bleu.compute(predictions=decoded_preds, references=decoded_labels)
    except ZeroDivisionError:
        pass  # BLEU is undefined for empty predictions
    return result_rouge, result_bleu

# Quick sanity check
decoded_preds = "My name is Sanjeet Patil"
decoded_labels = "My name is Sanjeet"
print(compute_metrics(decoded_preds, decoded_labels))
```
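To score a full evaluation run rather than a single pair, the same metrics can be computed corpus-wide; a minimal sketch, assuming the `predictions` and `references` lists collected in the generation loop above:

```python
# Corpus-level scores over all collected predictions and references
corpus_rouge = rouge.compute(predictions=predictions, references=references,
                             use_aggregator=True)
corpus_bleu = bleu.compute(predictions=predictions, references=references)

print({k: round(v, 4) for k, v in corpus_rouge.items()})
print("bleu:", round(corpus_bleu["bleu"], 4))
```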
Model tree for Xlar/orpo-qlora-mtmed-llama3-8b
- Base model: meta-llama/Meta-Llama-3-8B