# 🔧 Inference Example
import ast
import re
import warnings

import gradio as gr
import torch
from peft import PeftModel
from transformers import AutoTokenizer, AutoModelForCausalLM
# Hugging Face Hub repository IDs.
# NOTE(review): both constants name the same repository, so the adapter is
# applied on top of whatever weights that repository resolves to — confirm
# this is intended.
MODEL_PATH = "panchajanya-ai/Sankhaya_Indic_ITN"
FINETUNED_ADAPTER_PATH = "panchajanya-ai/Sankhaya_Indic_ITN"

# Load model & tokenizer (full precision, CPU only).
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
base_model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    torch_dtype=torch.float32,
    device_map="cpu",
)

# Attaching the adapter is best-effort: if it cannot be loaded, fall back to
# the plain model, but say so instead of failing silently.
try:
    model = PeftModel.from_pretrained(base_model, FINETUNED_ADAPTER_PATH)
except Exception as exc:
    warnings.warn(
        f"Could not load PEFT adapter from {FINETUNED_ADAPTER_PATH!r} "
        f"({exc!r}); continuing with the base model only.",
        RuntimeWarning,
    )
    model = base_model
model.eval()

# Prompt template; the single `{}` placeholder receives the number-word chunk.
alpaca_prompt = """### Instruction:
Convert the Hindi number words to numeric form.
### Input:
{}
### Response:
"""
# Define common Hindi number word to digit map.
# The mapping file must contain a single Python dict literal.
with open("hindi_number_mappings.txt", "r", encoding="utf-8") as file:
    file_content = file.read()

# Safely evaluate the string content as a Python dictionary
# (literal_eval accepts literals only; it does not execute code).
STATIC_NUM_WORD_MAP = ast.literal_eval(file_content)
if not isinstance(STATIC_NUM_WORD_MAP, dict):
    raise TypeError(
        "hindi_number_mappings.txt must contain a dict literal, got "
        f"{type(STATIC_NUM_WORD_MAP).__name__}"
    )

# Extract all the keys (Hindi number words) into HINDI_NUM_WORDS
HINDI_NUM_WORDS = list(STATIC_NUM_WORD_MAP.keys())
# Extract number word chunks from sentence
# Compiled once: a token made up solely of Devanagari digits (० to ९).
_DEVANAGARI_DIGITS_RE = re.compile(r"[०-९]+")


def extract_all_number_chunks(sentence, number_words=None):
    """Return the maximal runs of consecutive number tokens in *sentence*.

    The sentence is split on whitespace. A token counts as a number token
    when it is contained in *number_words* or consists only of Devanagari
    digits. Adjacent number tokens are joined with a single space into one
    chunk; chunks are returned in order of appearance.

    Args:
        sentence: Text to scan.
        number_words: Optional container of recognised number words. When
            omitted, the module-level ``HINDI_NUM_WORDS`` is used.

    Returns:
        A list of chunk strings; empty when no number token is found.
    """
    if number_words is None:
        number_words = HINDI_NUM_WORDS

    chunks, run = [], []
    for word in sentence.split():
        if word in number_words or _DEVANAGARI_DIGITS_RE.fullmatch(word):
            run.append(word)
        elif run:
            # A non-number token closes the run collected so far.
            chunks.append(" ".join(run))
            run = []
    if run:
        # The sentence ended while a run was still open.
        chunks.append(" ".join(run))
    return chunks
# Run LLM if not static mapped
def convert_digit_words_to_number(chunk):
    """Convert a chunk of number words to a digit string.

    The static map is consulted first; only chunks missing from it are sent
    to the language model.

    Args:
        chunk: Space-joined number words, as produced by
            ``extract_all_number_chunks``.

    Returns:
        The numeric form as a ``str``, or ``None`` when the model output
        contains no ``Response: <digits>`` section.
    """
    # First check static map
    if chunk in STATIC_NUM_WORD_MAP:
        value = STATIC_NUM_WORD_MAP[chunk]
        # Always hand back text so callers can splice it into a sentence,
        # whether the mapping file stores ints or strings.
        return None if value is None else str(value)

    # Else ask model
    prompt = alpaca_prompt.format(chunk)
    inputs = tokenizer(prompt, return_tensors="pt").to("cpu")
    outputs = model.generate(
        **inputs,
        max_new_tokens=10,
        use_cache=True,
        eos_token_id=tokenizer.eos_token_id,
    )
    decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # The decoded text is searched for the first run of digits that follows
    # the "Response:" marker.
    match = re.search(r"Response:\s*(\d+)", decoded)
    return match.group(1) if match else None
# Helper: drop repeated characters (not called anywhere in the visible part of this file)
def remove_duplicates(s):
    """Return *s* with repeated characters removed.

    Only the first occurrence of each character is kept, and the original
    order of first occurrences is preserved.
    """
    # dict keys are unique and keep insertion order.
    return "".join(dict.fromkeys(s))
def ITN(sentence: str) -> str:
    """Main replacement function: rewrite number-word runs in *sentence* as digits.

    Each chunk found by ``extract_all_number_chunks`` is converted with
    ``convert_digit_words_to_number`` and substituted at the place where it
    occurs. The sentence is processed in one left-to-right pass, so a
    replacement never touches text outside its own chunk (for example a
    short chunk that is also a prefix of a later, longer chunk).

    Args:
        sentence: Input text containing Hindi number words.

    Returns:
        The sentence with every convertible chunk replaced; chunks for which
        no conversion is available are left unchanged.
    """
    pieces = []
    cursor = 0
    for chunk in extract_all_number_chunks(sentence):
        # Match the chunk as whole whitespace-delimited tokens, tolerating
        # any amount of whitespace between its words in the original text.
        pattern = re.compile(
            r"(?<!\S)"
            + r"\s+".join(re.escape(word) for word in chunk.split())
            + r"(?!\S)"
        )
        match = pattern.search(sentence, cursor)
        if match is None:
            continue

        numeric = convert_digit_words_to_number(chunk)
        pieces.append(sentence[cursor:match.start()])
        # `is None` rather than truthiness, so a legitimate 0 is not dropped.
        pieces.append(match.group(0) if numeric is None else str(numeric))
        cursor = match.end()

    pieces.append(sentence[cursor:])
    return "".join(pieces)
# Demo: normalise one sample sentence and print the converted text.
example_sentence = "कृष्ण को पैंतालीस हजार बानवे रुपये भेजें"
result = ITN(example_sentence)
print(result)
# --- Model card details (page text, not part of the example code) ---
# Downloads last month: 40
# Inference Providers: this model isn't deployed by any Inference Provider
# (provider support can be requested on the model page).
# Model tree for panchajanya-ai/Sankhaya_Indic_ITN
#   Base model: unsloth/Meta-Llama-3.1-8B