---
license: apache-2.0
language:
- hi
base_model: unsloth/Meta-Llama-3.1-8B
library_name: peft
pipeline_tag: text-generation
---

## 🔧 Inference Example

```python
import ast
import re

import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_PATH = "panchajanya-ai/Sankhaya_Indic_ITN"
FINETUNED_ADAPTER_PATH = "panchajanya-ai/Sankhaya_Indic_ITN"

# Load tokenizer and base model on CPU in full precision.
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
base_model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    torch_dtype=torch.float32,
    device_map="cpu",
)

# Attach the fine-tuned LoRA adapter when available; fall back to the
# base model so the example still runs without the adapter weights.
try:
    model = PeftModel.from_pretrained(base_model, FINETUNED_ADAPTER_PATH)
except Exception:
    model = base_model
model.eval()

alpaca_prompt = """### Instruction:
Convert the Hindi number words to numeric form.

### Input:
{}

### Response:
"""

# Load the static Hindi number-word -> digit mapping from disk.
with open("hindi_number_mappings.txt", "r", encoding="utf-8") as file:
    file_content = file.read()

# Safely evaluate the string content as a Python dictionary
# (literal_eval accepts only literals — no arbitrary code execution).
STATIC_NUM_WORD_MAP = ast.literal_eval(file_content)

# All known Hindi number words; a set gives O(1) membership tests.
HINDI_NUM_WORDS = set(STATIC_NUM_WORD_MAP)

# A token consisting solely of Devanagari digits (० through ९).
DEVANAGARI_DIGITS_RE = re.compile(r"^[०-९]+$")


def extract_all_number_chunks(sentence):
    """Return maximal runs of consecutive number words / Devanagari digits.

    Each chunk is the space-joined run of adjacent number tokens, in the
    order they appear in *sentence*.
    """
    chunks, current = [], []
    for word in sentence.split():
        if word in HINDI_NUM_WORDS or DEVANAGARI_DIGITS_RE.match(word):
            current.append(word)
        elif current:
            chunks.append(" ".join(current))
            current = []
    if current:  # flush a run that ends at the sentence boundary
        chunks.append(" ".join(current))
    return chunks


def convert_digit_words_to_number(chunk):
    """Convert one number-word chunk to a digit string.

    Checks the static map first; only falls back to LLM generation for
    chunks the map does not cover. Returns None when the model output
    contains no parsable number.
    """
    if chunk in STATIC_NUM_WORD_MAP:
        return STATIC_NUM_WORD_MAP[chunk]

    prompt = alpaca_prompt.format(chunk)
    inputs = tokenizer(prompt, return_tensors="pt").to("cpu")
    outputs = model.generate(
        **inputs,
        max_new_tokens=10,
        use_cache=True,
        eos_token_id=tokenizer.eos_token_id,
    )
    decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # The decoded text echoes the prompt; grab the digits after "Response:".
    match = re.search(r"Response:\s*(\d+)", decoded)
    return match.group(1) if match else None


def remove_duplicates(s):
    """Return *s* with only the first occurrence of each character kept.

    NOTE(review): currently unused by the inference pipeline below.
    """
    seen = set()
    result = []
    for char in s:
        if char not in seen:
            seen.add(char)
            result.append(char)
    return "".join(result)


def ITN(sentence: str) -> str:
    """Inverse text normalization: replace Hindi number words with digits."""
    for chunk in extract_all_number_chunks(sentence):
        numeric = convert_digit_words_to_number(chunk)
        if numeric:
            sentence = sentence.replace(chunk, numeric)
    return sentence


result = ITN("कृष्ण को पैंतालीस हजार बानवे रुपये भेजें")
print(result)
```