import os
from huggingface_hub import hf_hub_download
from langchain.llms import LlamaCpp
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory


def load_llm():
    """
    Downloads the Arabic GGUF model from the Hugging Face Hub and loads it
    via llama-cpp.
    """
    model_file = hf_hub_download(
        repo_id="mobeidat/c4ai-command-r7b-arabic-02-2025-Q4_K_M-GGUF",
        filename="c4ai-command-r7b-arabic-02-2025-q4_k_m.gguf",
        local_dir="./models",
        local_dir_use_symlinks=False,
    )
    # Force the "chatml" chat format instead of the GGUF's embedded Jinja chat
    # template, and apply no GBNF grammar constraints. chat_format and
    # flash_attn are llama-cpp-python options that LangChain's LlamaCpp wrapper
    # does not expose as fields, so they are forwarded through model_kwargs.
    llm = LlamaCpp(
        model_path=model_file,
        n_ctx=2048,
        n_batch=512,
        max_tokens=256,
        temperature=0.8,
        top_k=40,
        top_p=0.95,
        repeat_penalty=1.1,
        last_n_tokens_size=64,
        rope_freq_base=10000.0,
        rope_freq_scale=1.0,
        n_gpu_layers=None,   # CPU-only; set e.g. -1 to offload all layers to the GPU
        n_threads=None,      # let llama.cpp pick the thread count
        f16_kv=True,
        use_mmap=True,
        use_mlock=False,
        seed=-1,
        streaming=True,
        echo=False,
        stop=[],
        grammar=None,        # no grammar constraints
        grammar_path=None,
        verbose=True,
        model_kwargs={
            "chat_format": "chatml",
            "flash_attn": False,
        },
    )

    return llm
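
# Usage sketch (illustrative, not part of the original Space): load_llm() returns
# a plain LangChain LLM, so it can also be prompted directly, e.g.
#
#     llm = load_llm()
#     print(llm("اكتب جملة ترحيب قصيرة باللغة العربية."))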


def build_conversational_chain(vectorstore):
    """
    Creates a ConversationalRetrievalChain using the local llama-cpp-based LLM
    and a ConversationBufferMemory for multi-turn Q&A.
    """
    llm = load_llm()

    memory = ConversationBufferMemory(
        memory_key="chat_history",
        return_messages=True,
    )

    qa_chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vectorstore.as_retriever(
            search_type="similarity",
            search_kwargs={"k": 5},
        ),
        memory=memory,
        verbose=True,
    )

    return qa_chain
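

if __name__ == "__main__":
    # Minimal end-to-end sketch, not part of the original Space: it assumes a
    # small in-memory FAISS index built with a multilingual sentence-transformers
    # model (the model name below is an assumption), purely so the chain above
    # can be exercised from the command line.
    from langchain.embeddings import HuggingFaceEmbeddings
    from langchain.vectorstores import FAISS

    texts = [
        "عمّان هي عاصمة المملكة الأردنية الهاشمية.",
        "يقع البحر الميت عند أخفض نقطة على سطح الأرض.",
    ]
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
    )
    vectorstore = FAISS.from_texts(texts, embeddings)

    chain = build_conversational_chain(vectorstore)
    result = chain({"question": "ما هي عاصمة الأردن؟"})
    print(result["answer"])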