from huggingface_hub import hf_hub_download
# LlamaCpp lives in langchain_community on LangChain >= 0.1
# (older releases exposed it as langchain.llms.LlamaCpp).
from langchain_community.llms import LlamaCpp
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory


def load_llm():
    """
    Downloads the GGUF model for Arabic and loads it via llama-cpp-python.
    """
    # Fetch the quantized weights into ./models (skipped if already cached).
    model_file = hf_hub_download(
        repo_id="mobeidat/c4ai-command-r7b-arabic-02-2025-Q4_K_M-GGUF",
        filename="c4ai-command-r7b-arabic-02-2025-q4_k_m.gguf",
        local_dir="./models",
    )

    llm = LlamaCpp(
        model_path=model_file,
        n_ctx=2048,        # context window size, in tokens
        n_batch=512,       # prompt tokens evaluated per batch
        streaming=True,    # stream tokens as they are generated
        rope_freq_base=10000.0,  # default RoPE settings for this model
        rope_freq_scale=1.0,
        # flash_attn and chat_format are llama-cpp-python options rather than
        # LangChain fields, so they are forwarded through model_kwargs.
        model_kwargs={"flash_attn": False, "chat_format": "chatml"},
    )

    return llm
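

# Quick sanity check (sketch): LlamaCpp is a completion-style LLM, so once
# loaded it can be invoked directly with a plain prompt string, e.g.:
#
#     llm = load_llm()
#     print(llm.invoke("اكتب جملة ترحيبية قصيرة."))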


def build_conversational_chain(vectorstore):
    """
    Creates a ConversationalRetrievalChain using the local llama-cpp-based LLM
    and a ConversationBufferMemory for multi-turn Q&A.
    """
    llm = load_llm()

    # Keep the running chat history so the chain can condense follow-up
    # questions into standalone queries before retrieval.
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        return_messages=True,
    )

    qa_chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        # Fetch the 5 most similar chunks for each question.
        retriever=vectorstore.as_retriever(
            search_type="similarity",
            search_kwargs={"k": 5},
        ),
        memory=memory,
        verbose=True,
    )

    return qa_chain
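

# Usage sketch (assumption: a vector store built elsewhere over your Arabic
# documents, e.g. a FAISS index; `load_vectorstore` is a hypothetical
# placeholder, not part of this module):
#
#     vectorstore = load_vectorstore()
#     chain = build_conversational_chain(vectorstore)
#     result = chain.invoke({"question": "ما هي النقاط الرئيسية في هذه الوثيقة؟"})
#     print(result["answer"])
#
# Because memory is attached, each call appends to chat_history, so a
# follow-up like "وما التفاصيل؟" is rewritten into a standalone query first.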