# File size: 2,948 Bytes (revision 91e999e)
import os
# Project root: the BASE_DIR environment variable takes precedence; otherwise
# use the parent of the directory that contains this file.
BASE_DIR = os.environ.get(
    "BASE_DIR",
    os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
)

# Data sub-directories, all located under <BASE_DIR>/data.
INDEX_DIR = os.path.join(BASE_DIR, "data", "index")
TXT_DIR = os.path.join(BASE_DIR, "data", "texts")
PDF_DIR = os.path.join(BASE_DIR, "data", "pdfs")

# Create the data directories at import time; already-existing ones are fine.
for _data_dir in (PDF_DIR, TXT_DIR, INDEX_DIR):
    os.makedirs(_data_dir, exist_ok=True)
del _data_dir  # keep the module namespace limited to the constants above
# Model settings: the model identifier is fixed here, while the access token
# is read from the environment and is None when the variable is not set.
MODEL_ID = "meta-llama/Llama-3.2-3B-Instruct"
HUGGINGFACE_TOKEN = os.environ.get("HUGGINGFACE_TOKEN")

# Root data directory (<BASE_DIR>/data).
DATA_DIR = os.path.join(BASE_DIR, "data")

# Files stored inside the index directory.
FAISS_INDEX_PATH = os.path.join(INDEX_DIR, "faiss_index.pkl")
DOCUMENTS_PATH = os.path.join(INDEX_DIR, "documents.pkl")
# Science topics: grade 3 material is listed as PDFs under PDF_DIR, followed
# by grade 4 material listed as TXT files under TXT_DIR.
SCIENCE_FILES = [
    os.path.join(PDF_DIR, pdf_name)
    for pdf_name in (
        "mazingira g3.pdf",
        "nishati g3.pdf",
        "maada g3.pdf",
        "mawasiliano g3.pdf",
        "usafi g3.pdf",
        "vipimo g3.pdf",
        "mlo g3.pdf",
        "mfumo g3.pdf",
        "maambukizi g3.pdf",
        "huduma g3.pdf",
        "vifaa g3.pdf",
    )
] + [
    os.path.join(TXT_DIR, txt_name)
    for txt_name in (
        "kinga ya mwili g4.txt",
        "ukimwi g4.txt",
        "maji g4.txt",
        "majaribio ya kisayansi g4.txt",
        "magonjwa g4.txt",
        "huduma g4.txt",
        "mazingira g4.txt",
        "nishati g4.txt",
        "matumizi ya nishati g4.txt",
        "mfumo g4.txt",
        "mawasiliano g4.txt",
    )
]
# Math topics: every entry is a TXT file under TXT_DIR (grades 3 and 4).
MATH_FILES = [
    os.path.join(TXT_DIR, txt_name)
    for txt_name in (
        "namba g3.txt",
        "mpangilio g3.txt",
        "mpangilio g4.txt",
        "matendo katika namba g3.txt",
        "kutambua sehemu g3.txt",
        "kutambua maumbo g3.txt",
        "vipimo g3.txt",
        "vipimo g4.txt",
        "wakati g4.txt",
        "takwimu kwa picha g3.txt",
        "takwimu g4.txt",
        "kugawanya namba g4.txt",
        "kujumlisha namba g4.txt",
        "kutoa namba g4.txt",
        "kuzidisha namba g4.txt",
        "namba nzima g4.txt",
        "namba za kirumi g4.txt",
        "fedha g3.txt",
        "fedha g4.txt",
        "sehemu g4.txt",
        "maumbo g4.txt",
    )
]
# Combined list of all files: the science entries first, then the math
# entries, in the order they are declared in SCIENCE_FILES and MATH_FILES.
ALL_FILES = SCIENCE_FILES + MATH_FILES