File size: 2,948 Bytes
91e999e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import os

# Base paths
# BASE_DIR is taken from the BASE_DIR environment variable. When the variable
# is unset OR set to an empty string, fall back to the parent of the directory
# that contains this file. Using `or` (rather than a getenv default) matters:
# an empty value would otherwise turn every path below into one relative to
# the current working directory.
BASE_DIR = os.getenv('BASE_DIR') or os.path.dirname(
    os.path.dirname(os.path.abspath(__file__))
)
PDF_DIR = os.path.join(BASE_DIR, "data", "pdfs")
TXT_DIR = os.path.join(BASE_DIR, "data", "texts")
INDEX_DIR = os.path.join(BASE_DIR, "data", "index")

# Ensure directories exist (import-time side effect; a no-op when they are
# already present thanks to exist_ok=True)
os.makedirs(PDF_DIR, exist_ok=True)
os.makedirs(TXT_DIR, exist_ok=True)
os.makedirs(INDEX_DIR, exist_ok=True)

# Model settings
MODEL_ID = "meta-llama/Llama-3.2-3B-Instruct"
# Read from the environment; None when HUGGINGFACE_TOKEN is not set
HUGGINGFACE_TOKEN = os.environ.get("HUGGINGFACE_TOKEN")

# Data root (the folder that holds the PDF and TXT sub-directories) and the
# documents.pkl file, which is kept inside INDEX_DIR
DOCUMENTS_PATH = os.path.join(INDEX_DIR, "documents.pkl")
DATA_DIR = os.path.join(BASE_DIR, "data")

# Index file paths
FAISS_INDEX_PATH = os.path.join(INDEX_DIR, "faiss_index.pkl")

# Science topics: the "g3" PDFs under PDF_DIR first, followed by the "g4"
# text files under TXT_DIR (order is preserved in the resulting list)
SCIENCE_FILES = [
    os.path.join(PDF_DIR, pdf_name)
    for pdf_name in (
        "mazingira g3.pdf",
        "nishati g3.pdf",
        "maada g3.pdf",
        "mawasiliano g3.pdf",
        "usafi g3.pdf",
        "vipimo g3.pdf",
        "mlo g3.pdf",
        "mfumo g3.pdf",
        "maambukizi g3.pdf",
        "huduma g3.pdf",
        "vifaa g3.pdf",
    )
] + [
    os.path.join(TXT_DIR, txt_name)
    for txt_name in (
        "kinga ya mwili g4.txt",
        "ukimwi g4.txt",
        "maji g4.txt",
        "majaribio ya kisayansi g4.txt",
        "magonjwa g4.txt",
        "huduma g4.txt",
        "mazingira g4.txt",
        "nishati g4.txt",
        "matumizi ya nishati g4.txt",
        "mfumo g4.txt",
        "mawasiliano g4.txt",
    )
]

# Math topics: every entry is a text file located under TXT_DIR
MATH_FILES = [
    os.path.join(TXT_DIR, txt_name)
    for txt_name in (
        "namba g3.txt",
        "mpangilio g3.txt",
        "mpangilio g4.txt",
        "matendo katika namba g3.txt",
        "kutambua sehemu g3.txt",
        "kutambua maumbo g3.txt",
        "vipimo g3.txt",
        "vipimo g4.txt",
        "wakati g4.txt",
        "takwimu kwa picha g3.txt",
        "takwimu g4.txt",
        "kugawanya namba g4.txt",
        "kujumlisha namba g4.txt",
        "kutoa namba g4.txt",
        "kuzidisha namba g4.txt",
        "namba nzima g4.txt",
        "namba za kirumi g4.txt",
        "fedha g3.txt",
        "fedha g4.txt",
        "sehemu g4.txt",
        "maumbo g4.txt",
    )
]

# Every known file: science entries first, then math entries
ALL_FILES = [*SCIENCE_FILES, *MATH_FILES]