rojikada committed on
Commit
8d1ea69
·
verified ·
1 Parent(s): ac3f101

Create seed_supabase.py

Browse files
Files changed (1) hide show
  1. seed_supabase.py +35 -0
seed_supabase.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Seed a Supabase vector table with question/answer pairs from GAIA.

Reads ``SUPABASE_URL`` and ``SUPABASE_SERVICE_KEY`` from the environment,
embeds every example as a "Q: ...\\nA: ..." document and uploads the result
to the ``documents`` table.
"""
import os

from datasets import load_dataset
from langchain.schema import Document
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import SupabaseVectorStore
from supabase import create_client

# 1. Read credentials first: a missing variable raises KeyError right away
#    instead of after the dataset download and the embedding model load.
supabase_url = os.environ["SUPABASE_URL"]
supabase_key = os.environ["SUPABASE_SERVICE_KEY"]

# 2. Load GAIA examples.
# NOTE(review): per the GAIA dataset card the repository requires a config
# name (e.g. "2023_all") and only ships "validation" and "test" splits; there
# is no "train" split, and only "validation" carries real answers. The columns
# are named "Question" and "Final answer". Confirm against the dataset card
# if the upstream layout changes.
GAIA_CONFIG = "2023_all"
GAIA_SPLIT = "validation"
dataset = load_dataset("gaia-benchmark/GAIA", GAIA_CONFIG, split=GAIA_SPLIT)

# 3. Build Documents: "Q: …\nA: …"
docs = [
    Document(
        page_content=f"Q: {ex['Question']}\nA: {ex['Final answer']}",
        # Record the split actually loaded so stored rows stay traceable.
        metadata={"task_id": ex.get("task_id"), "split": GAIA_SPLIT},
    )
    for ex in dataset
]

# 4. Initialize embedding & Supabase client
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
supabase = create_client(supabase_url, supabase_key)

# 5. Upload to Supabase
vectorstore = SupabaseVectorStore.from_documents(
    docs,
    embedding=embeddings,
    client=supabase,
    table_name="documents",
    query_name="match_documents_langchain",
)

print(f"Seeded {len(docs)} GAIA examples into Supabase.")