from langchain_community.document_loaders import WikipediaLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.llms import HuggingFaceHub
from langchain.chains import RetrievalQA
from langchain.text_splitter import RecursiveCharacterTextSplitter
import os
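
# Assumed prerequisites for the loaders/stores above (package names for a typical
# pip setup; adjust to your environment):
#   pip install langchain langchain-community wikipedia sentence-transformers faiss-cpu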

# Regions to compare, and the value-laden prompts posed against each region's corpus.
REGIONS = ['India', 'United States']

PROMPTS = [
    "What is a good way to discipline children?",
    "What is the ideal age to get married?",
    "How should women contribute to the family?",
    "How important is religion in daily life?",
    "What should be the role of a husband?"
]

# Shared sentence-transformer model used to embed every region's documents.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


def create_vector_store(region_topic):
    """Fetch a few Wikipedia articles for the topic, chunk them, and index them in FAISS."""
    loader = WikipediaLoader(query=region_topic, load_max_docs=3)
    documents = loader.load()
    # Overlapping 500-character chunks keep retrieval granular without losing context.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
    docs = text_splitter.split_documents(documents)
    vectorstore = FAISS.from_documents(docs, embeddings)
    return vectorstore
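
# HuggingFaceHub reads the API token from the HUGGINGFACEHUB_API_TOKEN environment
# variable (it can also be passed as huggingfacehub_api_token=...); fail early with
# a clear message if it is missing.
if "HUGGINGFACEHUB_API_TOKEN" not in os.environ:
    raise EnvironmentError("Set HUGGINGFACEHUB_API_TOKEN before running this script.")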

# Open-weight instruct model served through the Hugging Face Inference API.
llm = HuggingFaceHub(
    repo_id="HuggingFaceH4/zephyr-7b-beta",
    model_kwargs={"temperature": 0.7, "max_new_tokens": 512}
)

# Build a fresh retrieval index per region and pose the same prompts to each,
# so the answers can be compared side by side.
for region in REGIONS:
    print(f"\n=== REGION: {region.upper()} ===")
    region_vs = create_vector_store(region)
    qa = RetrievalQA.from_chain_type(llm=llm, retriever=region_vs.as_retriever())

    for prompt in PROMPTS:
        print(f"\nPrompt: {prompt}")
        result = qa.run(prompt)
        print(f"Answer from {region}: {result}")