# jan-v1-research / app.py
# Origin: Hugging Face Space file (commit 7be49ed, darwincb,
# "Add multiple app versions - minimal for fast loading", 9.15 kB).
# The lines above were raw-page chrome (raw / history / blame) captured
# during extraction; kept here as a comment so the file remains valid Python.
"""
Jan v1 Research Assistant - WITH REAL WEB SEARCH
"""
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import requests
from bs4 import BeautifulSoup
import json
import urllib.parse
# Initialize model with error handling
print("🚀 Loading Jan v1...")
model_name = "janhq/Jan-v1-4B"
try:
    # trust_remote_code=True because the Jan repo may ship custom model/tokenizer code.
    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float16,  # half precision to reduce memory footprint
        device_map="auto",          # let accelerate place layers across available devices
        load_in_4bit=True,          # NOTE(review): requires bitsandbytes and is a deprecated kwarg in newer transformers (use BitsAndBytesConfig) — confirm
        trust_remote_code=True,
        low_cpu_mem_usage=True
    )
    print("✅ Jan v1 loaded!")
    model_loaded = True  # flag for downstream code to detect a usable model
except Exception as e:
    # Any load failure (missing deps, OOM, network) leaves the app in a
    # degraded mode: no model, and downstream code must check model_loaded.
    print(f"❌ Error loading Jan v1: {e}")
    print("🔄 Using simplified fallback...")
    # Simple fallback that always works
    tokenizer = None
    model = None
    model_loaded = False
class RealWebSearch:
    """Best-effort web search that tries several backends in order.

    Backends are attempted in sequence: Google scraping, Bing scraping, the
    Wikipedia OpenSearch API, then (only for research-flavored queries) the
    arXiv API. Each backend failure is caught and logged, and the next one is
    tried; if everything fails, a single placeholder result pointing at a
    Google search page is returned, so callers always get a non-empty list.
    """

    def __init__(self):
        # One shared session; a browser-like User-Agent keeps the scraped
        # engines from rejecting the requests outright.
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        })

    def search_web(self, query, num_results=3):
        """Return up to num_results dicts with 'title', 'body' and 'url' keys.

        Args:
            query: Free-text search query.
            num_results: Maximum number of results taken from any one backend.
        """
        results = []

        # Method 1: scrape Google search results.
        # NOTE(review): the 'g' / 'aCOpRe' / 'st' CSS classes are undocumented
        # Google internals and can silently stop matching at any time.
        try:
            search_url = f"https://www.google.com/search?q={urllib.parse.quote(query)}"
            response = self.session.get(search_url, timeout=5)
            soup = BeautifulSoup(response.text, 'html.parser')
            search_divs = soup.find_all('div', class_='g')[:num_results]
            for div in search_divs:
                title_elem = div.find('h3')
                link_elem = div.find('a')
                snippet_elem = div.find('span', class_='aCOpRe') or div.find('span', class_='st')
                if title_elem and link_elem:
                    results.append({
                        'title': title_elem.get_text(),
                        'body': snippet_elem.get_text() if snippet_elem else "No snippet available",
                        'url': link_elem.get('href', '#')
                    })
            if results:
                print(f"✅ Found {len(results)} real Google results")
                return results
        except Exception as e:
            print(f"Google search failed: {e}")

        # Method 2: scrape Bing search results ('b_algo' is Bing's
        # organic-result list-item class).
        try:
            bing_url = f"https://www.bing.com/search?q={urllib.parse.quote(query)}"
            response = self.session.get(bing_url, timeout=5)
            soup = BeautifulSoup(response.text, 'html.parser')
            for li in soup.find_all('li', class_='b_algo')[:num_results]:
                h2 = li.find('h2')
                if h2:
                    link = h2.find('a')
                    snippet = li.find('p')
                    if link:
                        results.append({
                            'title': link.get_text(),
                            'body': snippet.get_text() if snippet else "No description",
                            'url': link.get('href', '#')
                        })
            if results:
                print(f"✅ Found {len(results)} real Bing results")
                return results
        except Exception as e:
            print(f"Bing search failed: {e}")

        # Method 3: Wikipedia OpenSearch API.
        # Response shape: [query, titles, descriptions, urls].
        try:
            # BUG FIX: the query must be URL-encoded here like in every other
            # backend; raw spaces/special characters built a malformed URL.
            wiki_url = f"https://en.wikipedia.org/w/api.php?action=opensearch&search={urllib.parse.quote(query)}&limit={num_results}&format=json"
            response = self.session.get(wiki_url, timeout=5)
            data = response.json()
            if len(data) >= 4:
                titles = data[1]
                descriptions = data[2]
                urls = data[3]
                for i in range(min(len(titles), num_results)):
                    results.append({
                        'title': titles[i],
                        'body': descriptions[i] if i < len(descriptions) else "Wikipedia article",
                        'url': urls[i] if i < len(urls) else f"https://en.wikipedia.org/wiki/{titles[i].replace(' ', '_')}"
                    })
            if results:
                print(f"✅ Found {len(results)} real Wikipedia results")
                return results
        except Exception as e:
            print(f"Wikipedia search failed: {e}")

        # Method 4: arXiv API, only for queries that look academic.
        if "research" in query.lower() or "paper" in query.lower() or "study" in query.lower():
            try:
                arxiv_url = f"http://export.arxiv.org/api/query?search_query=all:{urllib.parse.quote(query)}&max_results={num_results}"
                response = self.session.get(arxiv_url, timeout=5)
                # NOTE(review): the 'xml' parser needs lxml installed;
                # BeautifulSoup raises otherwise (caught by this except).
                soup = BeautifulSoup(response.text, 'xml')
                for entry in soup.find_all('entry')[:num_results]:
                    title = entry.find('title')
                    summary = entry.find('summary')
                    link = entry.find('id')
                    if title and link:
                        results.append({
                            'title': title.get_text().strip(),
                            'body': summary.get_text()[:200].strip() if summary else "Academic paper",
                            'url': link.get_text().strip()
                        })
                if results:
                    print(f"✅ Found {len(results)} real arXiv results")
                    return results
            except Exception as e:
                print(f"arXiv search failed: {e}")

        # Everything failed: return a single clickable fallback entry so the
        # caller never has to handle an empty list.
        print("❌ All search methods failed, returning fallback")
        return [{
            'title': f"Search for: {query}",
            'body': "Unable to fetch real-time results. Please try a different query or check your connection.",
            'url': f"https://www.google.com/search?q={urllib.parse.quote(query)}"
        }]
def research_with_sources(query, temperature=0.5):
    """Answer a research query with live web sources plus Jan v1 analysis.

    Args:
        query: Free-text research question; falsy input short-circuits with
            a prompt message.
        temperature: Sampling temperature for model generation.

    Returns:
        A string containing the model's analysis followed by a numbered
        source list — or a sources-only string when the model is unavailable.
    """
    if not query:
        return "Please enter a research query"
    print(f"🔍 Researching: {query}")

    # Get REAL search results (search_web always returns at least one entry).
    search_engine = RealWebSearch()
    results = search_engine.search_web(query, 3)

    # Build the model context and the human-readable citation list.
    sources_text = ""
    citations = []
    for i, result in enumerate(results):
        sources_text += f"[{i+1}] {result['title']}: {result['body']}\n"
        citations.append(f"[{i+1}] {result['title']}\n {result['url']}")

    sources_block = "="*50 + "\n📚 REAL SOURCES:\n\n"
    for citation in citations:
        sources_block += citation + "\n\n"

    # BUG FIX: the module-level loader can fail, leaving tokenizer/model as
    # None with model_loaded False; previously this function crashed on the
    # tokenizer call below. Degrade gracefully to a sources-only answer.
    if not model_loaded:
        return "⚠️ Jan v1 model is unavailable - showing sources only.\n\n" + sources_block

    # Generate analysis with Jan v1.
    prompt = f"""Based on these sources, analyze: {query}
Sources:
{sources_text}
Provide comprehensive analysis with key findings and implications:"""
    inputs = tokenizer(prompt, return_tensors="pt", max_length=1024, truncation=True)
    inputs = inputs.to(model.device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=400,
            temperature=temperature,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id
        )
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Strip the echoed prompt so only the generated analysis remains.
    analysis = response.replace(prompt, "").strip()

    # Format analysis with the REAL sources appended underneath.
    return f"{analysis}\n\n" + sources_block
# --- Gradio UI ------------------------------------------------------------
# Left column holds the query box, temperature slider and the run button;
# right column shows the generated analysis with its citation list.
with gr.Blocks(title="Jan v1 Research - REAL Sources", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
# 🚀 Jan v1 Research Assistant - WITH REAL WEB SEARCH
**Now with REAL sources from Google, Bing, Wikipedia, and arXiv!**
Powered by Jan v1 (4B params) - Like Perplexity but FREE
""")

    with gr.Row():
        with gr.Column():
            topic_box = gr.Textbox(
                label="Research Query",
                placeholder="Enter any topic to research with real sources...",
                lines=2
            )
            temperature_slider = gr.Slider(0.1, 0.9, value=0.5, label="Temperature")
            run_button = gr.Button("🔍 Research with REAL Sources", variant="primary")
        with gr.Column():
            analysis_box = gr.Textbox(
                label="Analysis with Real Sources",
                lines=20,
                show_copy_button=True
            )

    # Wire the button to the research pipeline.
    run_button.click(
        research_with_sources,
        inputs=[topic_box, temperature_slider],
        outputs=analysis_box
    )

    # Canned example queries displayed beneath the inputs.
    gr.Examples(
        examples=[
            ["latest AI developments 2024", 0.5],
            ["quantum computing breakthroughs", 0.6],
            ["climate change solutions", 0.5],
            ["Chinese microdrama trends", 0.6]
        ],
        inputs=[topic_box, temperature_slider]
    )

# Launch only when run as a script (not when imported).
if __name__ == "__main__":
    demo.launch()