""" | |
Jan v1 Research Assistant - WITH REAL WEB SEARCH | |
""" | |
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import requests
from bs4 import BeautifulSoup
import json
import urllib.parse
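
# Assumed environment notes (not pinned by the original script): BeautifulSoup's
# 'xml' parser used for arXiv below needs lxml, load_in_4bit=True needs
# bitsandbytes, and device_map="auto" needs accelerate.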

# Initialize model with error handling
print("Loading Jan v1...")
model_name = "janhq/Jan-v1-4B"

try:
    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float16,
        device_map="auto",
        load_in_4bit=True,  # 4-bit quantization; needs bitsandbytes (a failure here triggers the except fallback)
        trust_remote_code=True,
        low_cpu_mem_usage=True
    )
    print("Jan v1 loaded!")
    model_loaded = True
except Exception as e:
    print(f"Error loading Jan v1: {e}")
    print("Using simplified fallback...")
    # Simple fallback: research_with_sources() returns raw sources when the model is unavailable
    tokenizer = None
    model = None
    model_loaded = False

class RealWebSearch:
    def __init__(self):
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        })
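
    # NOTE: scraping Google/Bing result pages is brittle - the CSS classes
    # targeted below ('g', 'aCOpRe', 'b_algo', ...) change without notice,
    # which is why several fallback methods are tried in order.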
    def search_web(self, query, num_results=3):
        """Real web search using multiple methods"""
        results = []

        # Method 1: Try Google Search (via scraping)
        try:
            search_url = f"https://www.google.com/search?q={urllib.parse.quote(query)}"
            response = self.session.get(search_url, timeout=5)
            soup = BeautifulSoup(response.text, 'html.parser')

            # Find search results
            search_divs = soup.find_all('div', class_='g')[:num_results]
            for div in search_divs:
                title_elem = div.find('h3')
                link_elem = div.find('a')
                snippet_elem = div.find('span', class_='aCOpRe') or div.find('span', class_='st')

                if title_elem and link_elem:
                    results.append({
                        'title': title_elem.get_text(),
                        'body': snippet_elem.get_text() if snippet_elem else "No snippet available",
                        'url': link_elem.get('href', '#')
                    })

            if results:
                print(f"Found {len(results)} real Google results")
                return results
        except Exception as e:
            print(f"Google search failed: {e}")
        # Method 2: Try Bing Search
        try:
            bing_url = f"https://www.bing.com/search?q={urllib.parse.quote(query)}"
            response = self.session.get(bing_url, timeout=5)
            soup = BeautifulSoup(response.text, 'html.parser')

            # Find Bing results
            for li in soup.find_all('li', class_='b_algo')[:num_results]:
                h2 = li.find('h2')
                if h2:
                    link = h2.find('a')
                    snippet = li.find('p')
                    if link:
                        results.append({
                            'title': link.get_text(),
                            'body': snippet.get_text() if snippet else "No description",
                            'url': link.get('href', '#')
                        })

            if results:
                print(f"Found {len(results)} real Bing results")
                return results
        except Exception as e:
            print(f"Bing search failed: {e}")
        # Method 3: Try Wikipedia API
        try:
            wiki_url = f"https://en.wikipedia.org/w/api.php?action=opensearch&search={urllib.parse.quote(query)}&limit={num_results}&format=json"
            response = self.session.get(wiki_url, timeout=5)
            data = response.json()

            # opensearch returns [query, titles, descriptions, urls]
            if len(data) >= 4:
                titles = data[1]
                descriptions = data[2]
                urls = data[3]
                for i in range(min(len(titles), num_results)):
                    results.append({
                        'title': titles[i],
                        'body': descriptions[i] if i < len(descriptions) else "Wikipedia article",
                        'url': urls[i] if i < len(urls) else f"https://en.wikipedia.org/wiki/{titles[i].replace(' ', '_')}"
                    })

            if results:
                print(f"Found {len(results)} real Wikipedia results")
                return results
        except Exception as e:
            print(f"Wikipedia search failed: {e}")
        # Method 4: Try arXiv for academic queries
        if "research" in query.lower() or "paper" in query.lower() or "study" in query.lower():
            try:
                arxiv_url = f"http://export.arxiv.org/api/query?search_query=all:{urllib.parse.quote(query)}&max_results={num_results}"
                response = self.session.get(arxiv_url, timeout=5)
                # The 'xml' parser requires lxml to be installed
                soup = BeautifulSoup(response.text, 'xml')

                for entry in soup.find_all('entry')[:num_results]:
                    title = entry.find('title')
                    summary = entry.find('summary')
                    link = entry.find('id')
                    if title and link:
                        results.append({
                            'title': title.get_text().strip(),
                            'body': summary.get_text()[:200].strip() if summary else "Academic paper",
                            'url': link.get_text().strip()
                        })

                if results:
                    print(f"Found {len(results)} real arXiv results")
                    return results
            except Exception as e:
                print(f"arXiv search failed: {e}")
        # If all methods fail, return a message
        print("All search methods failed, returning fallback")
        return [{
            'title': f"Search for: {query}",
            'body': "Unable to fetch real-time results. Please try a different query or check your connection.",
            'url': f"https://www.google.com/search?q={urllib.parse.quote(query)}"
        }]
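
# A minimal standalone sketch of RealWebSearch usage (hypothetical, not part
# of the Gradio app - the query text is illustrative only):
#
#     engine = RealWebSearch()
#     for hit in engine.search_web("quantum computing breakthroughs", num_results=2):
#         print(hit['title'], '->', hit['url'])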

def research_with_sources(query, temperature=0.5):
    """Research with REAL web sources"""
    if not query:
        return "Please enter a research query"

    print(f"Researching: {query}")

    # Get REAL search results
    search_engine = RealWebSearch()
    results = search_engine.search_web(query, 3)

    # Build context from real sources
    sources_text = ""
    citations = []
    for i, result in enumerate(results):
        sources_text += f"[{i+1}] {result['title']}: {result['body']}\n"
        citations.append(f"[{i+1}] {result['title']}\n {result['url']}")
    # Fall back gracefully when Jan v1 could not be loaded (tokenizer/model are None)
    if not model_loaded:
        return "Jan v1 is unavailable - showing raw sources only:\n\n" + "\n\n".join(citations)

    # Generate analysis with Jan v1
    prompt = f"""Based on these sources, analyze: {query}
Sources:
{sources_text}
Provide comprehensive analysis with key findings and implications:"""

    inputs = tokenizer(prompt, return_tensors="pt", max_length=1024, truncation=True)
    inputs = inputs.to(model.device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=400,
            temperature=temperature,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id
        )

    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    analysis = response.replace(prompt, "").strip()

    # Format with REAL sources
    result = f"{analysis}\n\n" + "=" * 50 + "\nREAL SOURCES:\n\n"
    for citation in citations:
        result += citation + "\n\n"
    return result
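
# Shape of the returned text (illustrative, based on the formatting above):
#     <model analysis>
#     ==================================================
#     REAL SOURCES:
#     [1] Source title
#      https://source.url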

# Create interface
with gr.Blocks(title="Jan v1 Research - REAL Sources", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # Jan v1 Research Assistant - WITH REAL WEB SEARCH

    **Now with REAL sources from Google, Bing, Wikipedia, and arXiv!**

    Powered by Jan v1 (4B params) - Like Perplexity but FREE
    """)
    with gr.Row():
        with gr.Column():
            query_input = gr.Textbox(
                label="Research Query",
                placeholder="Enter any topic to research with real sources...",
                lines=2
            )
            temp_slider = gr.Slider(0.1, 0.9, value=0.5, label="Temperature")
            search_btn = gr.Button("Research with REAL Sources", variant="primary")

        with gr.Column():
            output = gr.Textbox(
                label="Analysis with Real Sources",
                lines=20,
                show_copy_button=True
            )

    search_btn.click(
        research_with_sources,
        inputs=[query_input, temp_slider],
        outputs=output
    )
    gr.Examples(
        examples=[
            ["latest AI developments 2024", 0.5],
            ["quantum computing breakthroughs", 0.6],
            ["climate change solutions", 0.5],
            ["Chinese microdrama trends", 0.6]
        ],
        inputs=[query_input, temp_slider]
    )

if __name__ == "__main__":
    demo.launch()
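
# Hypothetical local run (outside Hugging Face Spaces), assuming the file is
# saved as app.py:
#     pip install gradio transformers torch requests beautifulsoup4 lxml accelerate bitsandbytes
#     python app.py   # Gradio serves the UI on http://127.0.0.1:7860 by default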