# NOTE: removed "Spaces: / Paused / Paused" banner text — Hugging Face Spaces
# UI residue captured during extraction, not part of the program.
""" | |
Jan v1 Research Assistant - Simplified Version for CPU | |
Works without GPU - uses API approach | |
""" | |
import gradio as gr | |
import requests | |
from bs4 import BeautifulSoup | |
import json | |
from datetime import datetime | |
def scrape_url(url: str) -> str:
    """Fetch *url* and return its visible text content, capped at 4000 chars.

    Script/style elements are stripped and whitespace is collapsed so the
    result is a single flat string suitable for display in a textbox.

    Args:
        url: Fully-qualified URL to download.

    Returns:
        The extracted page text (truncated), or a human-readable
        ``"Error scraping URL: ..."`` string on any failure — this function
        never raises, so the Gradio UI always has something to show.
    """
    try:
        headers = {
            # Browser-like UA avoids trivial bot blocking on many sites.
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        }
        response = requests.get(url, headers=headers, timeout=10)
        # Fix: fail fast on HTTP errors (404/500/...) instead of silently
        # parsing an error page and returning its text as "content".
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')
        # Remove script and style elements so only human-visible text remains.
        for script in soup(["script", "style"]):
            script.decompose()
        text = soup.get_text()
        # Normalize whitespace: strip each line, split out phrases, drop blanks.
        lines = (line.strip() for line in text.splitlines())
        chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
        text = ' '.join(chunk for chunk in chunks if chunk)
        return text[:4000]  # Limit to 4000 chars to keep the UI responsive
    except Exception as e:
        # Deliberate catch-all: any failure becomes a displayable message.
        return f"Error scraping URL: {str(e)}"
def research_assistant_simple(query: str, context: str = "") -> str:
    """Return a structured research-analysis report for *query*.

    If *context* starts with ``"http"`` it is treated as a URL and scraped
    via :func:`scrape_url`; otherwise it is used as pasted text.  This is a
    static analysis framework — no model inference happens here; it can be
    replaced with actual API calls to Jan v1 when available.

    Args:
        query: The research question to head the report.
        context: Pasted source text or a URL to scrape (may be empty).

    Returns:
        A markdown-formatted analysis report.
    """
    if context.startswith('http'):
        context = scrape_url(context)

    # Build a bounded context preview.  Fix: only append an ellipsis when the
    # context was actually truncated — the original template produced
    # "No context provided..." and added "..." even to short contexts.
    if context:
        preview = context[:500] + ("..." if len(context) > 500 else "")
    else:
        preview = "No context provided"

    analysis = f"""
# Research Analysis
## Query
{query}
## Context Summary
{preview}
## Analysis Framework
### 1. Key Findings
- The context provides information about the topic
- Further analysis would require examining specific aspects
- Consider multiple perspectives on this subject
### 2. Critical Questions
- What are the primary assumptions?
- What evidence supports the main claims?
- What alternative viewpoints exist?
### 3. Research Directions
- Investigate primary sources
- Compare with related studies
- Examine historical context
### 4. Limitations
- Limited context provided
- Single source analysis
- Requires deeper investigation
### 5. Next Steps
- Gather additional sources
- Conduct comparative analysis
- Validate key claims
---
*Note: This is a simplified version. For full Jan v1 capabilities, GPU hardware is required.*
"""
    return analysis
# Create Gradio interface: three tabs (analysis form, standalone web scraper,
# static upgrade instructions).  Nesting of Row/Column context managers
# defines the on-screen layout, so statement order is significant.
# NOTE(review): emoji in the markdown strings below look mojibake'd
# ("π¬", "π", "β") — presumably 🔬/🌐/✅/❌ originally; confirm against the
# deployed Space before "fixing" them.
with gr.Blocks(title="Jan v1 Research Assistant (Simplified)", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
# π¬ Jan v1 Research Assistant (Simplified Version)
This is a CPU-compatible version with limited features.
For full Jan v1 (4B params) capabilities, GPU hardware is required.
### Available Features:
- π Web scraping and text extraction
- π Structured research framework
- π Context analysis
""")
    with gr.Tab("Research Analysis"):
        with gr.Row():
            with gr.Column():
                # Left column: user inputs.
                query = gr.Textbox(
                    label="Research Query",
                    placeholder="What would you like to research?",
                    lines=2
                )
                context = gr.Textbox(
                    label="Context (paste text or URL)",
                    placeholder="Paste article text or enter URL to analyze",
                    lines=5
                )
                analyze_btn = gr.Button("π Analyze", variant="primary")
            with gr.Column():
                # Right column: report output.
                output = gr.Textbox(
                    label="Analysis Results",
                    lines=15
                )
        # Wire the button to the analysis function defined above.
        analyze_btn.click(
            research_assistant_simple,
            inputs=[query, context],
            outputs=output
        )
    with gr.Tab("Web Scraper"):
        with gr.Row():
            with gr.Column():
                url_input = gr.Textbox(
                    label="URL to Scrape",
                    placeholder="https://example.com/article",
                    lines=1
                )
                scrape_btn = gr.Button("π Extract Text", variant="primary")
            with gr.Column():
                scrape_output = gr.Textbox(
                    label="Extracted Text",
                    lines=10
                )
        # Scraper tab calls scrape_url directly, bypassing the analysis template.
        scrape_btn.click(
            scrape_url,
            inputs=url_input,
            outputs=scrape_output
        )
    with gr.Tab("Instructions"):
        # Static help text only — no interactive components in this tab.
        gr.Markdown("""
## π How to Enable Full Jan v1
This Space is currently running in simplified mode without the actual Jan v1 model.
To enable full capabilities:
1. **Go to Settings**: https://huggingface.co/spaces/darwincb/jan-v1-research/settings
2. **Select Hardware**: GPU T4 medium ($0.60/hour)
3. **Save changes**
4. **Wait 5 minutes** for rebuild
### Current Limitations (CPU mode):
- β No actual Jan v1 model (4B params needs GPU)
- β No AI-powered analysis
- β Web scraping works
- β Structured framework available
### With GPU Enabled:
- β Full Jan v1 model (91.1% accuracy)
- β AI-powered research analysis
- β Entity extraction
- β Multi-source comparison
- β Research question generation
### Alternative Free Options:
- **Google Colab**: Run the full model for free
- **Kaggle Notebooks**: 30 hours free GPU/week
- **Local with Jan App**: If you have 8GB+ VRAM
""")
if __name__ == "__main__": | |
demo.launch( | |
server_name="0.0.0.0", | |
server_port=7860, | |
share=False | |
) |