# NOTE: removed "Spaces: / Paused / Paused" banner text — Hugging Face Spaces
# UI residue captured during extraction, not part of the program.
""" | |
Jan v1 Research Assistant - Simplified Version for CPU | |
Works without GPU - uses API approach | |
""" | |
import gradio as gr | |
import requests | |
from bs4 import BeautifulSoup | |
import json | |
from datetime import datetime | |
def scrape_url(url: str) -> str:
    """Fetch *url* and return its visible text content, capped at 4000 chars.

    Script/style elements are stripped and whitespace is collapsed so the
    result is a single flat string suitable for display in a textbox.

    Args:
        url: Fully-qualified URL to download.

    Returns:
        The extracted page text (truncated), or a human-readable
        ``"Error scraping URL: ..."`` string on any failure — this function
        never raises, so the Gradio UI always has something to show.
    """
    try:
        headers = {
            # Browser-like UA avoids trivial bot blocking on many sites.
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        }
        response = requests.get(url, headers=headers, timeout=10)
        # Fix: fail fast on HTTP errors (404/500/...) instead of silently
        # parsing an error page and returning its text as "content".
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')
        # Remove script and style elements so only human-visible text remains.
        for script in soup(["script", "style"]):
            script.decompose()
        text = soup.get_text()
        # Normalize whitespace: strip each line, split out phrases, drop blanks.
        lines = (line.strip() for line in text.splitlines())
        chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
        text = ' '.join(chunk for chunk in chunks if chunk)
        return text[:4000]  # Limit to 4000 chars to keep the UI responsive
    except Exception as e:
        # Deliberate catch-all: any failure becomes a displayable message.
        return f"Error scraping URL: {str(e)}"
def research_assistant_simple(query: str, context: str = "") -> str:
    """Return a structured research-analysis report for *query*.

    If *context* starts with ``"http"`` it is treated as a URL and scraped
    via :func:`scrape_url`; otherwise it is used as pasted text.  This is a
    static analysis framework — no model inference happens here; it can be
    replaced with actual API calls to Jan v1 when available.

    Args:
        query: The research question to head the report.
        context: Pasted source text or a URL to scrape (may be empty).

    Returns:
        A markdown-formatted analysis report.
    """
    if context.startswith('http'):
        context = scrape_url(context)

    # Build a bounded context preview.  Fix: only append an ellipsis when the
    # context was actually truncated — the original template produced
    # "No context provided..." and added "..." even to short contexts.
    if context:
        preview = context[:500] + ("..." if len(context) > 500 else "")
    else:
        preview = "No context provided"

    analysis = f"""
# Research Analysis
## Query
{query}
## Context Summary
{preview}
## Analysis Framework
### 1. Key Findings
- The context provides information about the topic
- Further analysis would require examining specific aspects
- Consider multiple perspectives on this subject
### 2. Critical Questions
- What are the primary assumptions?
- What evidence supports the main claims?
- What alternative viewpoints exist?
### 3. Research Directions
- Investigate primary sources
- Compare with related studies
- Examine historical context
### 4. Limitations
- Limited context provided
- Single source analysis
- Requires deeper investigation
### 5. Next Steps
- Gather additional sources
- Conduct comparative analysis
- Validate key claims
---
*Note: This is a simplified version. For full Jan v1 capabilities, GPU hardware is required.*
"""
    return analysis
# Create Gradio interface: three tabs (analysis form, standalone web scraper,
# static upgrade instructions).  Nesting of Row/Column context managers
# defines the on-screen layout, so statement order is significant.
# NOTE(review): emoji in the markdown strings below look mojibake'd
# ("π¬", "π", "β") — presumably 🔬/🌐/✅/❌ originally; confirm against the
# deployed Space before "fixing" them.
with gr.Blocks(title="Jan v1 Research Assistant (Simplified)", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
# π¬ Jan v1 Research Assistant (Simplified Version)
This is a CPU-compatible version with limited features.
For full Jan v1 (4B params) capabilities, GPU hardware is required.
### Available Features:
- π Web scraping and text extraction
- π Structured research framework
- π Context analysis
""")
    with gr.Tab("Research Analysis"):
        with gr.Row():
            with gr.Column():
                # Left column: user inputs.
                query = gr.Textbox(
                    label="Research Query",
                    placeholder="What would you like to research?",
                    lines=2
                )
                context = gr.Textbox(
                    label="Context (paste text or URL)",
                    placeholder="Paste article text or enter URL to analyze",
                    lines=5
                )
                analyze_btn = gr.Button("π Analyze", variant="primary")
            with gr.Column():
                # Right column: report output.
                output = gr.Textbox(
                    label="Analysis Results",
                    lines=15
                )
        # Wire the button to the analysis function defined above.
        analyze_btn.click(
            research_assistant_simple,
            inputs=[query, context],
            outputs=output
        )
    with gr.Tab("Web Scraper"):
        with gr.Row():
            with gr.Column():
                url_input = gr.Textbox(
                    label="URL to Scrape",
                    placeholder="https://example.com/article",
                    lines=1
                )
                scrape_btn = gr.Button("π Extract Text", variant="primary")
            with gr.Column():
                scrape_output = gr.Textbox(
                    label="Extracted Text",
                    lines=10
                )
        # Scraper tab calls scrape_url directly, bypassing the analysis template.
        scrape_btn.click(
            scrape_url,
            inputs=url_input,
            outputs=scrape_output
        )
    with gr.Tab("Instructions"):
        # Static help text only — no interactive components in this tab.
        gr.Markdown("""
## π How to Enable Full Jan v1
This Space is currently running in simplified mode without the actual Jan v1 model.
To enable full capabilities:
1. **Go to Settings**: https://huggingface.co/spaces/darwincb/jan-v1-research/settings
2. **Select Hardware**: GPU T4 medium ($0.60/hour)
3. **Save changes**
4. **Wait 5 minutes** for rebuild
### Current Limitations (CPU mode):
- β No actual Jan v1 model (4B params needs GPU)
- β No AI-powered analysis
- β Web scraping works
- β Structured framework available
### With GPU Enabled:
- β Full Jan v1 model (91.1% accuracy)
- β AI-powered research analysis
- β Entity extraction
- β Multi-source comparison
- β Research question generation
### Alternative Free Options:
- **Google Colab**: Run the full model for free
- **Kaggle Notebooks**: 30 hours free GPU/week
- **Local with Jan App**: If you have 8GB+ VRAM
""")
if __name__ == "__main__": | |
demo.launch( | |
server_name="0.0.0.0", | |
server_port=7860, | |
share=False | |
) |