File size: 5,726 Bytes
3fcfd23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
"""
Jan v1 Research Assistant - Simplified Version for CPU
Works without GPU - uses API approach
"""

import gradio as gr
import requests
from bs4 import BeautifulSoup
import json
from datetime import datetime

def scrape_url(url: str, max_chars: int = 4000) -> str:
    """
    Fetch *url* and return its visible text content.

    Script and style elements are stripped, whitespace is collapsed, and
    the result is truncated to *max_chars* characters.

    Args:
        url: The page to fetch.
        max_chars: Maximum length of the returned text (default 4000,
            matching the previous hard-coded limit).

    Returns:
        The extracted text, or an ``"Error scraping URL: ..."`` message on
        failure (callers display the returned string either way).
    """
    try:
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        }
        response = requests.get(url, headers=headers, timeout=10)
        # Surface HTTP errors (404, 500, ...) instead of silently
        # scraping the server's error page as if it were content.
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')

        # Remove non-visible elements before extracting text
        for element in soup(["script", "style"]):
            element.decompose()

        # Collapse the whitespace runs left behind by the HTML layout
        text = soup.get_text()
        lines = (line.strip() for line in text.splitlines())
        chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
        text = ' '.join(chunk for chunk in chunks if chunk)

        return text[:max_chars]
    except Exception as e:
        return f"Error scraping URL: {str(e)}"

def research_assistant_simple(query: str, context: str = "") -> str:
    """
    Produce a structured research-analysis report for *query*.

    This simplified (CPU-only) version does not call a language model; it
    fills a fixed Markdown analysis framework with the query and a preview
    of the context. If *context* is an http(s) URL, the page is fetched
    and scraped first via ``scrape_url``.

    Args:
        query: The research question to analyze.
        context: Free text to summarize, or an http(s) URL to scrape.

    Returns:
        A Markdown-formatted analysis report.
    """
    # Only treat the context as a URL when it really is one. The previous
    # check, startswith('http'), also matched plain text such as
    # "httpd configuration notes" and missed uppercase schemes.
    stripped = context.strip()
    if stripped.lower().startswith(("http://", "https://")):
        context = scrape_url(stripped)

    if context:
        # Preview the first 500 chars; append an ellipsis only when
        # something was actually cut off (previously "..." was appended
        # unconditionally, even after "No context provided").
        preview = context[:500] + ("..." if len(context) > 500 else "")
    else:
        preview = "No context provided"

    analysis = f"""
# Research Analysis

## Query
{query}

## Context Summary
{preview}

## Analysis Framework

### 1. Key Findings
- The context provides information about the topic
- Further analysis would require examining specific aspects
- Consider multiple perspectives on this subject

### 2. Critical Questions
- What are the primary assumptions?
- What evidence supports the main claims?
- What alternative viewpoints exist?

### 3. Research Directions
- Investigate primary sources
- Compare with related studies
- Examine historical context

### 4. Limitations
- Limited context provided
- Single source analysis
- Requires deeper investigation

### 5. Next Steps
- Gather additional sources
- Conduct comparative analysis
- Validate key claims

---
*Note: This is a simplified version. For full Jan v1 capabilities, GPU hardware is required.*
"""

    return analysis

# Create Gradio interface
# Top-level UI definition: three tabs (analysis, scraper, instructions).
# `demo` is the Blocks app launched from the __main__ guard below; the
# with-nesting order here defines the rendered layout.
with gr.Blocks(title="Jan v1 Research Assistant (Simplified)", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # πŸ”¬ Jan v1 Research Assistant (Simplified Version)
    
    This is a CPU-compatible version with limited features.
    For full Jan v1 (4B params) capabilities, GPU hardware is required.
    
    ### Available Features:
    - 🌐 Web scraping and text extraction
    - πŸ“ Structured research framework
    - πŸ” Context analysis
    """)
    
    # Tab 1: query + context inputs on the left, report output on the right.
    with gr.Tab("Research Analysis"):
        with gr.Row():
            with gr.Column():
                query = gr.Textbox(
                    label="Research Query",
                    placeholder="What would you like to research?",
                    lines=2
                )
                # Accepts either pasted text or a URL; research_assistant_simple
                # decides which it is.
                context = gr.Textbox(
                    label="Context (paste text or URL)",
                    placeholder="Paste article text or enter URL to analyze",
                    lines=5
                )
                analyze_btn = gr.Button("πŸ” Analyze", variant="primary")
            
            with gr.Column():
                output = gr.Textbox(
                    label="Analysis Results",
                    lines=15
                )
        
        # Wire the button to the analysis function.
        analyze_btn.click(
            research_assistant_simple,
            inputs=[query, context],
            outputs=output
        )
    
    # Tab 2: standalone web scraper (same scrape_url helper, exposed directly).
    with gr.Tab("Web Scraper"):
        with gr.Row():
            with gr.Column():
                url_input = gr.Textbox(
                    label="URL to Scrape",
                    placeholder="https://example.com/article",
                    lines=1
                )
                scrape_btn = gr.Button("🌐 Extract Text", variant="primary")
            
            with gr.Column():
                scrape_output = gr.Textbox(
                    label="Extracted Text",
                    lines=10
                )
        
        scrape_btn.click(
            scrape_url,
            inputs=url_input,
            outputs=scrape_output
        )
    
    # Tab 3: static instructions for upgrading the Space to GPU hardware.
    with gr.Tab("Instructions"):
        gr.Markdown("""
        ## πŸ“‹ How to Enable Full Jan v1
        
        This Space is currently running in simplified mode without the actual Jan v1 model.
        
        To enable full capabilities:
        
        1. **Go to Settings**: https://huggingface.co/spaces/darwincb/jan-v1-research/settings
        2. **Select Hardware**: GPU T4 medium ($0.60/hour)
        3. **Save changes**
        4. **Wait 5 minutes** for rebuild
        
        ### Current Limitations (CPU mode):
        - ❌ No actual Jan v1 model (4B params needs GPU)
        - ❌ No AI-powered analysis
        - βœ… Web scraping works
        - βœ… Structured framework available
        
        ### With GPU Enabled:
        - βœ… Full Jan v1 model (91.1% accuracy)
        - βœ… AI-powered research analysis
        - βœ… Entity extraction
        - βœ… Multi-source comparison
        - βœ… Research question generation
        
        ### Alternative Free Options:
        - **Google Colab**: Run the full model for free
        - **Kaggle Notebooks**: 30 hours free GPU/week
        - **Local with Jan App**: If you have 8GB+ VRAM
        """)

if __name__ == "__main__":
    # Serve on all interfaces at the standard Gradio port, without
    # creating a public share link.
    demo.launch(server_name="0.0.0.0", server_port=7860, share=False)