File size: 5,726 Bytes
3fcfd23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
"""
Jan v1 Research Assistant - Simplified Version for CPU
Works without GPU - uses API approach
"""

import gradio as gr
import requests
from bs4 import BeautifulSoup
import json
from datetime import datetime

def scrape_url(url: str, max_chars: int = 4000) -> str:
    """
    Fetch *url* and return its visible text content.

    Script and style elements are stripped, whitespace is collapsed, and
    the result is truncated to *max_chars* characters.

    Args:
        url: The page to fetch.
        max_chars: Maximum length of the returned text (default 4000,
            matching the previous hard-coded limit).

    Returns:
        The extracted text, or an ``"Error scraping URL: ..."`` message on
        failure (callers display the returned string either way).
    """
    try:
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        }
        response = requests.get(url, headers=headers, timeout=10)
        # Surface HTTP errors (404, 500, ...) instead of silently
        # scraping the server's error page as if it were content.
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')

        # Remove non-visible elements before extracting text
        for element in soup(["script", "style"]):
            element.decompose()

        # Collapse the whitespace runs left behind by the HTML layout
        text = soup.get_text()
        lines = (line.strip() for line in text.splitlines())
        chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
        text = ' '.join(chunk for chunk in chunks if chunk)

        return text[:max_chars]
    except Exception as e:
        return f"Error scraping URL: {str(e)}"

def research_assistant_simple(query: str, context: str = "") -> str:
    """
    Produce a structured research-analysis report for *query*.

    This simplified (CPU-only) version does not call a language model; it
    fills a fixed Markdown analysis framework with the query and a preview
    of the context. If *context* is an http(s) URL, the page is fetched
    and scraped first via ``scrape_url``.

    Args:
        query: The research question to analyze.
        context: Free text to summarize, or an http(s) URL to scrape.

    Returns:
        A Markdown-formatted analysis report.
    """
    # Only treat the context as a URL when it really is one. The previous
    # check, startswith('http'), also matched plain text such as
    # "httpd configuration notes" and missed uppercase schemes.
    stripped = context.strip()
    if stripped.lower().startswith(("http://", "https://")):
        context = scrape_url(stripped)

    if context:
        # Preview the first 500 chars; append an ellipsis only when
        # something was actually cut off (previously "..." was appended
        # unconditionally, even after "No context provided").
        preview = context[:500] + ("..." if len(context) > 500 else "")
    else:
        preview = "No context provided"

    analysis = f"""
# Research Analysis

## Query
{query}

## Context Summary
{preview}

## Analysis Framework

### 1. Key Findings
- The context provides information about the topic
- Further analysis would require examining specific aspects
- Consider multiple perspectives on this subject

### 2. Critical Questions
- What are the primary assumptions?
- What evidence supports the main claims?
- What alternative viewpoints exist?

### 3. Research Directions
- Investigate primary sources
- Compare with related studies
- Examine historical context

### 4. Limitations
- Limited context provided
- Single source analysis
- Requires deeper investigation

### 5. Next Steps
- Gather additional sources
- Conduct comparative analysis
- Validate key claims

---
*Note: This is a simplified version. For full Jan v1 capabilities, GPU hardware is required.*
"""

    return analysis

# Create Gradio interface
# Top-level UI definition: three tabs (analysis, scraper, instructions).
# `demo` is the Blocks app launched from the __main__ guard below; the
# with-nesting order here defines the rendered layout.
with gr.Blocks(title="Jan v1 Research Assistant (Simplified)", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # πŸ”¬ Jan v1 Research Assistant (Simplified Version)
    
    This is a CPU-compatible version with limited features.
    For full Jan v1 (4B params) capabilities, GPU hardware is required.
    
    ### Available Features:
    - 🌐 Web scraping and text extraction
    - πŸ“ Structured research framework
    - πŸ” Context analysis
    """)
    
    # Tab 1: query + context inputs on the left, report output on the right.
    with gr.Tab("Research Analysis"):
        with gr.Row():
            with gr.Column():
                query = gr.Textbox(
                    label="Research Query",
                    placeholder="What would you like to research?",
                    lines=2
                )
                # Accepts either pasted text or a URL; research_assistant_simple
                # decides which it is.
                context = gr.Textbox(
                    label="Context (paste text or URL)",
                    placeholder="Paste article text or enter URL to analyze",
                    lines=5
                )
                analyze_btn = gr.Button("πŸ” Analyze", variant="primary")
            
            with gr.Column():
                output = gr.Textbox(
                    label="Analysis Results",
                    lines=15
                )
        
        # Wire the button to the analysis function.
        analyze_btn.click(
            research_assistant_simple,
            inputs=[query, context],
            outputs=output
        )
    
    # Tab 2: standalone web scraper (same scrape_url helper, exposed directly).
    with gr.Tab("Web Scraper"):
        with gr.Row():
            with gr.Column():
                url_input = gr.Textbox(
                    label="URL to Scrape",
                    placeholder="https://example.com/article",
                    lines=1
                )
                scrape_btn = gr.Button("🌐 Extract Text", variant="primary")
            
            with gr.Column():
                scrape_output = gr.Textbox(
                    label="Extracted Text",
                    lines=10
                )
        
        scrape_btn.click(
            scrape_url,
            inputs=url_input,
            outputs=scrape_output
        )
    
    # Tab 3: static instructions for upgrading the Space to GPU hardware.
    with gr.Tab("Instructions"):
        gr.Markdown("""
        ## πŸ“‹ How to Enable Full Jan v1
        
        This Space is currently running in simplified mode without the actual Jan v1 model.
        
        To enable full capabilities:
        
        1. **Go to Settings**: https://huggingface.co/spaces/darwincb/jan-v1-research/settings
        2. **Select Hardware**: GPU T4 medium ($0.60/hour)
        3. **Save changes**
        4. **Wait 5 minutes** for rebuild
        
        ### Current Limitations (CPU mode):
        - ❌ No actual Jan v1 model (4B params needs GPU)
        - ❌ No AI-powered analysis
        - βœ… Web scraping works
        - βœ… Structured framework available
        
        ### With GPU Enabled:
        - βœ… Full Jan v1 model (91.1% accuracy)
        - βœ… AI-powered research analysis
        - βœ… Entity extraction
        - βœ… Multi-source comparison
        - βœ… Research question generation
        
        ### Alternative Free Options:
        - **Google Colab**: Run the full model for free
        - **Kaggle Notebooks**: 30 hours free GPU/week
        - **Local with Jan App**: If you have 8GB+ VRAM
        """)

if __name__ == "__main__":
    # Serve on all interfaces at the standard Gradio port, without
    # creating a public share link.
    demo.launch(server_name="0.0.0.0", server_port=7860, share=False)