"""
Jan v1 Research Assistant - Simplified Version for CPU
Works without GPU - uses API approach
"""
import gradio as gr
import requests
from bs4 import BeautifulSoup

def scrape_url(url: str) -> str:
    """Scrape a URL and return up to 4,000 characters of cleaned text."""
    try:
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        }
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()  # Surface HTTP errors instead of parsing error pages
        soup = BeautifulSoup(response.content, 'html.parser')
        # Remove script and style elements
        for script in soup(["script", "style"]):
            script.decompose()
        # Collapse the remaining text into single-spaced prose
        text = soup.get_text()
        lines = (line.strip() for line in text.splitlines())
        chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
        text = ' '.join(chunk for chunk in chunks if chunk)
        return text[:4000]  # Limit to 4,000 chars
    except Exception as e:
        return f"Error scraping URL: {str(e)}"

def research_assistant_simple(query: str, context: str = "") -> str:
    """
    Simplified research assistant: returns a structured analysis template
    rather than calling the model.
    """
    # For now, return a structured analysis template. This can be replaced
    # with an actual Jan v1 call (see the query_jan_v1 sketch above) when
    # the model is available.
    # If the context is a URL, scrape it first
    if context.startswith('http'):
        context = scrape_url(context)
analysis = f"""
# Research Analysis
## Query
{query}
## Context Summary
{context[:500] if context else "No context provided"}...
## Analysis Framework
### 1. Key Findings
- The context provides information about the topic
- Further analysis would require examining specific aspects
- Consider multiple perspectives on this subject
### 2. Critical Questions
- What are the primary assumptions?
- What evidence supports the main claims?
- What alternative viewpoints exist?
### 3. Research Directions
- Investigate primary sources
- Compare with related studies
- Examine historical context
### 4. Limitations
- Limited context provided
- Single source analysis
- Requires deeper investigation
### 5. Next Steps
- Gather additional sources
- Conduct comparative analysis
- Validate key claims
---
*Note: This is a simplified version. For full Jan v1 capabilities, GPU hardware is required.*
"""
return analysis

# Create Gradio interface
with gr.Blocks(title="Jan v1 Research Assistant (Simplified)", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # 🔬 Jan v1 Research Assistant (Simplified Version)

    This is a CPU-compatible version with limited features.
    For full Jan v1 (4B params) capabilities, GPU hardware is required.

    ### Available Features:
    - 🌐 Web scraping and text extraction
    - 📊 Structured research framework
    - 🔍 Context analysis
    """)

    with gr.Tab("Research Analysis"):
        with gr.Row():
            with gr.Column():
                query = gr.Textbox(
                    label="Research Query",
                    placeholder="What would you like to research?",
                    lines=2
                )
                context = gr.Textbox(
                    label="Context (paste text or URL)",
                    placeholder="Paste article text or enter URL to analyze",
                    lines=5
                )
                analyze_btn = gr.Button("🔍 Analyze", variant="primary")
            with gr.Column():
                output = gr.Textbox(
                    label="Analysis Results",
                    lines=15
                )

        analyze_btn.click(
            research_assistant_simple,
            inputs=[query, context],
            outputs=output
        )
with gr.Tab("Web Scraper"):
with gr.Row():
with gr.Column():
url_input = gr.Textbox(
label="URL to Scrape",
placeholder="https://example.com/article",
lines=1
)
scrape_btn = gr.Button("π Extract Text", variant="primary")
with gr.Column():
scrape_output = gr.Textbox(
label="Extracted Text",
lines=10
)
scrape_btn.click(
scrape_url,
inputs=url_input,
outputs=scrape_output
)
with gr.Tab("Instructions"):
gr.Markdown("""
## π How to Enable Full Jan v1
This Space is currently running in simplified mode without the actual Jan v1 model.
To enable full capabilities:
1. **Go to Settings**: https://huggingface.co/spaces/darwincb/jan-v1-research/settings
2. **Select Hardware**: GPU T4 medium ($0.60/hour)
3. **Save changes**
4. **Wait 5 minutes** for rebuild
### Current Limitations (CPU mode):
- β No actual Jan v1 model (4B params needs GPU)
- β No AI-powered analysis
- β
Web scraping works
- β
Structured framework available
### With GPU Enabled:
- β
Full Jan v1 model (91.1% accuracy)
- β
AI-powered research analysis
- β
Entity extraction
- β
Multi-source comparison
- β
Research question generation
### Alternative Free Options:
- **Google Colab**: Run the full model for free
- **Kaggle Notebooks**: 30 hours free GPU/week
- **Local with Jan App**: If you have 8GB+ VRAM
""")
if __name__ == "__main__":
demo.launch(
server_name="0.0.0.0",
server_port=7860,
share=False
) |