# 🚀 Jan App COMPLETO - Google Colab (GRATIS)

Recreando la Jan App completa con:
- ✅ Jan v1 model (4B params)
- ✅ Web search en tiempo real
- ✅ Sources con citations
- ✅ Browser automation
- ✅ Como Perplexity pero GRATIS

**Setup:** Runtime → GPU T4 → Run all cells

## 📦 1. Install Dependencies

In [None]:
# Install core ML dependencies
!pip install transformers torch gradio accelerate bitsandbytes sentencepiece -q

# Install web search and scraping tools
!pip install googlesearch-python beautifulsoup4 requests selenium -q
!pip install duckduckgo-search newspaper3k trafilatura -q

# Install utilities
!pip install python-dateutil validators urllib3 -q

print("โœ… All dependencies installed!")

## 🧠 2. Load Jan v1 Model

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Imported here so this cell brings its own new dependency into scope.
from transformers import BitsAndBytesConfig

print("🚀 Loading Jan v1 model...")
model_name = "janhq/Jan-v1-4B"

tokenizer = AutoTokenizer.from_pretrained(model_name)

# Request 8-bit weights through an explicit quantization config: passing
# `load_in_8bit=True` straight to `from_pretrained` is deprecated in
# transformers in favour of the `quantization_config` argument.
quantization_config = BitsAndBytesConfig(load_in_8bit=True)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",
    quantization_config=quantization_config,
)

print("✅ Jan v1 loaded successfully!")
print(f"📊 Model: {model.num_parameters()/1e9:.2f}B parameters")

## 🔍 3. Web Search Engine

In [None]:
import requests
from bs4 import BeautifulSoup
from duckduckgo_search import DDGS
from datetime import datetime
import validators
import json
import re

class WebSearchEngine:
    """DuckDuckGo-backed web search that also fetches and cleans each hit.

    Args:
        timeout: Seconds to wait for each page fetch.
        max_content_chars: Upper bound on the cleaned page text kept per URL.
    """

    # Tags whose text is page chrome rather than article content.
    _NOISE_TAGS = ('script', 'style', 'nav', 'footer', 'header')

    def __init__(self, timeout: float = 10, max_content_chars: int = 2000):
        self.timeout = timeout
        self.max_content_chars = max_content_chars
        self.ddgs = DDGS()
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        })

    def search_web(self, query: str, num_results: int = 5) -> list:
        """Search the web and return structured, content-enriched results.

        Args:
            query: Search terms passed to DuckDuckGo.
            num_results: Maximum number of results to return.

        Returns:
            A list of dicts with the keys 'title', 'url', 'snippet', 'content'
            and 'rank' (1-based). The list is empty when the search fails;
            the error is printed rather than raised.
        """
        try:
            # Coerce defensively: the slice below requires a real int.
            num_results = int(num_results)

            print(f"🔍 Searching: {query}")
            results = list(self.ddgs.text(query, max_results=num_results))

            enriched_results = []
            for rank, result in enumerate(results[:num_results], start=1):
                url = result.get('href', '')
                enriched_results.append({
                    'title': result.get('title', 'No title'),
                    'url': url,
                    'snippet': result.get('body', ''),
                    'content': self.extract_content(url),
                    'rank': rank,
                })

            return enriched_results
        except Exception as e:
            # Deliberate best-effort boundary: a failed search yields no
            # results instead of crashing the caller.
            print(f"❌ Search error: {e}")
            return []

    def extract_content(self, url: str) -> str:
        """Fetch `url` and return its visible text, whitespace-normalized.

        Args:
            url: Address of the page to fetch.

        Returns:
            At most `max_content_chars` characters of cleaned text, or an
            empty string when the URL is missing or invalid, the response is
            an HTTP error or not a text/HTML document, or the fetch fails.
        """
        # Nothing to fetch; skip validation and the network entirely.
        if not url:
            return ""

        try:
            if not validators.url(url):
                return ""

            response = self.session.get(url, timeout=self.timeout)
            # Without this, 403/404/5xx error pages would be scraped and
            # handed on as if they were source material.
            response.raise_for_status()

            # Skip payloads such as PDFs or images that the HTML parser
            # would turn into garbage text.
            content_type = response.headers.get('Content-Type', '').lower()
            if (content_type and 'html' not in content_type
                    and not content_type.startswith('text/')):
                return ""

            soup = BeautifulSoup(response.content, 'html.parser')

            # Remove unwanted elements
            for element in soup(list(self._NOISE_TAGS)):
                element.decompose()

            # Extract text and collapse every whitespace run to one space
            text = soup.get_text(separator=' ', strip=True)
            text = re.sub(r'\s+', ' ', text)
            return text[:self.max_content_chars]

        except Exception as e:
            # Best-effort: one unreachable page must not abort the search.
            print(f"⚠️ Content extraction failed for {url}: {e}")
            return ""

# Initialize the shared search engine instance that is handed to the
# research assistant later in the notebook.
search_engine = WebSearchEngine()
# The status marker was mojibake (UTF-8 emoji decoded with a legacy code page).
print("✅ Web search engine ready!")

## 🤖 4. Jan App Research Assistant

In [None]:
class JanAppAssistant:
    """Research assistant that grounds model generations in web results.

    Args:
        model: Causal language model; must expose `generate` and `device`.
        tokenizer: Tokenizer paired with `model`.
        search_engine: Object exposing `search_web(query, num_results)` that
            returns dicts with 'title', 'url', 'snippet' and 'content' keys.
    """

    EMPTY_QUERY_MESSAGE = "❌ Please enter a question or research query."
    NO_RESULTS_MESSAGE = "❌ No search results found. Try a different query."

    # Prompt-size limits (in tokens) for the two modes.
    RESEARCH_MAX_INPUT_TOKENS = 2048
    QUICK_MAX_INPUT_TOKENS = 1024
    # Largest page excerpt (in characters) quoted per source.
    MAX_EXCERPT_CHARS = 800

    def __init__(self, model, tokenizer, search_engine):
        self.model = model
        self.tokenizer = tokenizer
        self.search_engine = search_engine

    def _generate(self, prompt: str, max_input_tokens: int,
                  max_new_tokens: int, **sampling) -> str:
        """Sample a completion for `prompt` and return only the new text."""
        inputs = self.tokenizer(
            prompt,
            return_tensors="pt",
            truncation=True,
            max_length=max_input_tokens,
        )
        inputs = inputs.to(self.model.device)

        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=max_new_tokens,
                do_sample=True,
                pad_token_id=self.tokenizer.eos_token_id,
                **sampling,
            )

        # Decode only the tokens after the prompt. Stripping the prompt with
        # str.replace() fails whenever the prompt was truncated or does not
        # round-trip through the tokenizer, which echoes the whole prompt.
        prompt_length = inputs["input_ids"].shape[-1]
        completion = outputs[0][prompt_length:]
        return self.tokenizer.decode(completion, skip_special_tokens=True).strip()

    @staticmethod
    def _format_sources(search_results, excerpt_chars: int) -> str:
        """Render the numbered source list that is embedded in the prompt."""
        return "".join(
            f"\n\n[{number}] {result['title']}\n"
            f"URL: {result['url']}\n"
            f"Content: {result['snippet']} {result['content'][:excerpt_chars]}\n"
            for number, result in enumerate(search_results, start=1)
        )

    @staticmethod
    def _build_research_prompt(query: str, sources_text: str) -> str:
        """Assemble the research prompt around the rendered sources."""
        return (
            "You are a research analyst. Based on the current web sources "
            "below, provide a comprehensive analysis.\n"
            "\n"
            f"QUERY: {query}\n"
            "\n"
            "CURRENT WEB SOURCES:\n"
            f"{sources_text}\n"
            "\n"
            "Provide analysis with:\n"
            "1. Executive Summary\n"
            "2. Key Findings (reference sources with [1], [2], etc.)\n"
            "3. Critical Analysis\n"
            "4. Implications\n"
            "5. Areas for Further Research\n"
            "\n"
            "Analysis:"
        )

    def _fit_research_prompt(self, query: str, search_results) -> str:
        """Build the research prompt, shrinking excerpts until it fits.

        Truncation drops the END of an over-long prompt, which is where the
        instructions and the "Analysis:" cue live, so the page excerpts are
        halved until the prompt is within the input-token limit.
        """
        excerpt_chars = self.MAX_EXCERPT_CHARS
        while True:
            sources_text = self._format_sources(search_results, excerpt_chars)
            prompt = self._build_research_prompt(query, sources_text)
            token_count = len(self.tokenizer(prompt)["input_ids"])
            if token_count <= self.RESEARCH_MAX_INPUT_TOKENS or excerpt_chars == 0:
                # With no excerpt left, truncation in _generate is the
                # last resort.
                return prompt
            excerpt_chars //= 2

    def research_with_sources(self, query: str, num_sources: int = 3, temperature: float = 0.6):
        """Complete research with real-time web sources like Perplexity.

        Args:
            query: The research question.
            num_sources: How many search results to request and cite.
            temperature: Sampling temperature for generation.

        Returns:
            The generated analysis followed by a numbered source list, or a
            short message when the query is blank or the search is empty.
        """
        query = (query or "").strip()
        if not query:
            return self.EMPTY_QUERY_MESSAGE

        # Step 1: Web search
        print("🔍 Step 1: Searching the web...")
        search_results = self.search_engine.search_web(query, int(num_sources))

        if not search_results:
            return self.NO_RESULTS_MESSAGE

        # Step 2: Compile sources
        print("📚 Step 2: Processing sources...")
        prompt = self._fit_research_prompt(query, search_results)

        # Step 3: Generate analysis with Jan v1
        print("🧠 Step 3: Analyzing with Jan v1...")
        analysis = self._generate(
            prompt,
            max_input_tokens=self.RESEARCH_MAX_INPUT_TOKENS,
            max_new_tokens=1024,
            temperature=temperature,
            top_p=0.95,
            top_k=20,
        )

        # Format final response: analysis, divider, then the citations.
        parts = [f"{analysis}\n\n" + "=" * 50 + "\n📚 SOURCES:\n\n"]
        for number, result in enumerate(search_results, start=1):
            parts.append(f"[{number}] {result['title']}\n")
            parts.append(f" {result['url']}\n\n")

        return "".join(parts)

    def quick_answer(self, question: str, temperature: float = 0.4):
        """Quick answer with web verification.

        Args:
            question: The question to answer.
            temperature: Sampling temperature for generation.

        Returns:
            The generated answer, or a short message when the question is
            blank.
        """
        question = (question or "").strip()
        if not question:
            return self.EMPTY_QUERY_MESSAGE

        # Search for recent info; only the top result's snippet is used.
        search_results = self.search_engine.search_web(question, 2)

        context = ""
        if search_results:
            context = f"Recent information: {search_results[0]['snippet']}"

        prompt = (
            f"Question: {question}\n"
            "\n"
            f"{context}\n"
            "\n"
            "Provide a concise, accurate answer:"
        )

        return self._generate(
            prompt,
            max_input_tokens=self.QUICK_MAX_INPUT_TOKENS,
            max_new_tokens=200,
            temperature=temperature,
        )

# Initialize Jan App Assistant with the model, tokenizer and search engine
# created by the earlier cells.
jan_app = JanAppAssistant(model, tokenizer, search_engine)
# The status marker was mojibake (UTF-8 emoji decoded with a legacy code page).
print("✅ Jan App Assistant ready!")

## 🎨 5. Create Perplexity-like Interface

In [None]:
import gradio as gr

# Custom CSS for Perplexity-like styling
custom_css = """
.gradio-container {
 max-width: 1200px !important;
}
.sources-box {
 background: #f8f9fa;
 border-left: 4px solid #007bff;
 padding: 12px;
 margin: 10px 0;
}
"""

# Create the interface.
# All emoji in the labels and markdown below were mojibake (UTF-8 decoded with
# a legacy code page) and have been restored to the intended characters.
with gr.Blocks(title="Jan App Complete - Research Assistant", theme=gr.themes.Soft(), css=custom_css) as demo:

    gr.Markdown("""
 # 🚀 Jan App Complete - FREE Research Assistant
 
 **Powered by Jan v1 (4B) + Real-time Web Search**
 
 Like Perplexity, but completely FREE with Google Colab GPU!
 
 Features:
 - 🔍 Real-time web search
 - 📚 Source citations
 - 🧠 Jan v1 analysis (91.1% accuracy)
 - 🆓 100% Free with GPU
 """)

    # Tab 1: full research flow (search, analysis, cited sources).
    with gr.Tab("🔬 Research Mode"):
        with gr.Row():
            # Left column: query and generation controls.
            with gr.Column(scale=1):
                research_query = gr.Textbox(
                    label="Research Query",
                    placeholder="Ask anything - I'll search the web and analyze with Jan v1...",
                    lines=3
                )

                with gr.Row():
                    num_sources = gr.Slider(
                        minimum=1, maximum=8, value=3, step=1,
                        label="Number of Sources"
                    )
                    temperature = gr.Slider(
                        minimum=0.1, maximum=1.0, value=0.6, step=0.1,
                        label="Temperature (creativity)"
                    )

                research_btn = gr.Button(
                    "🔍 Research with Sources",
                    variant="primary",
                    size="lg"
                )

            # Right column: the analysis followed by its source list.
            with gr.Column(scale=2):
                research_output = gr.Textbox(
                    label="Research Analysis + Sources",
                    lines=20,
                    show_copy_button=True
                )

        research_btn.click(
            jan_app.research_with_sources,
            inputs=[research_query, num_sources, temperature],
            outputs=research_output
        )

    # Tab 2: short answers backed by a single search snippet.
    with gr.Tab("⚡ Quick Answer"):
        with gr.Row():
            with gr.Column():
                quick_question = gr.Textbox(
                    label="Quick Question",
                    placeholder="Ask a quick question for immediate answer...",
                    lines=2
                )
                quick_btn = gr.Button("⚡ Quick Answer", variant="secondary")

            with gr.Column():
                quick_output = gr.Textbox(
                    label="Quick Answer",
                    lines=8
                )

        quick_btn.click(
            jan_app.quick_answer,
            inputs=quick_question,
            outputs=quick_output
        )

    # Tab 3: canned queries that pre-fill the Research Mode inputs.
    with gr.Tab("📋 Examples"):
        gr.Examples(
            examples=[
                ["What are the latest developments in artificial intelligence for 2024?", 4, 0.6],
                ["Compare the current market leaders in electric vehicles", 5, 0.5],
                ["What is the scientific consensus on climate change solutions?", 6, 0.4],
                ["Latest breakthroughs in quantum computing research", 3, 0.7],
                ["Current state of renewable energy adoption globally", 4, 0.5]
            ],
            inputs=[research_query, num_sources, temperature],
            label="Try these research examples:"
        )

    # Tab 4: static description of the pipeline.
    with gr.Tab("ℹ️ About"):
        gr.Markdown("""
 ## How this works:
 
 1. **Web Search**: Uses DuckDuckGo to find current information
 2. **Content Extraction**: Scrapes and cleans web pages
 3. **Jan v1 Analysis**: 4B parameter model analyzes all sources
 4. **Source Citations**: Like Perplexity, shows all sources used
 
 ## Advantages over Perplexity:
 
 - ✅ **100% Free** (vs $20/month)
 - ✅ **No rate limits** (vs 5 queries/hour free)
 - ✅ **Full control** over model and parameters
 - ✅ **Privacy** (runs in your Colab)
 
 ## Technical specs:
 
 - **Model**: Jan v1 (4.02B parameters, 91.1% SimpleQA accuracy)
 - **Search**: DuckDuckGo API
 - **GPU**: Google Colab T4 (16GB VRAM)
 - **Framework**: Transformers + Gradio
 """)

# Announce before launching: with debug=True, launch() blocks this cell until
# the server is stopped, so anything placed after it only prints at shutdown.
print("🎉 Jan App Complete is now running!")
# NOTE(review): the share-link lifetime is decided by Gradio and is reported
# in its own launch output - confirm the 72-hour figure for the installed version.
print("🔗 Share your link with others - it works for 72 hours!")

# Launch the interface. debug=True keeps errors visible in the cell output,
# at the cost of blocking: later cells run only after this one is interrupted.
demo.launch(share=True, debug=True)

## 🧪 6. Test the Complete System

In [None]:
# Smoke-test the complete Jan App: one research request, printed in full.
test_query = "What are the recent developments in AI safety research?"

# The marker emoji was mojibake (UTF-8 decoded with a legacy code page).
print(f"🧪 Testing with query: {test_query}")
print("\n" + "="*60 + "\n")

result = jan_app.research_with_sources(test_query, num_sources=3)
print(result)