# jan-v1-research / app-optimized.py
# ⚡ OPTIMIZED VERSION: 30-second responses — simplified for speed
"""
Jan v1 Research Assistant - OPTIMIZED for speed
"""
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import requests
from bs4 import BeautifulSoup
import re
# Initialize model with optimizations
print("🚀 Loading Jan v1 optimized...")
model_name = "janhq/Jan-v1-4B"

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

# NOTE: passing ``load_in_4bit=True`` directly to from_pretrained is
# deprecated in recent transformers releases; the supported path is an
# explicit BitsAndBytesConfig quantization config (same behavior).
from transformers import BitsAndBytesConfig

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",
    quantization_config=BitsAndBytesConfig(
        load_in_4bit=True,  # 4-bit is faster than 8-bit
        bnb_4bit_compute_dtype=torch.float16,  # match the model dtype above
    ),
    trust_remote_code=True,
    low_cpu_mem_usage=True,
)
print("✅ Model loaded!")
def quick_search(query):
    """Return three static placeholder search hits for *query*.

    No network access is performed — the results are canned stand-ins
    that keep the demo fast and self-contained.
    """
    snippets = (
        'Recent developments and findings...',
        'Expert analysis shows...',
        'Current research indicates...',
    )
    return [
        {'title': f'Result {rank} for {query}', 'body': body, 'url': '#'}
        for rank, body in enumerate(snippets, start=1)
    ]
def fast_research(query, temperature=0.4):
    """Generate a short, source-grounded analysis for *query*.

    Builds a compact prompt from placeholder search results, samples a
    capped-length completion from the model, and appends a numbered
    source list.

    Args:
        query: Free-text research question; blank/whitespace input
            short-circuits with a hint message.
        temperature: Sampling temperature (default 0.4).

    Returns:
        The model's analysis followed by a "SOURCES" listing.
    """
    # Guard against both empty and whitespace-only queries.
    if not query or not query.strip():
        return "Enter a query"

    # Quick (placeholder) search
    results = quick_search(query)
    sources = "\n".join(
        f"[{i}] {r['title']}: {r['body']}" for i, r in enumerate(results, start=1)
    )

    # Shorter prompt for speed
    prompt = f"Query: {query}\nSources: {sources}\n\nProvide brief analysis:"

    # Generate with limits. Inputs must live on the same device as the
    # model: device_map="auto" may have placed it on GPU while the
    # tokenizer returns CPU tensors.
    inputs = tokenizer(prompt, return_tensors="pt", max_length=512, truncation=True)
    inputs = {k: v.to(model.device) for k, v in inputs.items()}
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=200,  # limit output length for speed
            temperature=temperature,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode only the newly generated tokens. This is more robust than
    # str.replace(prompt, ""), which silently fails whenever the
    # re-decoded prompt text differs byte-for-byte from the original.
    prompt_len = inputs["input_ids"].shape[1]
    analysis = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True).strip()

    # Add sources
    result = f"{analysis}\n\n📚 SOURCES:\n"
    for i, r in enumerate(results, start=1):
        result += f"[{i}] {r['title']}\n"
    return result
# Simple interface: one query box + temperature slider in, analysis text out.
query_box = gr.Textbox(label="Research Query", lines=2)
temp_slider = gr.Slider(0.1, 0.9, value=0.4, label="Temperature")
analysis_box = gr.Textbox(label="Analysis", lines=15)

demo = gr.Interface(
    fn=fast_research,
    inputs=[query_box, temp_slider],
    outputs=analysis_box,
    title="Jan v1 Research - FAST VERSION",
    description="Optimized for speed - 30 second responses",
)

if __name__ == "__main__":
    demo.launch()