# (Hugging Face Spaces status banner — page-scrape residue, not part of the program.)
""" | |
Jan v1 Research Assistant - OPTIMIZED for speed | |
""" | |
import gradio as gr | |
from transformers import AutoModelForCausalLM, AutoTokenizer | |
import torch | |
import requests | |
from bs4 import BeautifulSoup | |
import re | |
# Initialize model with optimizations | |
print("π Loading Jan v1 optimized...") | |
model_name = "janhq/Jan-v1-4B" | |
# Load with 4-bit quantization for speed | |
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True) | |
model = AutoModelForCausalLM.from_pretrained( | |
model_name, | |
torch_dtype=torch.float16, | |
device_map="auto", | |
load_in_4bit=True, # 4-bit is faster than 8-bit | |
trust_remote_code=True, | |
low_cpu_mem_usage=True | |
) | |
print("β Model loaded!") | |
def quick_search(query):
    """Return three canned placeholder search results for *query*.

    Stub for a real search backend: each result is a dict with
    'title', 'body', and 'url' keys.
    """
    bodies = (
        'Recent developments and findings...',
        'Expert analysis shows...',
        'Current research indicates...',
    )
    return [
        {'title': f'Result {rank} for {query}', 'body': body, 'url': '#'}
        for rank, body in enumerate(bodies, start=1)
    ]
def fast_research(query, temperature=0.4):
    """Run a quick placeholder search and generate a brief model analysis.

    Args:
        query: Free-text research question; empty input returns a prompt
            message instead of invoking the model.
        temperature: Sampling temperature for generation (default 0.4).

    Returns:
        The model's analysis text followed by a numbered list of source titles.
    """
    if not query:
        return "Enter a query"

    # Quick (stub) search.
    results = quick_search(query)
    sources = "\n".join(
        f"[{i}] {r['title']}: {r['body']}" for i, r in enumerate(results, 1)
    )

    # Keep the prompt short for speed.
    prompt = f"Query: {query}\nSources: {sources}\n\nProvide brief analysis:"

    inputs = tokenizer(prompt, return_tensors="pt", max_length=512, truncation=True)
    # BUG FIX: with device_map="auto" the model may live on GPU while the
    # tokenizer returns CPU tensors — move inputs to the model's device.
    inputs = {k: v.to(model.device) for k, v in inputs.items()}
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=200,  # limit output length for speed
            temperature=temperature,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )

    # BUG FIX: decode only the newly generated tokens. The old
    # response.replace(prompt, "") was fragile because decode() does not
    # always reproduce the prompt byte-for-byte.
    prompt_len = inputs["input_ids"].shape[1]
    analysis = tokenizer.decode(
        outputs[0][prompt_len:], skip_special_tokens=True
    ).strip()

    # Append the numbered source titles.
    result = f"{analysis}\n\nSOURCES:\n"
    for i, r in enumerate(results, 1):
        result += f"[{i}] {r['title']}\n"
    return result
# Simple Gradio UI: query box + temperature slider in, analysis box out.
query_box = gr.Textbox(label="Research Query", lines=2)
temp_slider = gr.Slider(0.1, 0.9, value=0.4, label="Temperature")
analysis_box = gr.Textbox(label="Analysis", lines=15)

demo = gr.Interface(
    fn=fast_research,
    inputs=[query_box, temp_slider],
    outputs=analysis_box,
    title="Jan v1 Research - FAST VERSION",
    description="Optimized for speed - 30 second responses",
)

if __name__ == "__main__":
    demo.launch()