import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_NAME = "FlameF0X/SnowflakeCore-G1-Tiny"

# Load model and tokenizer
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    trust_remote_code=True,
    force_download=True,  # always re-fetch from the Hub; drop this to reuse the local cache
    use_safetensors=True,
)
tokenizer = AutoTokenizer.from_pretrained(
    MODEL_NAME,
    trust_remote_code=True,
    force_download=True,
)


def custom_greedy_generate(prompt, max_length=50):
    """Greedy decoding: append the argmax token until EOS or max_length new tokens."""
    model.eval()
    input_ids = tokenizer(prompt, return_tensors="pt").input_ids
    generated = input_ids

    with torch.no_grad():
        for _ in range(max_length):
            # Re-run the model on the full sequence and take the logits
            # for the last position only.
            outputs = model(input_ids=generated)
            next_token_logits = outputs["logits"][:, -1, :]
            next_token_id = torch.argmax(next_token_logits, dim=-1).unsqueeze(-1)
            generated = torch.cat((generated, next_token_id), dim=1)
            # Stop early at end-of-sequence, if the tokenizer defines one.
            if tokenizer.eos_token_id is not None and next_token_id.item() == tokenizer.eos_token_id:
                break

    return tokenizer.decode(generated[0], skip_special_tokens=True)


def gradio_generate(prompt):
    return custom_greedy_generate(prompt)


iface = gr.Interface(
    fn=gradio_generate,
    inputs=gr.Textbox(lines=2, placeholder="Enter your prompt here..."),
    outputs=gr.Textbox(label="Generated Text"),
    title="SnowflakeCore-G1-Tiny Text Generation",
    description="Enter a prompt and generate text using the SnowflakeCore-G1-Tiny model.",
)

if __name__ == "__main__":
    iface.launch()
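
# Note: a minimal alternative sketch, assuming the model's remote code
# subclasses transformers.GenerationMixin (not verified for this checkpoint).
# The custom loop above is plain greedy decoding, which would then be
# roughly equivalent to:
#
#   output_ids = model.generate(
#       tokenizer("Enter your prompt here...", return_tensors="pt").input_ids,
#       max_new_tokens=50,
#       do_sample=False,  # greedy: always pick the argmax token
#   )
#   text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
#
# The hand-rolled loop is kept so generation works even if the remote
# model class does not implement generate().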